fix(gate-1): resolve merge conflicts with main

Three add/add + content conflicts, all mechanical:
- WorkspaceUsage.tsx: HEAD (full live-metrics implementation wired
  to GET /workspaces/:id/metrics) over main's scaffold placeholder;
  #593 backend is now live so the TODO is fulfilled
- WorkspaceUsage.test.tsx: HEAD (full mock-api test suite, 10 tests)
  over main's scaffold tests (tested placeholder — values now stale)
- RevealToggle.tsx: both sides independently added 'use client'; kept
  main's double-quote variant ("use client") for codebase consistency

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Molecule AI · triage-operator 2026-04-17 06:16:36 +00:00
commit bb18f79343
68 changed files with 7458 additions and 177 deletions

View File

@ -1,39 +1,25 @@
name: publish-platform-image
# Builds and pushes the tenant-platform Docker image to GHCR whenever a
# commit lands on main. The private molecule-controlplane provisioner sets
# TENANT_IMAGE=ghcr.io/molecule-ai/platform:<tag> to spawn tenant Fly
# Machines from this image. See molecule-controlplane README for the pairing.
# Builds and pushes the platform Docker images to GHCR whenever a commit
# lands on main. EC2 tenant instances pull the tenant image from GHCR.
on:
push:
branches: [main]
paths:
# Only rebuild when something platform-relevant changes — saves GHA
# minutes on docs-only / canvas-only / MCP-only PRs.
- 'platform/**'
- 'canvas/**'
- 'manifest.json'
- '.github/workflows/publish-platform-image.yml'
# Templates now live in standalone repos — template changes no longer
# trigger a platform rebuild. Use workflow_dispatch to manually rebuild
# if a template repo update needs to be baked into the image.
# Manual trigger for re-publishing a tag after a non-platform merge.
workflow_dispatch:
permissions:
contents: read
packages: write # required to push to ghcr.io/${{ github.repository_owner }}/*
packages: write
env:
# GHCR accepts mixed-case, but most tooling lowercases — keep us consistent.
IMAGE_NAME: ghcr.io/molecule-ai/platform
# Fly registry mirror — tenant machines provisioned by the private
# `molecule-controlplane` pull from here (private GHCR image can't be
# pulled by Fly machines without auth plumbing we don't want to add).
# Fly auto-authenticates same-org machines against registry.fly.io, so
# mirroring keeps GHCR private while tenants still boot.
FLY_IMAGE_NAME: registry.fly.io/molecule-tenant
TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant
jobs:
build-and-push:
@ -42,83 +28,33 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Configure registry auth (write auths map; do NOT call docker login)
# `docker login` on macOS unconditionally writes credentials to the
# osxkeychain credential helper, even when DOCKER_CONFIG/config.json
# declares `credsStore: ""` and even when invoked with `--config`.
# Verified locally 2026-04-16 — after a successful login, Docker
# rewrites the same config file to:
# { "auths": { "ghcr.io": {} }, "credsStore": "osxkeychain" }
# i.e. the auth lives in the Keychain, not the config file. The
# Mac mini runner is a launchd user agent with a locked Keychain,
# so storage fails with `User interaction is not allowed (-25308)`.
#
# Six prior PRs (#273, #319, #322, #341, #484, #486) all kept calling
# `docker login` and tried to coerce credsStore — none worked.
# The only reliable fix is to skip `docker login` entirely and write
# the auth strings directly. `docker/build-push-action@v5` and the
# daemon honor the `auths` map for push without needing login.
#
# Fly registry username MUST be literal "x" (verified 2026-04-15) —
# any other value returns 401. FLY_API_TOKEN lives in GitHub Actions
# secrets AND in `fly secrets` on molecule-cp; see
# docs/runbooks/saas-secrets.md before rotating.
- name: Configure GHCR auth
shell: bash
env:
GHCR_USER: ${{ github.actor }}
GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
FLY_TOKEN: ${{ secrets.FLY_API_TOKEN }}
run: |
set -eu
mkdir -p "${RUNNER_TEMP}/docker-config"
GHCR_AUTH=$(printf '%s:%s' "${GHCR_USER}" "${GHCR_TOKEN}" | base64)
FLY_AUTH=$(printf '%s:%s' 'x' "${FLY_TOKEN}" | base64)
umask 077
cat > "${RUNNER_TEMP}/docker-config/config.json" <<JSON
{
"auths": {
"ghcr.io": { "auth": "${GHCR_AUTH}" },
"registry.fly.io": { "auth": "${FLY_AUTH}" }
}
}
JSON
printf '{"auths":{"ghcr.io":{"auth":"%s"}}}' "${GHCR_AUTH}" > "${RUNNER_TEMP}/docker-config/config.json"
echo "DOCKER_CONFIG=${RUNNER_TEMP}/docker-config" >> "${GITHUB_ENV}"
# Diagnostics that don't leak the tokens.
echo "=== docker ==="
command -v docker || echo "(docker not in PATH)"
docker --version 2>&1 || true
ls -la /usr/local/bin/docker /opt/homebrew/bin/docker 2>&1 || true
echo "=== auths registries (no values) ==="
grep -o '"[a-zA-Z0-9.-]*\.io"' "${RUNNER_TEMP}/docker-config/config.json" || true
- name: Set up QEMU
# Required on the Apple-silicon self-hosted runner — Fly tenant machines
# pull linux/amd64, and buildx needs binfmt handlers in Docker Desktop's
# VM to emulate amd64 during the build.
uses: docker/setup-qemu-action@v3
with:
platforms: linux/amd64
- name: Set up Docker Buildx
# Buildx enables cache-from/cache-to via GHA cache and multi-arch
# builds without local docker daemon wrangling.
uses: docker/setup-buildx-action@v3
- name: Compute tags
id: tags
# Emit two tags per build: `latest` (floating, always the main tip)
# and the short commit SHA (immutable, pin-friendly). Control plane
# can deploy `latest` today and pin to :sha in Phase H hardening.
run: |
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
- name: Build & push to GHCR
# Split from the Fly mirror so a registry.fly.io outage doesn't block
# GHCR (or vice versa) — each registry's failure mode is isolated.
# GHA cache is shared because both steps re-use the same Dockerfile
# context + build args.
# Explicit linux/amd64 target: the runner is Apple-silicon (arm64),
# but Fly tenant machines are amd64. QEMU handles the emulation.
- name: Build & push platform image to GHCR
uses: docker/build-push-action@v5
with:
context: .
@ -133,13 +69,9 @@ jobs:
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.description=Molecule AI tenant platform (one instance per org)
org.opencontainers.image.description=Molecule AI platform (Go API server)
- name: Build & push tenant image to Fly registry
# Tenant image = Go platform + Canvas (Next.js) in one container.
# Uses Dockerfile.tenant which includes the canvas build + reverse proxy.
# Continues even if GHCR push failed.
if: always()
- name: Build & push tenant image to GHCR
uses: docker/build-push-action@v5
with:
context: .
@ -147,31 +79,11 @@ jobs:
platforms: linux/amd64
push: true
tags: |
${{ env.FLY_IMAGE_NAME }}:latest
${{ env.FLY_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
${{ env.TENANT_IMAGE_NAME }}:latest
${{ env.TENANT_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
cache-from: type=gha
cache-to: type=gha,mode=max
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.description=Molecule AI tenant platform + canvas (one instance per org)
- name: Install flyctl
uses: superfly/flyctl-actions/setup-flyctl@master
- name: Deploy to Fly tenant machines
env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
run: |
MACHINES=$(flyctl machines list -a molecule-tenant --json | jq -r '.[] | select(.state == "started" or .state == "stopped") | .id')
if [ -z "$MACHINES" ]; then
echo "No tenant machines found — skipping deploy (control plane provisions on demand)"
exit 0
fi
for id in $MACHINES; do
echo "Updating machine $id to sha-${{ steps.tags.outputs.sha }}..."
flyctl machines update "$id" \
--image "${{ env.FLY_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}" \
-a molecule-tenant \
--yes
done
echo "All tenant machines updated to sha-${{ steps.tags.outputs.sha }}"
org.opencontainers.image.description=Molecule AI tenant platform + canvas (one EC2 instance per org)

4
.gitignore vendored
View File

@ -44,6 +44,10 @@ venv/
*.egg-info/
.pytest_cache/
# Brand monitor runtime state (never commit)
brand-monitor/.surge_state.json
brand-monitor/.monitor_state.json
# Docker
*.log

View File

@ -1,15 +1,20 @@
"use client";
import { useEffect } from "react";
import { useEffect, useState } from "react";
import { Canvas } from "@/components/Canvas";
import { Legend } from "@/components/Legend";
import { CommunicationOverlay } from "@/components/CommunicationOverlay";
import { Spinner } from "@/components/Spinner";
import { connectSocket, disconnectSocket } from "@/store/socket";
import { useCanvasStore } from "@/store/canvas";
import { api } from "@/lib/api";
import type { WorkspaceData } from "@/store/socket";
export default function Home() {
const hydrationError = useCanvasStore((s) => s.hydrationError);
const setHydrationError = useCanvasStore((s) => s.setHydrationError);
const [hydrating, setHydrating] = useState(true);
useEffect(() => {
connectSocket();
@ -23,8 +28,13 @@ export default function Home() {
useCanvasStore.getState().setViewport(viewport);
}
}).catch((err) => {
// Initial hydration failed — socket reconnect will retry
// Initial hydration failed — show error banner to user
console.error("Canvas: initial hydration failed", err);
useCanvasStore.getState().setHydrationError(
err instanceof Error && err.message ? err.message : "Failed to load canvas"
);
}).finally(() => {
setHydrating(false);
});
return () => {
@ -32,11 +42,39 @@ export default function Home() {
};
}, []);
if (hydrating) {
return (
<div className="fixed inset-0 flex items-center justify-center bg-zinc-950">
<div className="flex flex-col items-center gap-3">
<Spinner size="lg" />
<span className="text-xs text-zinc-500">Loading canvas...</span>
</div>
</div>
);
}
return (
<>
<Canvas />
<Legend />
<CommunicationOverlay />
{hydrationError && (
<div
role="alert"
className="fixed inset-0 flex flex-col items-center justify-center bg-zinc-950 text-zinc-300 gap-4 z-[9999]"
>
<p className="text-zinc-400 text-sm">{hydrationError}</p>
<button
onClick={() => {
setHydrationError(null);
window.location.reload();
}}
className="px-4 py-2 bg-blue-600 hover:bg-blue-500 text-white rounded-md text-sm"
>
Retry
</button>
</div>
)}
</>
);
}

View File

@ -235,6 +235,14 @@ export function ContextMenu() {
closeContextMenu();
}, [contextMenu, nestNode, closeContextMenu]);
const handleZoomToTeam = useCallback(() => {
if (!contextMenu) return;
window.dispatchEvent(
new CustomEvent("molecule:zoom-to-team", { detail: { nodeId: contextMenu.nodeId } })
);
closeContextMenu();
}, [contextMenu, closeContextMenu]);
if (!contextMenu) return null;
const isOfflineOrFailed = contextMenu.nodeData.status === "offline" || contextMenu.nodeData.status === "failed";
@ -253,7 +261,10 @@ export function ContextMenu() {
? [{ label: "Extract from Team", icon: "⤴", action: handleRemoveFromTeam }]
: []),
...(hasChildren
? [{ label: "Collapse Team", icon: "◁", action: handleCollapse }]
? [
{ label: "Collapse Team", icon: "◁", action: handleCollapse },
{ label: "Zoom to Team", icon: "⊕", action: handleZoomToTeam },
]
: [{ label: "Expand to Team", icon: "▷", action: handleExpand }]),
{ label: "", icon: "", action: () => {}, divider: true },
...(isPaused

View File

@ -1,6 +1,6 @@
"use client";
import { useState, useEffect } from "react";
import { useState, useEffect, useRef, useCallback, useId } from "react";
import * as Dialog from "@radix-ui/react-dialog";
import { api } from "@/lib/api";
@ -42,6 +42,7 @@ export function CreateWorkspaceButton() {
const [tier, setTier] = useState(1);
const [template, setTemplate] = useState("");
const [parentId, setParentId] = useState("");
const [budgetLimit, setBudgetLimit] = useState("");
const [creating, setCreating] = useState(false);
const [error, setError] = useState<string | null>(null);
const [workspaces, setWorkspaces] = useState<WorkspaceOption[]>([]);
@ -50,6 +51,33 @@ export function CreateWorkspaceButton() {
const [hermesProvider, setHermesProvider] = useState("anthropic");
const [hermesApiKey, setHermesApiKey] = useState("");
// Refs for roving tabIndex on the tier radio group (WCAG 2.1 arrow-key nav)
const radioRefs = useRef<Array<HTMLButtonElement | null>>([]);
const TIERS = [
{ value: 1, label: "T1", desc: "Sandboxed" },
{ value: 2, label: "T2", desc: "Standard" },
{ value: 3, label: "T3", desc: "Full Access" },
];
const handleRadioKeyDown = useCallback(
(e: React.KeyboardEvent, currentIndex: number) => {
if (e.key === "ArrowDown" || e.key === "ArrowRight") {
e.preventDefault();
const next = (currentIndex + 1) % TIERS.length;
setTier(TIERS[next].value);
radioRefs.current[next]?.focus();
} else if (e.key === "ArrowUp" || e.key === "ArrowLeft") {
e.preventDefault();
const prev = (currentIndex - 1 + TIERS.length) % TIERS.length;
setTier(TIERS[prev].value);
radioRefs.current[prev]?.focus();
}
},
// TIERS is stable (module-level constant pattern), setTier is stable from useState
// eslint-disable-next-line react-hooks/exhaustive-deps
[]
);
const isHermes = template.trim().toLowerCase() === "hermes";
// Reset form and load workspaces whenever dialog opens
@ -60,6 +88,7 @@ export function CreateWorkspaceButton() {
setTier(1);
setTemplate("");
setParentId("");
setBudgetLimit("");
setError(null);
setHermesProvider("anthropic");
setHermesApiKey("");
@ -86,12 +115,17 @@ export function CreateWorkspaceButton() {
: undefined;
try {
const parsedBudget = budgetLimit.trim()
? parseFloat(budgetLimit)
: null;
await api.post("/workspaces", {
name: name.trim(),
role: role.trim() || undefined,
template: template.trim() || undefined,
tier,
parent_id: parentId || undefined,
budget_limit: parsedBudget,
canvas: { x: Math.random() * 400 + 100, y: Math.random() * 300 + 100 },
...(isHermes && provider
? { secrets: { [provider.envVar]: hermesApiKey.trim() } }
@ -155,6 +189,14 @@ export function CreateWorkspaceButton() {
onChange={setRole}
placeholder="e.g. SEO Specialist"
/>
<InputField
label="Budget limit (USD)"
value={budgetLimit}
onChange={setBudgetLimit}
placeholder="e.g. 100"
type="number"
helper="Leave blank for unlimited"
/>
<InputField
label="Template"
value={template}
@ -172,16 +214,15 @@ export function CreateWorkspaceButton() {
<div className="col-span-3 text-[11px] text-zinc-400 mb-1">
Tier
</div>
{[
{ value: 1, label: "T1", desc: "Sandboxed" },
{ value: 2, label: "T2", desc: "Standard" },
{ value: 3, label: "T3", desc: "Full Access" },
].map((t) => (
{TIERS.map((t, idx) => (
<button
key={t.value}
ref={(el) => { radioRefs.current[idx] = el; }}
role="radio"
aria-checked={tier === t.value}
tabIndex={tier === t.value ? 0 : -1}
onClick={() => setTier(t.value)}
onKeyDown={(e) => handleRadioKeyDown(e, idx)}
className={`py-2 rounded-lg text-center transition-colors ${
tier === t.value
? "bg-blue-600/20 border border-blue-500/50 text-blue-300"
@ -315,6 +356,8 @@ function InputField({
placeholder,
required,
mono,
type = "text",
helper,
}: {
label: string;
value: string;
@ -322,10 +365,16 @@ function InputField({
placeholder?: string;
required?: boolean;
mono?: boolean;
type?: string;
helper?: string;
}) {
// useId() generates a stable, unique ID for the label↔input association,
// satisfying WCAG 2.1 SC 1.3.1 (Info and Relationships, Level A).
const inputId = useId();
return (
<div>
<label className="text-[11px] text-zinc-400 block mb-1">
<label htmlFor={inputId} className="text-[11px] text-zinc-400 block mb-1">
{label}{" "}
{required && (
<>
@ -337,11 +386,18 @@ function InputField({
)}
</label>
<input
id={inputId}
type={type}
value={value}
onChange={(e) => onChange(e.target.value)}
placeholder={placeholder}
className={`w-full bg-zinc-800/60 border border-zinc-700/50 rounded-lg px-3 py-2 text-sm text-zinc-100 placeholder-zinc-600 focus:outline-none focus:border-blue-500/60 focus:ring-1 focus:ring-blue-500/20 transition-colors ${mono ? "font-mono text-xs" : ""}`}
min={type === "number" ? "0" : undefined}
step={type === "number" ? "0.01" : undefined}
className={`w-full bg-zinc-800/60 border border-zinc-700/50 rounded-lg px-3 py-2 text-sm text-zinc-100 placeholder-zinc-500 focus:outline-none focus:border-blue-500/60 focus:ring-1 focus:ring-blue-500/20 transition-colors ${mono ? "font-mono text-xs" : ""}`}
/>
{helper && (
<p className="mt-1 text-xs text-zinc-500">{helper}</p>
)}
</div>
);
}

View File

@ -173,7 +173,7 @@ export function SidePanel() {
else if (e.key === "End") { e.preventDefault(); next = TABS.length - 1; }
if (next !== null) {
setPanelTab(TABS[next].id);
requestAnimationFrame(() => { document.getElementById(`tab-${TABS[next!].id}`)?.focus(); });
requestAnimationFrame(() => { const el = document.getElementById(`tab-${TABS[next!].id}`); el?.focus(); el?.scrollIntoView({ block: "nearest", inline: "nearest" }); });
}
}}
>

View File

@ -0,0 +1,221 @@
// @vitest-environment jsdom
/**
* DetailsTab integration tests for issue #541.
*
* Budget-specific logic (stats, progress bar, PATCH /budget, 402 handling) is
* fully covered by BudgetSection.test.tsx — this file focuses on:
* 1. BudgetSection being mounted inside DetailsTab
* 2. The workspace edit form (name / role / tier) no longer carrying
* budget_limit — that concern lives in BudgetSection now
* 3. PATCH /workspaces/:id body integrity (no accidental budget_limit leak)
*/
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
// ── Mocks ─────────────────────────────────────────────────────────────────────
vi.mock("@/lib/api", () => ({
api: {
get: vi.fn(),
patch: vi.fn(),
del: vi.fn(),
post: vi.fn(),
},
}));
vi.mock("@/store/canvas", () => ({
useCanvasStore: vi.fn((selector: (s: unknown) => unknown) =>
selector({
updateNodeData: mockUpdateNodeData,
removeNode: vi.fn(),
selectNode: vi.fn(),
})
),
}));
vi.mock("../StatusDot", () => ({ StatusDot: () => null }));
// Mock BudgetSection — it has its own test suite (BudgetSection.test.tsx).
// Without this mock its internal api.get would fire against the shared mock
// and cause type errors when the return is not a valid BudgetData object.
vi.mock("../tabs/BudgetSection", () => ({
BudgetSection: ({ workspaceId }: { workspaceId: string }) => (
<div data-testid="budget-section-stub" data-ws={workspaceId} />
),
}));
import { api } from "@/lib/api";
import { DetailsTab } from "../tabs/DetailsTab";
const mockPatch = vi.mocked(api.patch);
const mockGet = vi.mocked(api.get);
const mockUpdateNodeData = vi.fn();
// ── Helpers ───────────────────────────────────────────────────────────────────
function makeData(overrides: Record<string, unknown> = {}) {
return {
name: "Test Agent",
role: "Researcher",
tier: 1,
status: "online",
agentCard: null,
activeTasks: 0,
collapsed: false,
lastErrorRate: 0,
lastSampleError: "",
url: "http://localhost:8080",
parentId: null,
currentTask: "",
runtime: "langgraph",
needsRestart: false,
budgetLimit: null,
budgetUsed: null,
...overrides,
};
}
beforeEach(() => {
vi.clearAllMocks();
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue([] as any);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValue({} as any);
});
afterEach(() => {
cleanup();
});
async function openEdit() {
const editBtn = screen.getAllByRole("button").find((b) => b.textContent === "Edit");
fireEvent.click(editBtn!);
await waitFor(() =>
expect(screen.getAllByRole("button").some((b) => b.textContent === "Save")).toBe(true)
);
}
// ── BudgetSection mounting ────────────────────────────────────────────────────
describe("DetailsTab — BudgetSection integration", () => {
it("renders BudgetSection with the correct workspaceId", () => {
render(<DetailsTab workspaceId="ws-42" data={makeData()} />);
const stub = screen.getByTestId("budget-section-stub");
expect(stub).toBeTruthy();
expect(stub.getAttribute("data-ws")).toBe("ws-42");
});
});
// ── Workspace edit form (no budget_limit) ──────────────────────────────────────
describe("DetailsTab — workspace edit form does not include budget_limit", () => {
it("does NOT show a 'Budget limit (USD)' input in the edit form", async () => {
render(<DetailsTab workspaceId="ws-1" data={makeData()} />);
await openEdit();
// Budget limit (USD) was the old inline field label — must be absent now
expect(screen.queryByPlaceholderText("Leave blank for unlimited")).toBeNull();
expect(screen.queryByText("Budget limit (USD)")).toBeNull();
});
it("PATCH /workspaces/:id body does NOT include budget_limit", async () => {
render(<DetailsTab workspaceId="ws-1" data={makeData({ name: "My Agent" })} />);
await openEdit();
const saveBtn = screen.getAllByRole("button").find((b) => b.textContent === "Save");
fireEvent.click(saveBtn!);
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(Object.prototype.hasOwnProperty.call(body, "budget_limit")).toBe(false);
});
it("PATCH /workspaces/:id body includes name, role, and tier", async () => {
render(
<DetailsTab
workspaceId="ws-1"
data={makeData({ name: "Alpha", role: "Writer", tier: 2 })}
/>
);
await openEdit();
const saveBtn = screen.getAllByRole("button").find((b) => b.textContent === "Save");
fireEvent.click(saveBtn!);
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.name).toBe("Alpha");
expect(body.role).toBe("Writer");
expect(body.tier).toBe(2);
});
it("Cancel reverts name, role, tier without touching budget state", async () => {
render(
<DetailsTab
workspaceId="ws-1"
data={makeData({ name: "Original", role: "Dev" })}
/>
);
await openEdit();
// Modify name
fireEvent.change(
screen.getAllByRole("textbox").find((i) => (i as HTMLInputElement).value === "Original")!,
{ target: { value: "Modified" } }
);
const cancelBtn = screen.getAllByRole("button").find((b) => b.textContent === "Cancel");
fireEvent.click(cancelBtn!);
// Should be back in read view — no Save button visible
expect(screen.queryAllByRole("button").some((b) => b.textContent === "Save")).toBe(false);
// Workspace info unchanged in read view
expect(screen.getByText("Original")).toBeTruthy();
});
it("updateNodeData is called with name/role/tier but NOT budgetLimit on save", async () => {
render(
<DetailsTab
workspaceId="ws-1"
data={makeData({ name: "Bot", role: "Analyst", tier: 1 })}
/>
);
await openEdit();
const saveBtn = screen.getAllByRole("button").find((b) => b.textContent === "Save");
fireEvent.click(saveBtn!);
await waitFor(() => expect(mockUpdateNodeData).toHaveBeenCalled());
const updateArgs = mockUpdateNodeData.mock.calls[0][1] as Record<string, unknown>;
expect(updateArgs.name).toBe("Bot");
expect(updateArgs.role).toBe("Analyst");
expect(updateArgs.tier).toBe(1);
expect(Object.prototype.hasOwnProperty.call(updateArgs, "budgetLimit")).toBe(false);
});
});
// ── budget-exceeded-badge removed from DetailsTab ────────────────────────────
describe("DetailsTab — no inline budget-exceeded-badge", () => {
it("does NOT render budget-exceeded-badge even when budgetUsed > budgetLimit (BudgetSection owns that)", () => {
render(
<DetailsTab
workspaceId="ws-1"
data={makeData({ budgetLimit: 10, budgetUsed: 99 })}
/>
);
// The old inline badge is gone — BudgetSection.tsx owns the exceeded state
expect(screen.queryByTestId("budget-exceeded-badge")).toBeNull();
});
it("does NOT render inline Budget limit row in read view", () => {
render(
<DetailsTab
workspaceId="ws-1"
data={makeData({ budgetLimit: 100 })}
/>
);
// "$100.00" and "Unlimited" are rendered by BudgetSection now
expect(screen.queryByText("$100.00")).toBeNull();
expect(screen.queryByText("Unlimited")).toBeNull();
});
});

View File

@ -0,0 +1,389 @@
// @vitest-environment jsdom
/**
* Tests for BudgetSection (issue #541).
*
* Covers:
* - Loading state
* - Stats row: used / limit, "Unlimited" when null
* - Progress bar: correct percentage, capped at 100%, absent when no limit
* - Budget remaining text
* - Input pre-fill (existing limit / blank when null)
* - Save: PATCH with number, PATCH with null (blank input)
* - 402 on GET → exceeded banner, no fetch-error text
* - 402 on PATCH → exceeded banner
* - Non-402 fetch error → error text
* - Non-402 save error → save error alert
* - Section header and subheading
* - Fetch error does not show stats
*/
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import {
render,
screen,
fireEvent,
waitFor,
cleanup,
act,
} from "@testing-library/react";
// ── Mock api ──────────────────────────────────────────────────────────────────
vi.mock("@/lib/api", () => ({
api: {
get: vi.fn(),
patch: vi.fn(),
},
}));
import { api } from "@/lib/api";
import { BudgetSection } from "../tabs/BudgetSection";
const mockGet = vi.mocked(api.get);
const mockPatch = vi.mocked(api.patch);
// ── Helpers ───────────────────────────────────────────────────────────────────
function budgetResponse(overrides: Partial<{
budget_limit: number | null;
budget_used: number;
budget_remaining: number | null;
}> = {}) {
return {
budget_limit: 1000,
budget_used: 250,
budget_remaining: 750,
...overrides,
};
}
function make402Error(): Error {
return new Error("API GET /workspaces/ws-1/budget: 402 Payment Required");
}
function make402PatchError(): Error {
return new Error("API PATCH /workspaces/ws-1/budget: 402 Payment Required");
}
function makeGenericError(msg = "network timeout"): Error {
return new Error(`API GET /workspaces/ws-1/budget: 500 ${msg}`);
}
beforeEach(() => {
vi.clearAllMocks();
});
afterEach(() => {
cleanup();
});
// ── Rendering helpers ─────────────────────────────────────────────────────────
async function renderLoaded(budgetData = budgetResponse()) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValueOnce(budgetData as any);
render(<BudgetSection workspaceId="ws-1" />);
// Wait for loading to finish
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
}
// ── Loading state ─────────────────────────────────────────────────────────────
describe("BudgetSection — loading state", () => {
it("shows loading indicator while fetch is in flight", () => {
// Never resolve
mockGet.mockReturnValue(new Promise(() => {}));
render(<BudgetSection workspaceId="ws-1" />);
expect(screen.getByTestId("budget-loading")).toBeTruthy();
expect(screen.getByText("Loading…")).toBeTruthy();
});
it("hides loading indicator after fetch resolves", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValueOnce(budgetResponse() as any);
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
});
});
// ── Section header ────────────────────────────────────────────────────────────
describe("BudgetSection — header and subheading", () => {
it("renders 'Budget' as the section heading", async () => {
await renderLoaded();
expect(screen.getByText("Budget")).toBeTruthy();
});
it("renders the subheading 'Limit total message credits for this workspace'", async () => {
await renderLoaded();
expect(
screen.getByText("Limit total message credits for this workspace")
).toBeTruthy();
});
it("renders 'Budget limit (credits)' label for the input", async () => {
await renderLoaded();
expect(screen.getByText("Budget limit (credits)")).toBeTruthy();
});
});
// ── Stats row ─────────────────────────────────────────────────────────────────
describe("BudgetSection — stats row", () => {
it("shows budget_used in the stats row", async () => {
await renderLoaded(budgetResponse({ budget_used: 350, budget_limit: 1000 }));
expect(screen.getByTestId("budget-used-value").textContent).toBe("350");
});
it("shows budget_limit in the stats row", async () => {
await renderLoaded(budgetResponse({ budget_used: 100, budget_limit: 500 }));
expect(screen.getByTestId("budget-limit-value").textContent).toBe("500");
});
it("shows 'Unlimited' when budget_limit is null", async () => {
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
expect(screen.getByTestId("budget-limit-value").textContent).toBe("Unlimited");
});
it("shows budget_remaining when present", async () => {
await renderLoaded(budgetResponse({ budget_remaining: 750 }));
expect(screen.getByTestId("budget-remaining").textContent).toContain("750");
expect(screen.getByTestId("budget-remaining").textContent).toContain("credits remaining");
});
it("hides budget_remaining row when null", async () => {
await renderLoaded(budgetResponse({ budget_remaining: null }));
expect(screen.queryByTestId("budget-remaining")).toBeNull();
});
});
// ── Progress bar ──────────────────────────────────────────────────────────────
describe("BudgetSection — progress bar", () => {
it("renders the progress bar when budget_limit is set", async () => {
await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
expect(screen.getByRole("progressbar")).toBeTruthy();
});
it("does NOT render progress bar when budget_limit is null", async () => {
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
expect(screen.queryByRole("progressbar")).toBeNull();
});
it("fills to the correct percentage (25%)", async () => {
await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
expect(fill.style.width).toBe("25%");
});
it("fills to the correct percentage (50%)", async () => {
await renderLoaded(budgetResponse({ budget_used: 500, budget_limit: 1000 }));
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
expect(fill.style.width).toBe("50%");
});
it("caps fill at 100% when budget_used exceeds budget_limit", async () => {
await renderLoaded(budgetResponse({ budget_used: 1500, budget_limit: 1000 }));
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
expect(fill.style.width).toBe("100%");
});
it("progress bar has aria-valuenow equal to the calculated percentage", async () => {
await renderLoaded(budgetResponse({ budget_used: 300, budget_limit: 1000 }));
const bar = screen.getByRole("progressbar");
expect(bar.getAttribute("aria-valuenow")).toBe("30");
});
});
// ── Input pre-fill ────────────────────────────────────────────────────────────
describe("BudgetSection — input pre-fill", () => {
it("pre-fills input with existing budget_limit", async () => {
await renderLoaded(budgetResponse({ budget_limit: 500 }));
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
expect(input.value).toBe("500");
});
it("leaves input empty when budget_limit is null", async () => {
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
expect(input.value).toBe("");
});
});
// ── Save — PATCH calls ────────────────────────────────────────────────────────
describe("BudgetSection — save", () => {
// Happy path: the typed value is parsed and PATCHed as an integer to the
// workspace-scoped budget endpoint.
it("calls PATCH /workspaces/:id/budget with budget_limit as integer", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 800 }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "800" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
expect(mockPatch.mock.calls[0][0]).toBe("/workspaces/ws-1/budget");
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBe(800);
});
it("sends budget_limit: 0 (not null) when input is '0' — zero-credit budget", async () => {
// Regression for QA bug report: `parseInt("0") || null` would yield null.
// The correct form `raw !== "" ? parseInt(raw, 10) : null` must return 0.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 0, budget_used: 0, budget_remaining: 0 }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "0" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBe(0);
expect(body.budget_limit).not.toBeNull();
});
// Blank input means "unlimited" — the API contract represents that as null.
it("sends budget_limit: null when input is blank", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: null, budget_remaining: null }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBeNull();
});
// After a successful PATCH the component must render the server's response
// (not the optimistic input) — asserted via the formatted limit value.
it("updates displayed stats after successful save", async () => {
const updated = budgetResponse({ budget_limit: 2000, budget_used: 500, budget_remaining: 1500 });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(updated as any);
await renderLoaded(budgetResponse({ budget_limit: 1000, budget_used: 250 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "2000" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-limit-value").textContent).toBe("2,000")
);
});
// Non-402 PATCH failures surface the raw error message in the save-error
// alert; 402s are handled separately (see the 402 describe block below is
// NOT assumed here — this test only pins the generic path).
it("shows save error message on non-402 PATCH failure", async () => {
mockPatch.mockRejectedValueOnce(
new Error("API PATCH /workspaces/ws-1/budget: 500 server error")
);
await renderLoaded();
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-save-error")).toBeTruthy()
);
expect(screen.getByTestId("budget-save-error").textContent).toContain("500");
});
});
// ── 402 handling ──────────────────────────────────────────────────────────────
// A 402 from either GET or PATCH flips the section into a dedicated
// "budget exceeded" state: the amber banner shows, and the generic
// fetch/save error surfaces must stay hidden.
describe("BudgetSection — 402 handling", () => {
it("shows exceeded banner when GET returns 402", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
expect(screen.getByText("Budget exceeded — messages blocked")).toBeTruthy();
});
it("does NOT show fetch error text when GET returns 402 (only banner)", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.queryByTestId("budget-loading")).toBeNull()
);
expect(screen.queryByTestId("budget-fetch-error")).toBeNull();
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
});
it("shows exceeded banner when PATCH returns 402", async () => {
// GET succeeds first so the save UI is reachable; only the PATCH 402s.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValueOnce(budgetResponse() as any);
mockPatch.mockRejectedValueOnce(make402PatchError());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
// Should NOT also show the save-error alert
expect(screen.queryByTestId("budget-save-error")).toBeNull();
});
it("clears exceeded banner after a successful save", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
// Now a successful PATCH (limit was raised)
const updated = budgetResponse({ budget_limit: 5000, budget_used: 250, budget_remaining: 4750 });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(updated as any);
// act() wraps the change+click so the resulting state updates flush
// before the final waitFor asserts the banner is gone.
await act(async () => {
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "5000" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
});
await waitFor(() =>
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull()
);
});
});
// ── Non-402 fetch error ───────────────────────────────────────────────────────
describe("BudgetSection — non-402 fetch errors", () => {
  // Ordinary GET failures surface as an inline error message and must not be
  // confused with the 402 "budget exceeded" state.
  it("shows fetch error text on non-402 GET failure", async () => {
    mockGet.mockRejectedValueOnce(makeGenericError("internal server error"));
    render(<BudgetSection workspaceId="ws-1" />);
    await waitFor(() =>
      expect(screen.getByTestId("budget-fetch-error")).toBeTruthy()
    );
    const message = screen.getByTestId("budget-fetch-error").textContent;
    expect(message).toContain("500");
  });

  it("does NOT show stats row on fetch error", async () => {
    mockGet.mockRejectedValueOnce(makeGenericError());
    render(<BudgetSection workspaceId="ws-1" />);
    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
    expect(screen.queryByTestId("budget-stats-row")).toBeNull();
  });

  it("does NOT show exceeded banner on non-402 fetch error", async () => {
    mockGet.mockRejectedValueOnce(makeGenericError());
    render(<BudgetSection workspaceId="ws-1" />);
    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
    expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
  });
});

View File

@ -163,4 +163,50 @@ describe("ContextMenu — keyboard accessibility", () => {
const { container } = render(<ContextMenu />);
expect(container.firstChild).toBeNull();
});
// ── Zoom to Team (#557) ───────────────────────────────────────────────────
// Visibility rule: the menu item appears only when the node has children.
// Presumably ContextMenu derives "has children" from store nodes whose
// data.parentId matches the target node id — verify against the component.
it("does NOT show 'Zoom to Team' when node has no children", () => {
mockStore.nodes = []; // no children
render(<ContextMenu />);
const items = screen.getAllByRole("menuitem");
const labels = items.map((el) => el.textContent ?? "");
expect(labels.some((l) => l.includes("Zoom to Team"))).toBe(false);
});
it("shows 'Zoom to Team' when the node has children", () => {
mockStore.nodes = [{ id: "child-1", data: { parentId: "ws-1" } }];
render(<ContextMenu />);
const items = screen.getAllByRole("menuitem");
const labels = items.map((el) => el.textContent ?? "");
expect(labels.some((l) => l.includes("Zoom to Team"))).toBe(true);
});
// Clicking the item must broadcast a molecule:zoom-to-team CustomEvent whose
// detail.nodeId names the right-clicked node.
it("clicking 'Zoom to Team' dispatches molecule:zoom-to-team event", () => {
  mockStore.nodes = [{ id: "child-1", data: { parentId: "ws-1" } }];
  const dispatched: CustomEvent[] = [];
  // Bug fix: the original passed a *new* arrow function to
  // removeEventListener, which is a silent no-op (removal requires the same
  // function reference), leaking the listener into every later test. Keep a
  // named reference and detach it in `finally` so cleanup runs even when an
  // assertion throws.
  const onZoom = (e: Event) => {
    dispatched.push(e as CustomEvent);
  };
  window.addEventListener("molecule:zoom-to-team", onZoom);
  try {
    render(<ContextMenu />);
    const items = screen.getAllByRole("menuitem");
    const zoomItem = items.find((el) => el.textContent?.includes("Zoom to Team"))!;
    expect(zoomItem).toBeTruthy();
    fireEvent.click(zoomItem);
    expect(dispatched).toHaveLength(1);
    expect(dispatched[0].detail.nodeId).toBe("ws-1");
  } finally {
    window.removeEventListener("molecule:zoom-to-team", onZoom);
  }
});
// Selecting any action dismisses the menu — here pinned for Zoom to Team via
// the mocked closeContextMenu store action.
it("clicking 'Zoom to Team' closes the context menu", () => {
mockStore.nodes = [{ id: "child-1", data: { parentId: "ws-1" } }];
render(<ContextMenu />);
const items = screen.getAllByRole("menuitem");
const zoomItem = items.find((el) => el.textContent?.includes("Zoom to Team"))!;
fireEvent.click(zoomItem);
expect(closeContextMenu).toHaveBeenCalled();
});
});

View File

@ -89,4 +89,144 @@ describe("CreateWorkspaceDialog — accessibility", () => {
expect(t2?.getAttribute("aria-checked")).toBe("true")
);
});
// ── Arrow-key navigation (WCAG 2.1 radio group) — Issue #556 ──────────────
// Radio groups use the roving-tabindex pattern: exactly one radio is in the
// tab order (tabindex=0); arrows move both focus and selection, wrapping at
// either end. Down/Right advance, Up/Left go back.
it("selected radio has tabIndex=0, others have tabIndex=-1 (roving tabIndex)", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
// T1 is default selected
expect(t1.getAttribute("tabindex")).toBe("0");
expect(t2.getAttribute("tabindex")).toBe("-1");
expect(t3.getAttribute("tabindex")).toBe("-1");
});
it("ArrowDown moves selection from T1 to T2", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
t1.focus();
fireEvent.keyDown(t1, { key: "ArrowDown" });
await waitFor(() => expect(t2.getAttribute("aria-checked")).toBe("true"));
expect(t1.getAttribute("aria-checked")).toBe("false");
});
it("ArrowRight moves selection from T2 to T3", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
fireEvent.click(t2); // select T2 first
await waitFor(() => expect(t2.getAttribute("aria-checked")).toBe("true"));
t2.focus();
fireEvent.keyDown(t2, { key: "ArrowRight" });
await waitFor(() => expect(t3.getAttribute("aria-checked")).toBe("true"));
});
// Wrap-around: advancing past the last radio cycles to the first.
it("ArrowDown wraps from T3 back to T1", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
fireEvent.click(t3); // select T3 first
await waitFor(() => expect(t3.getAttribute("aria-checked")).toBe("true"));
t3.focus();
fireEvent.keyDown(t3, { key: "ArrowDown" });
await waitFor(() => expect(t1.getAttribute("aria-checked")).toBe("true"));
});
it("ArrowUp moves selection from T2 to T1", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
fireEvent.click(t2);
await waitFor(() => expect(t2.getAttribute("aria-checked")).toBe("true"));
t2.focus();
fireEvent.keyDown(t2, { key: "ArrowUp" });
await waitFor(() => expect(t1.getAttribute("aria-checked")).toBe("true"));
});
// Wrap-around in the other direction: backing up past the first cycles to
// the last.
it("ArrowLeft wraps from T1 back to T3", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
t1.focus();
fireEvent.keyDown(t1, { key: "ArrowLeft" });
await waitFor(() => expect(t3.getAttribute("aria-checked")).toBe("true"));
});
});
// ── WCAG 2.1 SC 1.3.1 — Programmatic label association (Issue #558) ──────────
//
// Every <input> rendered by the InputField helper must have a matching <label>
// via htmlFor/id so screen readers announce the field name, not just the
// placeholder. useId() in InputField generates stable unique IDs per render.
describe("CreateWorkspaceDialog — WCAG SC 1.3.1 label/input association", () => {
// Each field test: locate the input by placeholder, require a non-empty id,
// then find its <label> via a for-attribute selector and check the label text.
it("Name input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const nameInput = screen.getByPlaceholderText("e.g. SEO Agent") as HTMLInputElement;
expect(nameInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${nameInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Name");
});
it("Role input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const roleInput = screen.getByPlaceholderText("e.g. SEO Specialist") as HTMLInputElement;
expect(roleInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${roleInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Role");
});
it("Budget limit input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const budgetInput = screen.getByPlaceholderText("e.g. 100") as HTMLInputElement;
expect(budgetInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${budgetInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Budget limit");
});
it("Template input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const templateInput = screen.getByPlaceholderText(
"e.g. seo-agent (from workspace-configs-templates/)"
) as HTMLInputElement;
expect(templateInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${templateInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Template");
});
// Guards against a hard-coded id inside InputField: all four generated ids
// must be distinct, otherwise label[for] would be ambiguous.
it("each InputField generates a distinct id (no id collisions)", async () => {
await openDialog();
const inputs = [
screen.getByPlaceholderText("e.g. SEO Agent"),
screen.getByPlaceholderText("e.g. SEO Specialist"),
screen.getByPlaceholderText("e.g. 100"),
screen.getByPlaceholderText("e.g. seo-agent (from workspace-configs-templates/)"),
] as HTMLInputElement[];
const ids = inputs.map((i) => i.id).filter(Boolean);
const unique = new Set(ids);
expect(unique.size).toBe(ids.length); // no duplicates
expect(ids.length).toBe(4);
});
it("Name label text contains the required asterisk indicator", async () => {
await openDialog();
const nameInput = screen.getByPlaceholderText("e.g. SEO Agent") as HTMLInputElement;
const label = document.querySelector(`label[for="${nameInput.id}"]`);
// aria-hidden asterisk * is present for visual required indicator
expect(label?.querySelector("[aria-hidden='true']")?.textContent).toBe("*");
});
});

View File

@ -299,3 +299,85 @@ describe("CreateWorkspaceDialog — Hermes provider picker", () => {
);
});
});
// ---------------------------------------------------------------------------
// budget_limit field tests (#541)
// ---------------------------------------------------------------------------
// Contract under test: the create dialog sends budget_limit as a number when
// the field has a value (integers and decimals alike) and as null (meaning
// "unlimited") when the field is left blank.
describe("CreateWorkspaceDialog — budget_limit field", () => {
it("renders a Budget limit (USD) input", async () => {
await openDialog();
const budgetInput = screen.getByPlaceholderText("e.g. 100");
expect(budgetInput).toBeTruthy();
});
it("renders helper text 'Leave blank for unlimited'", async () => {
await openDialog();
expect(screen.getByText("Leave blank for unlimited")).toBeTruthy();
});
it("sends budget_limit as a number when a value is entered", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Budget Agent" },
});
fireEvent.change(screen.getByPlaceholderText("e.g. 100"), {
target: { value: "250" },
});
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBe(250);
});
it("sends budget_limit as null when the field is left blank", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Unlimited Agent" },
});
// Leave budget_limit empty
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBeNull();
});
// toBeCloseTo avoids brittle float equality on the 49.99 payload value.
it("sends budget_limit as a float when a decimal value is entered", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Float Budget Agent" },
});
fireEvent.change(screen.getByPlaceholderText("e.g. 100"), {
target: { value: "49.99" },
});
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBeCloseTo(49.99);
});
// Dialog state must not persist across open/close cycles: cancel, unmount
// via cleanup(), re-open, and expect a pristine (empty) budget field.
it("resets budget_limit to empty when dialog is reopened", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. 100"), {
target: { value: "500" },
});
// Close dialog
const cancelBtn = screen.getAllByRole("button").find((b) =>
b.textContent === "Cancel"
);
fireEvent.click(cancelBtn!);
cleanup();
// Re-open
await openDialog();
const budgetInput = screen.getByPlaceholderText("e.g. 100") as HTMLInputElement;
expect(budgetInput.value).toBe("");
});
});

View File

@ -0,0 +1,253 @@
'use client';
import { useState, useEffect, useCallback } from "react";
import { api } from "@/lib/api";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
interface BudgetData {
budget_limit: number | null;
budget_used: number;
budget_remaining: number | null;
}
interface Props {
workspaceId: string;
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/** True when an API error carries a 402 status code. */
function isApiError402(e: unknown): boolean {
return e instanceof Error && /: 402( |$)/.test(e.message);
}
// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------
/**
* BudgetSection dedicated "Budget" section in the workspace details panel.
*
* - Fetches GET /workspaces/:id/budget on mount for live usage stats
* - Shows a progress bar (budget_used / budget_limit, blue-500, capped 100%)
* - Allows updating budget_limit via PATCH /workspaces/:id/budget
* - Shows a 402-specific "Budget exceeded" amber banner for any blocked state
*/
export function BudgetSection({ workspaceId }: Props) {
const [budget, setBudget] = useState<BudgetData | null>(null);
const [loading, setLoading] = useState(true);
const [fetchError, setFetchError] = useState<string | null>(null);
const [limitInput, setLimitInput] = useState("");
const [saving, setSaving] = useState(false);
const [saveError, setSaveError] = useState<string | null>(null);
/** True when a 402 has been seen from any API call in this section. */
const [budgetExceeded, setBudgetExceeded] = useState(false);
// ── Fetch current budget data ─────────────────────────────────────────────
const loadBudget = useCallback(async () => {
setLoading(true);
setFetchError(null);
try {
const data = await api.get<BudgetData>(`/workspaces/${workspaceId}/budget`);
setBudget(data);
setLimitInput(data.budget_limit != null ? String(data.budget_limit) : "");
} catch (e) {
if (isApiError402(e)) {
setBudgetExceeded(true);
} else {
setFetchError(e instanceof Error ? e.message : "Failed to load budget");
}
} finally {
setLoading(false);
}
}, [workspaceId]);
useEffect(() => {
loadBudget();
}, [loadBudget]);
// ── Save handler ──────────────────────────────────────────────────────────
const handleSave = async () => {
setSaving(true);
setSaveError(null);
const raw = limitInput.trim();
// Use explicit empty-string check (not falsy check) so that a
// user-entered "0" is sent as budget_limit: 0, not null (unlimited).
const parsedLimit = raw !== "" ? parseInt(raw, 10) : null;
try {
const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, {
budget_limit: parsedLimit,
});
setBudget(updated);
setLimitInput(updated.budget_limit != null ? String(updated.budget_limit) : "");
// Clear exceeded state if the save succeeded (limit was raised or removed)
setBudgetExceeded(false);
} catch (e) {
if (isApiError402(e)) {
setBudgetExceeded(true);
} else {
setSaveError(e instanceof Error ? e.message : "Failed to save budget");
}
} finally {
setSaving(false);
}
};
// ── Progress calculation ──────────────────────────────────────────────────
const progressPct =
budget && budget.budget_limit != null && budget.budget_limit > 0
? Math.min(100, Math.round((budget.budget_used / budget.budget_limit) * 100))
: 0;
// ── Render ────────────────────────────────────────────────────────────────
return (
<div className="space-y-3" data-testid="budget-section">
{/* Section header */}
<div>
<h3 className="text-xs font-semibold text-zinc-400 uppercase tracking-wider">
Budget
</h3>
<p className="text-[11px] text-zinc-400 mt-0.5">
Limit total message credits for this workspace
</p>
</div>
{/* 402 exceeded banner */}
{budgetExceeded && (
<div
role="alert"
data-testid="budget-exceeded-banner"
className="flex items-center gap-2 px-3 py-2 rounded-lg bg-zinc-950 border border-amber-700/50 text-amber-400 text-xs font-medium"
>
<svg
width="13"
height="13"
viewBox="0 0 13 13"
fill="none"
aria-hidden="true"
className="shrink-0"
>
<path
d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z"
stroke="currentColor"
strokeWidth="1.4"
strokeLinejoin="round"
/>
<path
d="M6.5 5.5V7.5M6.5 9.5h.01"
stroke="currentColor"
strokeWidth="1.4"
strokeLinecap="round"
/>
</svg>
Budget exceeded messages blocked
</div>
)}
{/* Usage stats */}
{loading ? (
<p className="text-xs text-zinc-500" data-testid="budget-loading">
Loading
</p>
) : fetchError ? (
<p className="text-xs text-red-400" data-testid="budget-fetch-error">
{fetchError}
</p>
) : budget ? (
<div className="space-y-2">
{/* Stats row */}
<div className="flex items-baseline justify-between" data-testid="budget-stats-row">
<span className="text-xs text-zinc-400">Credits used</span>
<span className="text-xs font-mono text-zinc-300">
<span data-testid="budget-used-value">{budget.budget_used.toLocaleString()}</span>
<span className="text-zinc-500 mx-1">/</span>
<span data-testid="budget-limit-value">
{budget.budget_limit != null
? budget.budget_limit.toLocaleString()
: "Unlimited"}
</span>
</span>
</div>
{/* Progress bar (only when limit is set) */}
{budget.budget_limit != null && (
<div
role="progressbar"
aria-label="Budget usage"
aria-valuenow={progressPct}
aria-valuemin={0}
aria-valuemax={100}
className="h-1.5 w-full rounded-full bg-zinc-800 overflow-hidden"
>
<div
data-testid="budget-progress-fill"
className="h-full rounded-full bg-blue-500 transition-all duration-300"
style={{ width: `${progressPct}%` }}
/>
</div>
)}
{/* Remaining credits */}
{budget.budget_remaining != null && (
<p className="text-[11px] text-zinc-500" data-testid="budget-remaining">
{budget.budget_remaining.toLocaleString()} credits remaining
</p>
)}
</div>
) : null}
{/* Input + Save */}
<div className="space-y-1.5 pt-1">
<label
htmlFor={`budget-limit-input-${workspaceId}`}
className="text-[11px] text-zinc-400 block"
>
Budget limit (credits)
</label>
<input
id={`budget-limit-input-${workspaceId}`}
type="number"
min="0"
step="1"
value={limitInput}
onChange={(e) => setLimitInput(e.target.value)}
placeholder="e.g. 1000 — blank for unlimited"
data-testid="budget-limit-input"
className="w-full bg-zinc-800 border border-zinc-700 rounded-lg px-3 py-2 text-sm text-zinc-300 placeholder-zinc-500 focus:outline-none focus:border-blue-500 focus:ring-1 focus:ring-blue-500/30 transition-colors"
/>
<p className="text-xs text-zinc-500">Leave blank for unlimited</p>
{saveError && (
<div
role="alert"
data-testid="budget-save-error"
className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-red-400"
>
{saveError}
</div>
)}
<button
onClick={handleSave}
disabled={saving}
data-testid="budget-save-btn"
className="px-4 py-1.5 bg-blue-600 hover:bg-blue-500 active:bg-blue-700 rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors"
>
{saving ? "Saving…" : "Save"}
</button>
</div>
</div>
);
}

View File

@ -141,19 +141,29 @@ export function ChannelsTab({ workspaceId }: Props) {
}
};
const [error, setError] = useState("");
const handleToggle = async (ch: Channel) => {
await api.patch(`/workspaces/${workspaceId}/channels/${ch.id}`, {
enabled: !ch.enabled,
});
load();
try {
await api.patch(`/workspaces/${workspaceId}/channels/${ch.id}`, {
enabled: !ch.enabled,
});
load();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to toggle channel");
}
};
const confirmDelete = async () => {
if (!pendingDelete) return;
const ch = pendingDelete;
setPendingDelete(null);
await api.del(`/workspaces/${workspaceId}/channels/${ch.id}`);
load();
try {
await api.del(`/workspaces/${workspaceId}/channels/${ch.id}`);
load();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to delete channel");
}
};
const handleTest = async (ch: Channel) => {
@ -188,6 +198,12 @@ export function ChannelsTab({ workspaceId }: Props) {
</button>
</div>
{error && (
<div className="px-3 py-1.5 bg-red-900/30 border border-red-800 rounded text-xs text-red-400">
{error}
</div>
)}
{/* Create form */}
{showForm && (
<div className="space-y-2 p-3 bg-zinc-800/40 rounded border border-zinc-700/50">

View File

@ -4,6 +4,8 @@ import { useState, useEffect, useCallback } from "react";
import { api } from "@/lib/api";
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
import { StatusDot } from "../StatusDot";
import { BudgetSection } from "./BudgetSection";
import { WorkspaceUsage } from "../WorkspaceUsage";
interface Props {
workspaceId: string;
@ -59,7 +61,11 @@ export function DetailsTab({ workspaceId, data }: Props) {
setSaving(true);
setSaveError(null);
try {
await api.patch(`/workspaces/${workspaceId}`, { name, role: role || null, tier });
await api.patch(`/workspaces/${workspaceId}`, {
name,
role: role || null,
tier,
});
updateNodeData(workspaceId, { name, role: role || "", tier });
setEditing(false);
} catch (e) {
@ -145,7 +151,13 @@ export function DetailsTab({ workspaceId, data }: Props) {
{saving ? "Saving..." : "Save"}
</button>
<button
onClick={() => { setEditing(false); setSaveError(null); setName(data.name); setRole(data.role || ""); setTier(data.tier); }}
onClick={() => {
setEditing(false);
setSaveError(null);
setName(data.name);
setRole(data.role || "");
setTier(data.tier);
}}
className="px-3 py-1 bg-zinc-700 hover:bg-zinc-600 text-xs rounded text-zinc-300"
>
Cancel
@ -190,6 +202,12 @@ export function DetailsTab({ workspaceId, data }: Props) {
)}
</Section>
{/* Budget — dedicated section with live usage stats (#541) */}
<BudgetSection workspaceId={workspaceId} />
{/* Token usage + spend — wired to GET /workspaces/:id/metrics (#592) */}
<WorkspaceUsage workspaceId={workspaceId} />
{/* Agent Card / Skills */}
{skills.length > 0 && (
<Section title="Skills">

View File

@ -219,7 +219,7 @@ export function MemoryTab({ workspaceId }: Props) {
Refresh
</button>
<button
onClick={() => setShowAdd(!showAdd)}
onClick={() => { setShowAdd(!showAdd); if (!showAdd) setShowAdvanced(true); }}
className="px-2 py-1 bg-blue-600 hover:bg-blue-500 text-[10px] rounded text-white"
>
+ Add

View File

@ -126,15 +126,23 @@ export function ScheduleTab({ workspaceId }: Props) {
if (!pendingDelete) return;
const { id } = pendingDelete;
setPendingDelete(null);
await api.del(`/workspaces/${workspaceId}/schedules/${id}`);
fetchSchedules();
try {
await api.del(`/workspaces/${workspaceId}/schedules/${id}`);
fetchSchedules();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to delete schedule");
}
};
const handleToggle = async (sched: Schedule) => {
await api.patch(`/workspaces/${workspaceId}/schedules/${sched.id}`, {
enabled: !sched.enabled,
});
fetchSchedules();
try {
await api.patch(`/workspaces/${workspaceId}/schedules/${sched.id}`, {
enabled: !sched.enabled,
});
fetchSchedules();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to toggle schedule");
}
};
const handleEdit = (sched: Schedule) => {

View File

@ -68,11 +68,14 @@ export function TracesTab({ workspaceId }: Props) {
{traces.length === 0 && !error ? (
<div className="text-center py-8">
<div className="text-2xl opacity-20 mb-2">📊</div>
<div className="text-2xl opacity-20 mb-2" aria-hidden="true">--</div>
<p className="text-xs text-zinc-600">No traces yet</p>
<p className="text-[10px] text-zinc-700 mt-1">
Set LANGFUSE_HOST, LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY to enable tracing
</p>
<details className="mt-2 text-[10px] text-zinc-700">
<summary className="cursor-pointer text-zinc-500 hover:text-zinc-400">How to enable tracing</summary>
<p className="mt-1">
Set <code className="font-mono text-zinc-400">LANGFUSE_HOST</code>, <code className="font-mono text-zinc-400">LANGFUSE_PUBLIC_KEY</code>, <code className="font-mono text-zinc-400">LANGFUSE_SECRET_KEY</code> as workspace secrets to enable tracing.
</p>
</details>
</div>
) : (
<div className="space-y-1">

View File

@ -1,4 +1,4 @@
'use client';
"use client";
interface RevealToggleProps {
revealed: boolean;

View File

@ -17,6 +17,8 @@ export const RUNTIME_REQUIRED_KEYS: Record<string, string[]> = {
deepagents: ["OPENAI_API_KEY"],
crewai: ["OPENAI_API_KEY"],
autogen: ["OPENAI_API_KEY"],
hermes: ["OPENROUTER_API_KEY"],
"gemini-cli": ["GOOGLE_API_KEY"],
};
/** Human-readable labels for common secret keys */
@ -26,6 +28,8 @@ export const KEY_LABELS: Record<string, string> = {
GOOGLE_API_KEY: "Google AI API Key",
SERP_API_KEY: "SERP API Key",
OPENROUTER_API_KEY: "OpenRouter API Key",
HERMES_API_KEY: "Nous Research API Key",
DEEPSEEK_API_KEY: "DeepSeek API Key",
};
/* ---------- Types ---------- */

View File

@ -719,6 +719,33 @@ describe("misc state setters", () => {
});
});
// ---------- hydrationError (#554) ----------
describe("hydrationError", () => {
it("initial value is null", () => {
expect(useCanvasStore.getState().hydrationError).toBeNull();
});
it("setHydrationError stores an error message", () => {
useCanvasStore.getState().setHydrationError("Network timeout");
expect(useCanvasStore.getState().hydrationError).toBe("Network timeout");
});
it("setHydrationError(null) clears the error", () => {
useCanvasStore.getState().setHydrationError("Some error");
useCanvasStore.getState().setHydrationError(null);
expect(useCanvasStore.getState().hydrationError).toBeNull();
});
it("setHydrationError does not affect other state", () => {
useCanvasStore.getState().hydrate([makeWS({ id: "ws-x", name: "X" })]);
useCanvasStore.getState().setHydrationError("oops");
// Nodes should still be intact
expect(useCanvasStore.getState().nodes).toHaveLength(1);
expect(useCanvasStore.getState().nodes[0].id).toBe("ws-x");
});
});
// ---------- ACTIVITY_LOGGED event ----------
describe("ACTIVITY_LOGGED event", () => {

View File

@ -142,6 +142,8 @@ export function buildNodesAndEdges(
currentTask: ws.current_task || "",
runtime: ws.runtime || "",
needsRestart: false,
budgetLimit: ws.budget_limit ?? null,
budgetUsed: ws.budget_used ?? null,
},
// Hide child nodes from canvas — they render inside the parent WorkspaceNode
hidden: !!ws.parent_id,

View File

@ -29,6 +29,10 @@ export interface WorkspaceNodeData extends Record<string, unknown> {
currentTask: string;
runtime: string;
needsRestart: boolean;
/** USD spend ceiling set by the user; null = unlimited. Added by issue #541. */
budgetLimit: number | null;
/** Cumulative USD spend. Present when the platform tracks spend (issue #541). */
budgetUsed?: number | null;
}
export type PanelTab = "details" | "skills" | "chat" | "terminal" | "config" | "schedule" | "channels" | "files" | "memory" | "traces" | "events" | "activity";
@ -73,6 +77,9 @@ interface CanvasState {
/** WebSocket connection status — drives the live indicator in the Toolbar. */
wsStatus: "connected" | "connecting" | "disconnected";
setWsStatus: (status: "connected" | "connecting" | "disconnected") => void;
/** Hydration error message — set when initial canvas load fails. Null when no error. */
hydrationError: string | null;
setHydrationError: (error: string | null) => void;
}
export const useCanvasStore = create<CanvasState>((set, get) => ({
@ -84,6 +91,8 @@ export const useCanvasStore = create<CanvasState>((set, get) => ({
contextMenu: null,
wsStatus: "connecting",
setWsStatus: (status) => set({ wsStatus: status }),
hydrationError: null,
setHydrationError: (error) => set({ hydrationError: error }),
viewport: { x: 0, y: 0, zoom: 1 },

View File

@ -118,6 +118,10 @@ export interface WorkspaceData {
x: number;
y: number;
collapsed: boolean;
/** USD spend ceiling set by the user; null = unlimited. Added by issue #541. */
budget_limit: number | null;
/** Cumulative USD spend for this workspace. Present when the platform tracks spend. */
budget_used?: number | null;
}
let socket: ReconnectingSocket | null = null;

View File

@ -0,0 +1,108 @@
---
title: "Deploy AI Agents on Fly.io — or Any Cloud — with One Config Change"
date: "2026-04-17"
slug: deploy-anywhere
description: "Molecule AI supports fly.io agent deployment and control-plane provisioning. Switch backends with one env var — no agent code changes required."
tags: [platform, fly.io, deployment, infrastructure]
---
# Deploy AI Agents on Fly.io — or Any Cloud — with One Config Change
Your infrastructure choice just got decoupled from your agent platform choice. Molecule AI now ships three production-ready workspace backends — `docker`, `flyio`, and `controlplane` — and switching between them takes a single environment variable. Your agent code, model choices, and workspace topology stay exactly the same.
This post covers what shipped in [PR #501](https://github.com/Molecule-AI/molecule-core/pull/501) (Fly Machines provisioner) and [PR #503](https://github.com/Molecule-AI/molecule-core/pull/503) (control plane provisioner), and which backend fits your situation.
## Before: One Deployment Model for Every Use Case
Until this week, Molecule AI workspaces ran on one backend: Docker. That was the right default for self-hosters — no external dependencies, full control, works anywhere a Docker daemon runs. But it left two groups making a compromise they shouldn't have to:
- **Indie developers and small teams** wanted Fly.io's economics: pay-per-use compute, fast cold starts, scale to zero when nobody's working.
- **SaaS builders** needed structural credential isolation. A Fly API token sitting in the tenant layer is one misconfiguration away from a security incident — not a policy problem, an architecture problem.
Both groups were choosing between "use the platform" and "get the deployment model I need." That trade-off is gone.
## Run AI Agents on Fly: The Indie Dev Path
You're already on Fly. You have an account, a Fly app, and you're comfortable with Machines. You want Molecule AI workspaces to provision as Fly Machines — no separate Docker host, no idle infrastructure, just workspaces that appear when needed and disappear when they don't.
Set three environment variables on your tenant platform instance:
```bash
CONTAINER_BACKEND=flyio
FLY_API_TOKEN=<your-fly-deploy-token>
FLY_WORKSPACE_APP=<your-fly-app-name>
# Optional — defaults to ord
FLY_REGION=ord
```
When a workspace is created, the Fly provisioner:
1. Spins up a Fly Machine inside your `FLY_WORKSPACE_APP`
2. Injects workspace secrets and the platform registration URL as machine env vars
3. Selects the right GHCR image for the runtime (`hermes` → `ghcr.io/molecule-ai/workspace-hermes:latest`, and so on)
4. Applies tier-based resource limits — T2 at 512 MB / 1 vCPU, T3 at 2 GB / 2 vCPU, T4 at 4 GB / 4 vCPU
5. Issues a boot-time auth token so the workspace agent can register with the platform immediately
Your workspaces run as first-class Fly Machines. When they're idle, Fly handles the scale-down. Your bill reflects actual usage, not reserved capacity.
## Multi-Tenant Agent Provisioning Without Credential Sprawl
You're building a SaaS product on top of Molecule AI. Each customer gets a Molecule workspace. The problem: if every tenant platform instance carries a `FLY_API_TOKEN`, you've distributed cloud credentials across your tenants — structurally. Policy controls help, but they don't remove the credential from the attack surface.
`CONTAINER_BACKEND=controlplane` removes it entirely.
```
Canvas → Tenant Platform → Control Plane API → Fly Machines API
```
The tenant platform never holds a Fly token. It calls the Molecule control plane at `https://api.moleculesai.app` (overridable via `CP_PROVISION_URL` for staging environments), which holds Fly credentials and orchestrates workspace provisioning centrally.
For standard SaaS deployments, you don't configure this manually — the platform auto-detects the right backend:
- `MOLECULE_ORG_ID` set → SaaS tenant → **control plane provisioner activates automatically**
- `MOLECULE_ORG_ID` empty → self-hosted → **Docker provisioner, no change needed**
The right backend is the default for your context. For most SaaS builders: set `MOLECULE_ORG_ID` at tenant launch, and credential isolation is structural from day one.
## Self-Hosted vs Cloud AI Agents: Backend Comparison
| Backend | `CONTAINER_BACKEND` | Best for | Who holds cloud credentials |
|---|---|---|---|
| **Docker** | *(empty / default)* | Self-hosted, local dev | No external credentials needed |
| **Fly Machines** | `flyio` | Indie devs / small teams on Fly | `FLY_API_TOKEN` lives on the tenant |
| **Control Plane** | `controlplane` | SaaS builders, multi-tenant products | Fly token held by control plane only — never on tenant |
**Fly backend env vars** (for `CONTAINER_BACKEND=flyio`):
| Variable | Required | Default | What it does |
|---|---|---|---|
| `CONTAINER_BACKEND` | Yes | — | Activates the Fly provisioner |
| `FLY_API_TOKEN` | Yes | — | Fly deploy token |
| `FLY_WORKSPACE_APP` | Yes | — | Fly app that hosts workspace machines |
| `FLY_REGION` | No | `ord` | Region for new machines |
## Agent Orchestration in the Cloud: What Doesn't Change
Switching backends changes where workspaces run, not how they work. From any agent runtime's perspective — Hermes, Letta, or whatever you're orchestrating — the workspace is the workspace. Unchanged across all three backends:
- Agent registration and boot sequence
- Model routing and provider dispatch
- Workspace secrets injection
- The full platform API surface
No changes to agent code, tool definitions, or orchestration logic. Swap `CONTAINER_BACKEND`, redeploy, done.
## Multi-Agent Cloud Deployment: Choose Your Path
- **Self-hosting?** Leave `CONTAINER_BACKEND` unset. Docker is the default — nothing to configure.
- **On Fly, small team?** Set `CONTAINER_BACKEND=flyio` with `FLY_API_TOKEN` and `FLY_WORKSPACE_APP`. Workspaces become Fly Machines in your own Fly account.
- **Building a SaaS product on Molecule AI?** Set `MOLECULE_ORG_ID` at tenant launch. The control plane provisioner activates automatically. No Fly credentials on the tenant, ever.
**Pick your backend. Deploy your agents.**
→ [Quickstart: choose your deployment backend](/docs/quickstart)
---
*[PR #501](https://github.com/Molecule-AI/molecule-core/pull/501) (Fly Machines provisioner) and [PR #503](https://github.com/Molecule-AI/molecule-core/pull/503) (control plane provisioner) are both merged to `main`. Molecule AI is open source — contributions welcome.*

View File

@ -37,6 +37,452 @@ about where our differentiation actually is.
---
## Competitor Snapshot
> **Machine-readable index for PMM cron diffing.** One YAML entry per competitor —
> the cron diffs this block to detect version bumps, threat escalations, and new
> `notable_changes`, then updates `docs/marketing/competitors.md`.
>
> **Maintenance rule:** whenever you update a narrative entry below, also bump the
> corresponding `date`, `version`, and `notable_changes` fields here.
>
> Fields: `name` · `slug` · `date` (last reviewed) · `version` · `stars` ·
> `threat_level` (high / medium / low) · `notable_changes` (≤ 2 sentences) · `source_url`
```yaml
# competitor-snapshot
# Generated: 2026-04-17 | Maintainer: Research Lead
# PMM cron reads this block, diffs vs. previous commit, updates docs/marketing/competitors.md.
# Update date + version + notable_changes whenever a competitor ships something significant.
snapshots:
# ── HIGH THREAT ────────────────────────────────────────────────────────────────────
# Direct substitutes or major market-erosion risk for Molecule AI.
# NOTE(review): Paperclip below carries threat_level: medium (downgraded HIGH → MEDIUM
# on 2026-04-17, deep-dive #571) but remains in this section to keep the diff minimal —
# move it under MEDIUM THREAT on the next re-sort.
- name: Paperclip
slug: paperclip
date: "2026-04-17"
version: "v2026.416.0"
stars: "54.8k"
threat_level: medium
notable_changes: >
Downgraded HIGH → MEDIUM (2026-04-17, deep-dive #571): no A2A protocol,
no visual canvas, no org-chart UI on roadmap. Blocker dependencies are
single-process task-graph DAG, not inter-agent coordination. Execution
policies are budget ceilings, not tool restrictions. Only capability gap
vs Molecule AI is per-workspace budget limits (tracked #541). Brand/
framing threat ("zero-human companies") but not a technical substitute.
v2026.416.0 (Apr 16) ships chat threads + execution policies.
source_url: https://github.com/paperclipai/paperclip/releases
- name: OpenAI Agents SDK
slug: openai-agents-sdk
date: "2026-04-17"
version: "v0.14.1"
stars: "14k"
threat_level: high
notable_changes: >
v0.14.1 (Apr 15 2026) patches tracing export on top of v0.14.0's
SandboxAgent beta — persistent isolated workspaces, snapshot/resume,
and sandbox memory directly competing with our workspace lifecycle model.
source_url: https://github.com/openai/openai-agents-python/releases
- name: CrewAI
slug: crewai
date: "2026-04-17"
version: "v1.14.1"
stars: "48k"
threat_level: high
notable_changes: >
v1.14.1 (Apr 8 2026) adds async checkpoint TUI browser; 1.4B agentic
automations logged, 60% Fortune 500 adoption, and $18M Insight-led round
make CrewAI Enterprise the dominant multi-agent framework in our target
enterprise segment.
source_url: https://github.com/crewAIInc/crewAI/releases
- name: Google ADK
slug: google-adk
date: "2026-04-17"
version: "v1.30.0"
stars: "19k"
threat_level: high
notable_changes: >
v1.30.0 (Apr 13 2026) adds Auth Provider support to the agent registry,
Parameter Manager integration, and Gemma 4 model support; v2.0.0a3
pre-release introduces a graph-based execution engine.
source_url: https://github.com/google/adk-python/releases
- name: Microsoft Agent Framework
slug: microsoft-agent-framework
date: "2026-04-17"
version: "python-1.0.1"
stars: "9.5k"
threat_level: high
notable_changes: >
v1.0 GA (Apr 7 2026): multi-agent orchestration (sequential, concurrent,
group-chat, handoff, magnetic patterns), native A2A+MCP, OpenTelemetry,
pause/resume durability, HITL approvals. AG-UI protocol for SSE-streaming
agent events to frontends — direct competitor to our WebSocket canvas.
Process Framework GA planned Q2 2026. Molecule gap: AG-UI SSE endpoint,
tool governance registry, cost transparency per workspace.
source_url: https://github.com/microsoft/agent-framework/releases
# ── MEDIUM THREAT ──────────────────────────────────────────────────────────────────
# Significant overlap in adjacent space; no direct substitution risk today.
- name: Dify
slug: dify
date: "2026-04-17"
version: "v1.13.3"
stars: "60k"
threat_level: medium
notable_changes: >
Latest stable is v1.13.3 (Mar 27 2026); v1.14.0 RC adds Human Input
node (HITL); raised $30M Pre-A (Mar 2026, $180M valuation) with
280 enterprise deployments — no-code positioning targets business users,
not our developer audience.
source_url: https://github.com/langgenius/dify/releases
- name: LangGraph
slug: langgraph
date: "2026-04-17"
version: "v1.1.6"
stars: "29k"
threat_level: medium
notable_changes: >
langgraph-cli v0.4.22 (Apr 16 2026) adds deploy source tracking;
core v1.1.6 (Apr 10 2026) ships LangGraph 2.0 declarative guardrail nodes;
LangGraph Cloud hosted execution competes with our scheduler.
source_url: https://github.com/langchain-ai/langgraph/releases
- name: VoltAgent
slug: voltagent
date: "2026-04-17"
version: "server-elysia@2.0.7"
stars: "8.2k"
threat_level: medium
notable_changes: >
@voltagent/server-elysia v2.0.7 (Apr 11 2026) fixes A2A agent card
endpoints to advertise correct absolute URLs; VoltOps Console is the
closest Canvas analogue in the TypeScript ecosystem.
source_url: https://github.com/VoltAgent/voltagent/releases
- name: n8n
slug: n8n
date: "2026-04-17"
version: "v2.17.2"
stars: "50k"
threat_level: medium
notable_changes: >
v2.17.2 (Apr 16 2026) improves AI Gateway credentials endpoint;
n8n 2.0 (Dec 2025) added enterprise-grade AI Agent nodes, RBAC, SSO,
and 400+ channel integrations — direct overlap with our workspace_channels.
source_url: https://github.com/n8n-io/n8n/releases
- name: Claude Code Routines
slug: claude-code-routines
date: "2026-04-17"
version: "cloud-feature"
stars: "n/a"
threat_level: medium
notable_changes: >
Launched Apr 14 2026 (research preview): Anthropic-hosted cron + GitHub-
event-triggered Claude Code sessions running on Anthropic cloud; competes
with our workspace_schedules; single-model, no org canvas.
source_url: https://code.claude.com/docs/en/routines
- name: Scion
slug: scion
date: "2026-04-17"
version: "active"
stars: "early"
threat_level: medium
notable_changes: >
Launched Apr 8 2026 — GCP experimental container-per-agent harness for
Claude Code/Gemini CLI with parallel isolated workspaces and markdown
workflow definitions; escalation risk to HIGH if productized by Google.
source_url: https://github.com/GoogleCloudPlatform/scion
- name: Multica
slug: multica
date: "2026-04-17"
version: "active-36-releases"
stars: "12.8k"
threat_level: medium
notable_changes: >
Positioned as open-source Claude Managed Agents alternative (Apr 2026);
local daemon + central backend with pgvector semantic skill compounding;
+1,503 stars/day at launch — no A2A or org canvas but similar architecture.
source_url: https://github.com/multica-ai/multica/releases
- name: Cline
slug: cline
date: "2026-04-17"
version: "active"
stars: "44k"
threat_level: medium
notable_changes: >
VS Code Claude Code extension with 44k ⭐ and MCP support; primary user
overlap with our Claude Code workspace — developers who outgrow Cline's
single-session model are our conversion path.
source_url: https://github.com/cline/cline/releases
- name: ClawRun
slug: clawrun
date: "2026-04-17"
version: "active-45-releases"
stars: "84"
threat_level: medium
notable_changes: >
Closest architectural match tracked — sandbox/heartbeat/snapshot-resume/
channels/cost-tracking feature parity with us; 84 ⭐ but 45 releases
shows active shipping; adding A2A would make this a direct lightweight
competitor.
source_url: https://github.com/clawrun-sh/clawrun/releases
- name: Gemini CLI
slug: gemini-cli
date: "2026-04-17"
version: "v0.38.1"
stars: "101k"
threat_level: medium
notable_changes: >
v0.38.1 (Apr 15 2026) is a cherry-pick stability patch; 1M-token context
+ MCP support; runtime candidate for our workspace adapter — elevated to
MEDIUM because it forms a full agent stack with Google ADK + adk-web.
source_url: https://github.com/google-gemini/gemini-cli/releases
# ── LOW THREAT ─────────────────────────────────────────────────────────────────────
# Tools, infra layers, single-agent tools, or products we use — not substitutes.
- name: Hermes Agent
slug: hermes-agent
date: "2026-04-17"
version: "v0.10.0"
stars: "61k"
threat_level: low
notable_changes: >
v0.10.0 (Apr 16 2026) launches Tool Gateway giving paid Portal subscribers
built-in web search, image generation, TTS, and browser automation; no
multi-agent or org hierarchy — personal AI shape, not platform competitor.
source_url: https://github.com/NousResearch/hermes-agent/releases
- name: gstack
slug: gstack
date: "2026-04-17"
version: "active"
stars: "70k"
threat_level: low
notable_changes: >
Viral Claude Code skills bundle with 70k ⭐; sequential single-session
persona-switching — no persistent infra, Docker isolation, or A2A protocol;
differentiation holds unless multi-session execution is added.
source_url: https://github.com/garrytan/gstack
- name: Flowise
slug: flowise
date: "2026-04-17"
version: "flowise@3.1.2"
stars: "30k"
threat_level: low
notable_changes: >
v3.1.2 (Apr 14 2026) delivers security hardening (CORS abuse, credential
leaks, unauthorized access); acquired by Workday (Aug 2025) — repositioned
for HR/finance enterprise, narrowing its developer-team market.
source_url: https://github.com/FlowiseAI/Flowise/releases
- name: OpenHands
slug: openhands
date: "2026-04-17"
version: "v1.6.0"
stars: "47k"
threat_level: low
notable_changes: >
v1.6.0 (Mar 30 2026) adds hook support and /clear command preserving
sandbox runtime; jumped to v1.x series (was v0.39.0); SWE-Bench top
open-source rank — single-agent software engineer, not a platform.
source_url: https://github.com/All-Hands-AI/OpenHands/releases
- name: Temporal
slug: temporal
date: "2026-04-17"
version: "v1.30.4"
stars: "13k"
threat_level: low
notable_changes: >
v1.30.4 (Apr 10 2026) patches CVE-2026-5724 MEDIUM authorization
vulnerability; $300M Series D (Feb 2026, $5B valuation); we integrate
Temporal as infra via workspace-template/builtin_tools/temporal_workflow.py.
source_url: https://github.com/temporalio/temporal/releases
- name: Chrome DevTools MCP
slug: chrome-devtools-mcp
date: "2026-04-17"
version: "active"
stars: "35.5k"
threat_level: low
notable_changes: >
Official ChromeDevTools org MCP server with 23 browser-control tools;
replaces our bespoke Puppeteer CDP plugin — we adopt it as of issue #540.
source_url: https://github.com/ChromeDevTools/chrome-devtools-mcp
- name: Composio
slug: composio
date: "2026-04-17"
version: "active"
stars: "18k"
threat_level: low
notable_changes: >
250+ tool integrations with managed auth; potential skill-pack dependency
for workspace channel integrations rather than a competing platform.
source_url: https://github.com/composio-dev/composio/releases
- name: AgentScope
slug: agentscope
date: "2026-04-17"
version: "v1.0.18"
stars: "23.8k"
threat_level: low
notable_changes: >
v1.0.18 (Mar 26 2026) from Alibaba/ModelScope with MsgHub typed routing
and OpenTelemetry; MCP integration; no deployment layer — framework only.
source_url: https://github.com/modelscope/agentscope/releases
- name: Skills CLI
slug: skills-cli
date: "2026-04-17"
version: "active"
stars: "14.2k"
threat_level: low
notable_changes: >
Vercel-backed canonical agentskills.io install CLI covering 45+ agents
including our Claude Code workspace; aligning plugins/ manifest to the
agentskills.io spec gives us free distribution through this channel.
source_url: https://github.com/vercel-labs/skills
- name: Archon
slug: archon
date: "2026-04-17"
version: "v0.3.6"
stars: "18.1k"
threat_level: low
notable_changes: >
v0.3.6 active; YAML-DAG coding workflow with mixed AI/deterministic nodes
and human approval gates; reference design for our workspace delivery
pipelines — no multi-agent coordination.
source_url: https://github.com/coleam00/Archon/releases
- name: Tencent AI-Infra-Guard
slug: tencent-ai-infra-guard
date: "2026-04-17"
version: "v4.1.3"
stars: "3.5k"
threat_level: low
notable_changes: >
v4.1.3 (Apr 9 2026); red team platform scanning MCP server and skills
surfaces — use as security compliance checklist for our MCP server and
plugin registry hardening; not a runtime competitor.
source_url: https://github.com/Tencent/AI-Infra-Guard/releases
- name: Holaboss
slug: holaboss
date: "2026-04-17"
version: "active"
stars: "1.7k"
threat_level: low
notable_changes: >
Desktop "AI employee" with filesystem-as-memory and compaction boundaries;
single-agent, no A2A — primary concern is terminology collisions
(workspace / MEMORY.md / SKILL.md / agentskills.io).
source_url: https://github.com/holaboss-ai/holaboss-ai
- name: claude-mem
slug: claude-mem
date: "2026-04-17"
version: "active"
stars: "56k"
threat_level: low
notable_changes: >
SQLite FTS5 + Chroma hybrid cross-session memory with lifecycle hooks;
56k ⭐ signals strong demand for the gap we need to close in agent_memories
— adopt PostToolUse + SessionEnd observation pipeline.
source_url: https://github.com/thedotmack/claude-mem
- name: Plannotator
slug: plannotator
date: "2026-04-17"
version: "v0.17.10"
stars: "4.3k"
threat_level: low
notable_changes: >
v0.17.10 (Apr 13 2026); HITL plan annotation UX with structured feedback
types (delete/insert/replace/comment); reference design for improving our
approvals API response schema.
source_url: https://github.com/backnotprop/plannotator/releases
- name: open-multi-agent
slug: open-multi-agent
date: "2026-04-17"
version: "v1.1.0"
stars: "5.7k"
threat_level: low
notable_changes: >
v1.1.0 (Apr 1 2026); TypeScript multi-agent with runtime goal-to-DAG
decomposition in 3 deps; ephemeral per-run — no persistent identity,
no canvas, no scheduling.
source_url: https://github.com/JackChen-me/open-multi-agent/releases
- name: Open Agents (Vercel)
slug: open-agents-vercel
date: "2026-04-17"
version: "active"
stars: "2.2k"
threat_level: low
notable_changes: >
+1,020 stars in one day (Apr 15 2026); Vercel Labs reference app for
background coding agents with snapshot-based VM resumption; no multi-
agent coordination — reference template, not a platform.
source_url: https://github.com/vercel-labs/open-agents
- name: GenericAgent
slug: generic-agent
date: "2026-04-17"
version: "v1.0"
stars: "2.1k"
threat_level: low
notable_changes: >
v1.0 (Jan 16 2026); self-evolving skill tree with four-tier memory
hierarchy (L0 rules → L4 session archives); single-agent, no A2A —
memory taxonomy worth borrowing for agent_memories scopes.
source_url: https://github.com/lsdefine/GenericAgent/releases
- name: OpenSRE
slug: opensre
date: "2026-04-17"
version: "active"
stars: "900"
threat_level: low
notable_changes: >
AI SRE toolkit with 40+ observability integrations (Grafana/Datadog/
K8s/AWS/GCP/PagerDuty); potential DevOps workspace skill-pack source
rather than a competing platform.
source_url: https://github.com/Tracer-Cloud/opensre
- name: AMD GAIA
slug: amd-gaia
date: "2026-04-17"
version: "v0.17.2"
stars: "1.2k"
threat_level: low
notable_changes: >
v0.17.2 (Apr 10 2026); AMD-backed local agent framework hardware-locked
to Ryzen AI 300+ NPU; MCP support; not general-purpose.
source_url: https://github.com/amd/gaia/releases
```
---
## Entries
### Holaboss — `holaboss-ai/holaboss-ai`
@ -870,9 +1316,9 @@ builders; Molecule AI users are developers building agent companies.
**Terminology collisions:** "middleware" — their processing pipeline hook; undefined in our platform. "graph" — their workflow DAG vs our live org chart (same word, different semantics).
**Signals to react to:** If AF 1.0 achieves enterprise adoption → update our autogen adapter to target `microsoft/agent-framework`. If AF Labs RL ships stable → evaluate for dynamic PM routing based on workspace performance history.
**Signals to react to:** AF 1.0 GA shipped April 7 with AG-UI (SSE protocol for streaming agent events to frontends). AG-UI is a direct competitor to our WebSocket canvas events — if AG-UI becomes a standard, we need an AG-UI-compatible SSE endpoint to attract MAF users. Process Framework GA in Q2 2026 will add visual workflow design — evaluate overlap with our Canvas. Google's private Tool Registry (Vertex AI) sets an enterprise expectation for tool governance that we should match with per-org curated plugin registries.
**Last reviewed:** 2026-04-15 · **Stars / activity:** ~9.5k ⭐, April 2026 .NET release, official AutoGen successor
**Last reviewed:** 2026-04-17 · **Stars / activity:** ~9.5k ⭐, v1.0 GA April 7 2026, AG-UI protocol announced
---
@ -1202,22 +1648,475 @@ Complementary by design.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~35.5k ⭐, ChromeDevTools org, Apache-2.0
---
### LangGraph — `langchain-ai/langgraph`
**Pitch:** "Build resilient language agents as graphs — stateful, multi-actor
applications with fine-grained control over agent flow."
**Shape:** Python + JavaScript/TypeScript library (MIT), ~29k ⭐, v1.1.6 released
April 10 2026. Part of the LangChain ecosystem. Agents are modelled as directed
graphs: nodes are callables (LLM calls, tool calls, conditional branches), edges are
routing rules, and a persistent **state schema** carries data between nodes.
Checkpointing (memory persistence across turns) is built in via a pluggable
`Checkpointer` interface (in-memory, SQLite, Postgres, Redis). Multi-agent
compositions via subgraph nodes. LangGraph Cloud offers hosted execution backed by
LangSmith observability. LangGraph 2.0 GA shipped February 2026, adding declarative
guardrail nodes (content filtering, rate limiting, audit logging as config).
**Overlap with us:** Molecule AI ships a `langgraph` runtime adapter
(`molecule-ai-workspace-template-langgraph`) — this is us *on top of* LangGraph.
Their graph model (nodes, edges, state) is structurally analogous to our workspace
hierarchy (workspaces, A2A calls, shared context). Their `Checkpointer` is the
lower-level equivalent of our `agent_memories` table. LangGraph Cloud's hosted
execution competes directly with our scheduler + workspace lifecycle.
**Differentiation:** LangGraph is a framework for *building* the logic of one agent
or pipeline; Molecule AI is a platform for *deploying and coordinating* long-lived
agents as an org. LangGraph has no concept of Docker workspace isolation, org-chart
hierarchy, inter-agent A2A protocol, channel integrations, visual canvas, or cron
scheduling. Our langgraph adapter *runs on top of* LangGraph — they're layered, not
competing, for most use cases. The gap is LangGraph Cloud vs our hosted platform.
**Worth borrowing:**
- **Declarative guardrail nodes** (v2.0) — content filtering and audit logging as
first-class graph nodes rather than custom code. Map to our `approvals` table:
add declarative gate types (content-filter, rate-limit) in workspace config.
- **Subgraph composition** — composing multi-agent pipelines by nesting graphs.
Our workspace parent/child hierarchy is the operational equivalent; study for
dynamic sub-workspace spawning UX.
- **Checkpointer interface** — the pluggable backend design (SQLite → Postgres →
Redis hot path) is the right abstraction for our `agent_memories` persistence layer.
**Terminology collisions:**
- "state" — LangGraph: the typed dict carried between graph nodes; ours: workspace
status (online/offline/degraded). No user confusion but docs should disambiguate.
- "node" — LangGraph: a callable in the agent graph; our canvas: a workspace tile.
Same word, very different level of abstraction.
- "graph" — LangGraph: the directed workflow graph; our canvas: the live org chart.
Marketing copy should distinguish "workflow graph" (LangGraph) vs "org chart" (us).
**Signals to react to:**
- If LangGraph Cloud adds persistent agent identity (long-lived named agents beyond
per-session checkpoints) → direct hosted-platform competition; accelerate our
LangGraph adapter differentiation.
- If LangGraph 2.0 guardrail nodes become the standard compliance primitive for AI
pipelines → expose an equivalent gate type in `workspace-template/` adapters.
- If LangSmith + LangGraph Cloud bundle as an all-in-one enterprise platform → we
need to position our model-agnostic, self-hostable story more aggressively against
LangChain lock-in.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~29k ⭐, v1.1.6 April 10 2026, very active
---
### CrewAI — `crewAIInc/crewAI`
**Pitch:** "Framework for orchestrating role-playing, autonomous AI agents — by
fostering collaborative intelligence, CrewAI empowers agents to work together
seamlessly, tackling complex tasks."
**Shape:** Python library (MIT), ~48k ⭐, v1.14.2 released April 8 2026. Agents are
defined by `role`, `goal`, and `backstory` fields and assembled into a `Crew` with
`Process.sequential` (fixed order) or `Process.hierarchical` (manager agent
delegates) execution. `Flow` (event-driven stateful pipelines, shipped 2024-Q4)
enables complex conditional branching beyond linear crew execution. Model-agnostic:
OpenAI, Anthropic, Gemini, Mistral, Bedrock, Ollama, and any LiteLLM-compatible
endpoint. Tools are Python callables or MCP integrations. CrewAI Enterprise is the
commercial SaaS offering.
**Overlap with us:** Molecule AI ships a `crewai` runtime adapter
(`molecule-ai-workspace-template-crewai`) — our workspaces *run* CrewAI crews.
The Crew role model (`role` + `goal` + `backstory`) is our system-prompt-encoded
persona convention made explicit and typed. `Process.hierarchical` with a manager
agent mirrors our PM → Dev Lead → Engineer delegation chain. Flow's event-driven
branching is analogous to our `workspace_schedules` trigger model.
**Differentiation:** CrewAI is an in-process Python framework; Molecule AI is the
operational platform. CrewAI agents are ephemeral per crew run — no Docker isolation,
no persistent identity across restarts, no org-chart canvas, no A2A between
independently deployed agents, no cron scheduling, no channel integrations. A
Molecule AI CrewAI workspace *persists* across sessions, holds a role in a larger org,
and coordinates via our A2A protocol — capabilities CrewAI alone does not provide.
**Worth borrowing:**
- **Typed role schema** — `{role, goal, backstory}` as first-class typed fields
(not free-text system prompt). Our `config.yaml` `role:` is a single string; adopting
a richer `{role, goal, backstory}` triplet would improve agent persona consistency
across restarts and be CrewAI-compatible.
- **`Flow` event-driven pipelines** — conditional state-machine branching triggered by
events. Applicable to our `workspace_schedules` — replace cron-only triggers with
an event graph: "when PR merged → trigger QA workspace → on pass → trigger deploy."
- **Tool decorator pattern** — `@tool` with docstring-as-schema is simpler than our
MCP tool config approach for workspace-local tools.
**Terminology collisions:**
- "crew" — their multi-agent team; our team is a set of workspaces in an org
hierarchy. Marketing copy should use "workspace org" not "crew" to stay distinct.
- "agent" — their ephemeral in-process Python object; our long-lived Docker workspace.
- "task" — their atomic unit of work assigned to an agent; our `current_task`
heartbeat field. Same word, different scope.
**Signals to react to:**
- If CrewAI ships persistent agent state between crew runs → closes primary gap with
our workspace model; ~48k ⭐ means it would land with significant reach.
- If CrewAI Enterprise adds visual org-chart canvas or A2A-style inter-crew
communication → direct platform competitor.
- If the 2026 State of Agentic AI survey (65% of orgs using agents) accelerates
CrewAI Enterprise sales → their enterprise positioning competes directly with ours;
update ICP messaging.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~48k ⭐, v1.14.2 April 8 2026, very active
---
### Temporal — `temporalio/temporal`
**Pitch:** "The durable execution platform — write code that runs reliably even in
the face of failures, timeouts, and restarts."
**Shape:** Go server + SDKs for Go, Java, TypeScript, Python, .NET, PHP (MIT),
~13k ⭐ server repo. Workflow logic is deterministic code that Temporal replays from
event history after failures — no explicit retry/checkpoint code. `Activities` are
the fallible steps; `Signals` allow external input mid-workflow; `Queries` expose
read-only workflow state. Temporal Cloud is the managed SaaS; self-hosted runs on
K8s or Docker. Raised $300M Series D at $5B valuation February 2026, with AI driving
demand for durable execution. v1.30.4 released April 10 2026.
**Overlap with us:** Molecule AI already integrates Temporal via
`workspace-template/builtin_tools/temporal_workflow.py`. The `infra/scripts/setup.sh`
starts a local Temporal server (`:7233` gRPC + `:8233` Web UI). Any Molecule AI
workspace that needs bulletproof long-running or retryable work delegates to Temporal.
Temporal's Worker Versioning (GA March 2026) solves the same code-deploy-during-live-
workflow problem our restart-context message handles ad hoc.
**Differentiation:** Temporal is infrastructure — a durable execution engine with no
concept of agent identity, LLM calls, memory, org hierarchy, canvas, channels, or A2A.
It is the *substrate* beneath agents that need guaranteed execution; we are the
*platform* that decides when to call Temporal vs handle work in the workspace itself.
We are Temporal consumers, not competitors. The distinction for users: use Temporal
when you need workflow history replay and multi-step retry guarantees; use Molecule AI
scheduling for lighter cron-triggered agent prompts.
**Worth borrowing:**
- **Worker Versioning** (GA March 2026) — pin live workflows to a specific code
version so deploys don't corrupt in-flight runs. Analogous problem to our
workspace restart-context; worth evaluating as the underlying mechanism for
zero-downtime workspace deploys.
- **Workflow Update operation** — synchronous request/response pattern for live
workflows (e.g., human approves mid-workflow). Cleaner than our current
`approvals` polling; evaluate for HITL in long Temporal-backed workspace tasks.
- **Upgrade-on-Continue-as-New** (Public Preview March 2026) — pinned workflows can
opt into a newer code version at a clean continuation boundary. Pattern applicable
to our workspace versioning strategy.
**Terminology collisions:**
- "workflow" — Temporal: a deterministic, replay-safe code function; ours: informal
delegation chain term. In our docs, "Temporal workflow" should always be qualified
to avoid confusion with "workflow" in general product copy.
- "worker" — Temporal: a process that polls the server and executes workflow/activity
code; ours: not a first-class term (workspaces fill this role).
- "activity" — Temporal: a fallible, retryable step in a workflow; ours: `activity_logs`
table (A2A traffic logs). Different concepts sharing a word.
**Signals to react to:**
- If Temporal Cloud adds native LLM-aware primitives (e.g., LLM call as a first-class
activity with token tracking, model fallback, prompt versioning) → Temporal becomes
an agent platform, not just an infra layer; reassess our `temporal_workflow.py`
integration depth.
- If the $300M Series D accelerates enterprise sales motion → more enterprises will
arrive with Temporal already deployed; strengthen our Temporal integration story as
a first-class enterprise deployment pattern.
- If Upgrade-on-Continue-as-New becomes stable → adopt for workspace blue/green
deploy pattern (no workspace downtime during code updates).
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~13k ⭐ (server); $5B valuation, $300M Series D Feb 2026; v1.30.4 April 10 2026
---
### Dify — `langgenius/dify`
**Pitch:** "Production-ready platform for agentic workflow development — the leading
open-source LLM app development platform."
**Shape:** Python backend + React frontend (MIT), ~60k ⭐, v1.14.0 released February
2026. Visual drag-drop workflow canvas where LLM calls, RAG retrievers, code
executors, HTTP nodes, and agent loops are wired as a graph. Ships a full app
deployment stack: API server, web UI widget, and Slack/Telegram/WhatsApp channel
integrations. RAG pipeline with knowledge base management (file upload → chunk →
embed → retrieve). Supports 50+ LLM providers. Dify Cloud is the managed SaaS;
self-hosted via Docker Compose. Raised $30M Pre-A round led by HSG, March 2026.
**Overlap with us:** Both have a visual canvas for connecting AI work. Both support
channel integrations (Slack / Telegram / WhatsApp). Both run LLM-backed agents and
expose a REST API for external trigger. Dify's `Human Input` node (v1.14.0) is the
same pattern as our `approvals` table — pause workflow, wait for human input, resume.
Their knowledge base (RAG) is the equivalent of what our Research Lead workspace does
via tool calls to external retrieval services. Dify Cloud competes with our SaaS
control plane for teams that want a hosted no-code LLM app platform.
**Differentiation:** Dify targets **no-code and low-code builders** — the UX is
workflow configuration, not code. No persistent agent identity across workflow runs,
no multi-agent org hierarchy (agents in Dify are single workflow nodes, not
first-class citizens), no A2A protocol between independently deployed agents, no
Docker container isolation per agent. Molecule AI targets developers who write
`config.yaml` and system prompts; Dify targets product managers and ops teams who
want to deploy LLM apps without engineering. The ~60k ⭐ signal shows massive
no-code demand that our current product does not address.
**Worth borrowing:**
- **Human Input node** — native human-in-the-loop as a workflow node type, not a
separate approvals API. Map to our `approvals` table: expose a "wait for human"
node in a future visual workspace config editor.
- **Summary Index** (v1.14.0) — AI-generated summaries per document chunk in the
RAG knowledge base significantly improve retrieval precision. Applicable to our
Research Lead workspace's document retrieval; evaluate for our MCP memory backend.
- **Knowledge base management UI** — file upload → chunk → embed → retrieval test
in a single interface. Reference design for our future `agent_memories` admin UI.
- **Channel trigger UX** — same as n8n: three-click channel connect. Our channel
setup is more manual; Dify is a second data point that this is the target UX.
**Terminology collisions:**
- "workflow" — Dify: the visual graph of LLM+tool nodes that defines an app; ours:
informal delegation chain. In competitive positioning copy, distinguish "no-code
workflow builder" (Dify) vs "multi-agent org" (us).
- "agent" — Dify: a single ReAct loop node inside a workflow; ours: a long-lived
Docker workspace with an assigned role. Different scope and persistence model.
- "knowledge base" — Dify: an indexed file collection for RAG; ours: not a
first-class term (workspace agents manage their own retrieval).
**Signals to react to:**
- If Dify ships persistent agent identity (agents that remember state across workflow
runs, not just within one) → closes the primary product gap; ~60k ⭐ + no-code
accessibility is a formidable combination.
- If Dify adds multi-agent coordination (agents that spawn and coordinate sub-agents
as org peers, not just nested workflow nodes) → direct overlap with our multi-
workspace hierarchy.
- If the $30M Pre-A closes more enterprise deals → Dify moves up-market; watch for
enterprise canvas and RBAC features that would narrow our enterprise differentiation.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~60k ⭐, v1.14.0 Feb 2026; $30M Pre-A Mar 2026
---
### Flowise — `FlowiseAI/Flowise`
**Pitch:** "Build AI Agents, Visually — drag-drop UI to build LLM flows and agent
pipelines using LangChain and LlamaIndex components."
**Shape:** Node.js + React (MIT repo; post-Workday acquisition terms TBD), ~30k ⭐,
flowise@3.1.0 released March 16 2026. Drag-drop visual node editor where LangChain
chains, LlamaIndex query engines, vector stores, tools, and agents are wired as a
flow graph. Each flow is exported as a JSON config; the Flowise server exposes a
REST API and a chat widget embed. **Agentflow** (shipped 2024) adds multi-agent
composition: a Supervisor agent routes tasks to Worker agents within a single Flowise
flow. **Acquired by Workday** (announced August 2025) — Flowise is now part of
Workday's AI platform, bringing agent-building capability to Workday customers.
Security: three chained CVEs (CVE-2025-59528, CVE-2025-8943, CVE-2025-26319) enabling
unauthenticated RCE via Custom MCP Node were patched in v3.0.6 (exploit confirmed
April 7 2026).
**Overlap with us:** Both are drag-drop visual builders for AI agent workflows. Both
support LangChain components under the hood. Flowise's Agentflow (Supervisor + Worker
agents) mirrors our PM → engineer hierarchy, but within a single visual flow rather
than independently deployed Docker workspaces. Flowise's REST API per flow is
structurally similar to our `POST /workspaces/:id/a2a` endpoint — both let external
systems trigger an agent and get a response. Channel integrations overlap with our
`workspace_channels`.
**Differentiation:** Flowise is a **no-code single-server app builder** — agents are
stateless flow executions, not long-lived Docker workspaces with persistent memory,
schedules, and org identity. Post-Workday acquisition, Flowise targets Workday
enterprise customers (HR, finance, ops) rather than developer-first teams building AI
companies. No persistent agent memory between flow runs, no A2A protocol between
independently deployed agents, no cron scheduling, no org-chart canvas. The Workday
acquisition actually *narrows* Flowise's addressable market to Workday-centric
enterprises — which opens space for Molecule AI as the developer-first alternative.
**Worth borrowing:**
- **Agentflow Supervisor/Worker pattern** — the Supervisor agent dynamically routes
tasks to Workers based on their capabilities, with results aggregated back. More
flexible than our static PM → Lead delegation; study for dynamic routing in the PM
workspace's `delegate_task`.
- **Flow-as-JSON export/import** — each Flowise flow is a portable JSON blob that
can be versioned, shared, and re-imported. Our workspace `config.yaml` is close;
adding a full workflow export (config + memory schema + skill list) as a bundle
would enable the same portability.
- **Chat widget embed** — single-line script tag embeds a Flowise agent as a chat
widget on any webpage. Our `workspace_channels` is closer to outbound messaging;
a widget embed for inbound is a UX gap worth closing for developer adoption.
**Terminology collisions:**
- "flow" — Flowise: a visual JSON graph of LangChain nodes; ours: not a first-class
term. Avoid "flow" in our visual canvas docs to prevent confusion with Flowise-
trained users.
- "node" — Flowise: a LangChain component tile in the flow canvas; our canvas: a
workspace tile. Same word, same visual metaphor, different semantics.
- "supervisor" / "worker" — Flowise Agentflow roles; our PM / engineer hierarchy is
the same concept with different names. Our marketing should own "PM + engineer"
framing to stay distinct.
**Signals to react to:**
- If Workday opens Flowise APIs to non-Workday enterprise customers → Flowise
re-enters the general market with Workday distribution; update competitive messaging.
- If the CVE chain (RCE via Custom MCP Node) causes enterprise churn → opportunity
to position Molecule AI's Docker-isolated workspaces as the security-first
alternative for self-hosted agent deployments.
- If Flowise ships persistent agent memory or cross-flow A2A → closes primary gap;
monitor quarterly given Workday engineering resources.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~30k ⭐, flowise@3.1.0 March 16 2026; acquired by Workday Aug 2025
---
## Candidates to add (backlog)
Short-list of projects to write up next time someone has an hour:
- **LangGraph** (`langchain-ai/langgraph`) — done 2026-04-16; full entry above
  compares their graph model to our workspace hierarchy.
- **CrewAI** (`crewAIInc/crewAI`) — done 2026-04-16; full entry above.
- **AutoGen** (`microsoft/autogen`) — Microsoft's original repo; now superseded by
  Microsoft Agent Framework (tracked above) and AG2 community fork (tracked above).
  Entry should clarify which adapter target is canonical.
- **DeepAgents** (`langchain-ai/deepagents`) — we adapt it; particularly their
  sub-agent feature that collides with our "skills" word.
- **OpenClaw** — check if this is still live post-Hermes rebrand; our
adapter may need renaming.
- **Moltiverse / Moltbook** (`molti-verse.com`) — "social network for AI
agents." Not a competitor; orthogonal ecosystem but worth tracking in
case we want agent-to-agent discovery beyond a single org.
- **Temporal** (`temporalio/temporal`) — done 2026-04-16; full entry above
  covers when to lean on Temporal vs our in-house scheduling.
---
### OpenAI Agents SDK — Sandbox Agents — `openai/openai-agents-python`
**Pitch:** "A lightweight, powerful framework for multi-agent workflows — now with
persistent isolated sandbox workspaces, snapshot/resume, and sandbox memory."
**Shape:** Python (MIT), ~14k ⭐ (+110 stars today), v0.14.0 released April 15, 2026.
New beta surface: `SandboxAgent` backed by a `Manifest` (file tree, Git repo,
mounts) and a `SandboxRunConfig` that targets a pluggable execution backend.
Local: `UnixLocalSandboxClient`; containerised: `DockerSandboxClient`; hosted via
optional extras for Blaxel, Cloudflare, Daytona, E2B, Modal, Runloop, and Vercel.
**Sandbox memory** lets future runs inherit lessons from prior runs with progressive
disclosure and configurable isolation boundaries. Existing SDK primitives (Agents,
Handoffs, Guardrails, Tracing) are unchanged.
**Overlap with us:** `SandboxAgent` + hosted backends directly competes with our
workspace lifecycle model — a persistent isolated execution environment, snapshot
and resume, durable memory. The multi-backend strategy (Docker, Modal, Vercel, E2B)
mirrors our Docker workspace + cloud-provider abstraction goal. Sandbox memory is
the same cross-session memory gap we address via `agent_memories`.
**Differentiation:** Still a framework, not a platform — no visual canvas, no
org-chart hierarchy, no A2A between independently deployed sandboxes (handoffs are
in-process), no cron scheduling, no channel integrations. OpenAI-provider-optimised
in practice. Our differentiators: multi-agent org hierarchy with A2A, model-agnostic,
self-hostable, persistent agent identity beyond a single SDK process.
**Worth borrowing:** `SandboxRunConfig` backend abstraction — decouple workspace
execution from provider (Docker / Modal / Vercel) using a single config object.
Directly applicable to our workspace provisioner. Sandbox memory progressive
disclosure (summaries first, full context on demand) matches the retrieval strategy
in claude-mem; adopt for `agent_memories` query API.
**Terminology collisions:** "sandbox" — theirs: an isolated execution backend; ours:
not a first-class term (we use "workspace" / "container"). "memory" — same word,
same intent; our `agent_memories` and their sandbox memory are functionally equivalent.
**Signals to react to:** If OpenAI adds inter-sandbox A2A (sandboxes delegating to
each other across process boundaries) → direct platform feature parity; accelerate
our A2A documentation and SDK ergonomics. If hosted backends gain TypeScript support
(announced as roadmap) → Vercel + TS stack competes for our TypeScript-native users.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~14k ⭐, v0.14.0 April 15, 2026, OpenAI-maintained
---
### Tencent AI-Infra-Guard — `Tencent/AI-Infra-Guard`
**Pitch:** "A full-stack AI Red Teaming platform securing AI ecosystems via Agent
Scan, Skills Scan, MCP scan, AI Infra scan, and LLM jailbreak evaluation."
**Shape:** Python + Go (Apache-2.0), ~3.5k ⭐, v4.1.3 released April 9, 2026.
Tencent Zhuque Lab. Six scanning surfaces: ClawScan (open-source code security),
Agent Scan (runtime agent behaviour audit), Skills Scan (verifying installed agent
skills), MCP Server scan (tool-surface vulnerability detection), AI infrastructure
CVE matching (1000+ CVEs across 57+ AI components including crewai, kubeai,
lobehub), and LLM jailbreak evaluation. Ships a web UI, REST API, Docker deployment,
and integration with ClawHub agent marketplace.
**Overlap with us:** Our plugin/skills registry and MCP server are exactly the
surfaces AI-Infra-Guard scans. The Skills Scan module validates installed agent
skill packs — the same artefacts our `plugins/` directory ships. MCP Server scan
targets the same `@molecule-ai/mcp-server` surface our platform exposes. If
enterprise customers adopt AI-Infra-Guard for compliance audits, our plugin manifests
and MCP tool definitions need to be compatible with its scanner.
**Differentiation:** A security tooling product, not an agent framework or platform.
No agent runtime, no orchestration, no canvas, no memory. Molecule AI builds and
runs agents; AI-Infra-Guard audits them and their supply chain.
**Worth borrowing:** MCP Server scan vulnerability categories — use as a checklist
for hardening our own MCP server (`@molecule-ai/mcp-server`) before an enterprise
security review. Skills Scan concept — add a `plugin validate` sub-command to
`molecli` that runs the same checks locally before installing a plugin.
**Terminology collisions:** "agent scan" — their runtime audit process; not a term
we use. "skills scan" — their validation of installed skill packs; same artefact,
different word ("plugin audit" in our vocabulary).
**Signals to react to:** If AI-Infra-Guard publishes a formal MCP tool-surface
security spec → treat as a compliance baseline for our MCP server hardening. If
Tencent integrates this into enterprise procurement checklists → our plugin and MCP
docs need an explicit security posture section to pass audits.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~3.5k ⭐, v4.1.3 April 9, 2026, Tencent Zhuque Lab
---
### VoltAgent — `VoltAgent/voltagent`
**Pitch:** "The open-source TypeScript AI agent framework with a built-in
observability and deployment console — build agents once, run and monitor them
everywhere."
**Shape:** TypeScript (MIT), ~8.2k ⭐, 668 releases, latest April 11, 2026.
Two-layer design: `@voltagent/core` framework (typed agent definitions, tool
registry, multi-agent supervisor/sub-agent coordination, memory, RAG, voice,
guardrails) + **VoltOps Console** (hosted or self-hosted web UI for observability,
deployment automation, and agent lifecycle management). MCP client support connects
any MCP server as a tool source. Provider-agnostic: OpenAI, Anthropic, Google,
Ollama, and any OpenAI-compatible endpoint. Ships `@voltagent/server-elysia` for
Bun-native HTTP serving of agents.
**Overlap with us:** VoltOps Console is the closest analogue to our Canvas we've
tracked in the TypeScript ecosystem — both provide a web UI for managing and
monitoring long-lived agents. The supervisor/sub-agent coordination model mirrors
our PM → engineer delegation. MCP support means workspace skills install into
VoltAgent as easily as ours. `@voltagent/server-elysia` pattern (agent as an HTTP
server) is analogous to our A2A endpoint per workspace.
**Differentiation:** No Docker workspace isolation, no persistent agent identity
across server restarts, no A2A protocol between independently deployed agents, no
cron scheduling, no channel integrations. VoltOps Console focuses on observability
and deployment automation; our Canvas is the live visual org chart with drag-drop
topology control. Molecule AI targets multi-agent companies; VoltAgent targets
individual TypeScript developers building production agents.
**Worth borrowing:** VoltOps observability schema — trace views, agent state
inspection, and deployment automation as a single UI surface. Reference design for
merging our Canvas agent-inspection panel with Langfuse traces into a unified
observability tab. `@voltagent/core` typed agent definition API (role, memory,
tools, guardrails as typed config) — cleaner than our YAML-then-system-prompt
pipeline; evaluate for a future typed workspace config schema.
**Terminology collisions:** "console" — VoltOps Console: their monitoring + deploy
UI; our `molecli`: a TUI dashboard. Both are "consoles" for watching agents.
"supervisor" — their orchestrating agent tier; our PM workspace plays the same role.
**Signals to react to:** If VoltOps Console adds visual org-chart topology (not just
list view) → direct Canvas competitor in the TypeScript ecosystem. If
`@voltagent/core` multi-agent API becomes idiomatic for TS agent developers →
consider shipping an official Molecule AI VoltAgent runtime adapter alongside our
langgraph/crewai adapters.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~8.2k ⭐, 668 releases, latest April 11, 2026

View File

@ -0,0 +1,112 @@
# Competitor Tracker
> **Auto-maintained by PMM cron** — diffs `docs/ecosystem-watch.md` on schedule
> to detect version bumps, threat escalations, and notable changes.
>
> Source of truth for competitor state: `docs/ecosystem-watch.md#competitor-snapshot`
> Full narrative analysis: `docs/ecosystem-watch.md#entries`
>
> **Last updated:** 2026-04-17 (bootstrap — subsequent updates by PMM cron)
---
## High-Threat Competitors
Platforms that directly substitute for or significantly erode Molecule AI's market position.
| Competitor | Version | Stars | Threat Signal | Updated |
|---|---|---|---|---|
| [OpenAI Agents SDK](https://github.com/openai/openai-agents-python) | v0.14.1 | 14k | v0.14.1 SandboxAgent beta — persistent isolated workspaces, snapshot/resume, sandbox memory; directly competes with our workspace lifecycle | 2026-04-17 |
| [CrewAI](https://github.com/crewAIInc/crewAI) | v1.14.1 | 48k | 1.4B agentic automations, 60% Fortune 500 adoption, $18M Insight-led round; CrewAI Enterprise SaaS targeting our enterprise segment | 2026-04-17 |
| [Google ADK](https://github.com/google/adk-python) | v1.30.0 | 19k | v1.30.0 adds Auth Provider registry; full Google agent stack (ADK + Gemini CLI + adk-web DevUI + Scion harness) = largest platform risk | 2026-04-17 |
| [Microsoft Agent Framework](https://github.com/microsoft/agent-framework) | python-1.0.1 | 9.5k | v1.0 GA (official AutoGen successor); SOC 2/HIPAA compliance; .NET + Python; Process Framework GA in Q2 2026 | 2026-04-17 |
---
## Medium-Threat Competitors
Significant overlap in adjacent space; active watch required.
| Competitor | Version | Stars | Notes | Updated |
|---|---|---|---|---|
| [Paperclip](https://github.com/paperclipai/paperclip) | v2026.416.0 | 54.8k | Downgraded HIGH→MEDIUM (deep-dive #571): no A2A, no visual canvas on roadmap; single-process task DAG only; brand/framing threat ("zero-human companies"), not a technical substitute. Only gap vs Molecule AI: per-workspace budget limits (#541). | 2026-04-17 |
| [Dify](https://github.com/langgenius/dify) | v1.13.3 | 60k | v1.14.0 RC adds Human Input node; $30M Pre-A ($180M val); no-code positioning targets business users, not our developer audience | 2026-04-17 |
| [LangGraph](https://github.com/langchain-ai/langgraph) | v1.1.6 | 29k | CLI v0.4.22 Apr 16; LangGraph Cloud hosted execution competes with our scheduler | 2026-04-17 |
| [VoltAgent](https://github.com/VoltAgent/voltagent) | server-elysia@2.0.7 | 8.2k | VoltOps Console = closest Canvas analogue in TypeScript ecosystem | 2026-04-17 |
| [n8n](https://github.com/n8n-io/n8n) | v2.17.2 | 50k | n8n 2.0 enterprise AI Agent nodes + RBAC + 400+ channel integrations | 2026-04-17 |
| [Claude Code Routines](https://code.claude.com/docs/en/routines) | cloud-feature | — | Apr 14 2026 launch: Anthropic-hosted cron + GitHub-event-triggered Claude Code sessions | 2026-04-17 |
| [Scion](https://github.com/GoogleCloudPlatform/scion) | active | early | GCP experimental container-per-agent harness (Apr 8 2026); escalation risk to HIGH if productized | 2026-04-17 |
| [Multica](https://github.com/multica-ai/multica) | active | 12.8k | Positioned as Claude Managed Agents alternative; local daemon + central backend with skill compounding | 2026-04-17 |
| [Cline](https://github.com/cline/cline) | active | 44k | Primary user-overlap with our Claude Code workspace; developers who outgrow Cline convert to Molecule AI | 2026-04-17 |
| [ClawRun](https://github.com/clawrun-sh/clawrun) | active | 84 | Closest architectural match tracked (sandbox/heartbeat/snapshot-resume/channels/cost-tracking); early stage but actively shipped | 2026-04-17 |
| [Gemini CLI](https://github.com/google-gemini/gemini-cli) | v0.38.1 | 101k | Runtime candidate for our workspace adapter; elevated to MEDIUM as part of Google's full agent stack | 2026-04-17 |
---
## Low-Threat Competitors
Tools, infra layers, single-agent products, or projects we use — not direct substitutes.
| Competitor | Version | Stars | Role | Updated |
|---|---|---|---|---|
| [Hermes Agent](https://github.com/NousResearch/hermes-agent) | v0.10.0 | 61k | v0.10.0 (Apr 16) Tool Gateway launch; personal AI single-user shape | 2026-04-17 |
| [gstack](https://github.com/garrytan/gstack) | active | 70k | Sequential single-session Claude Code persona-switching; no multi-agent infra | 2026-04-17 |
| [claude-mem](https://github.com/thedotmack/claude-mem) | active | 56k | Memory addon; 56k ⭐ signals demand gap we need to close in agent_memories | 2026-04-17 |
| [Flowise](https://github.com/FlowiseAI/Flowise) | flowise@3.1.2 | 30k | Acquired by Workday (Aug 2025); v3.1.2 security hardening; narrowed to HR/finance enterprise | 2026-04-17 |
| [OpenHands](https://github.com/All-Hands-AI/OpenHands) | v1.6.0 | 47k | SWE-Bench top; v1.6.0 (Mar 30); single-agent software engineer only | 2026-04-17 |
| [Temporal](https://github.com/temporalio/temporal) | v1.30.4 | 13k | Durable execution infra we integrate; $5B valuation, not a competitor | 2026-04-17 |
| [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) | active | 35.5k | Browser MCP we adopt (issue #540); 23-tool surface | 2026-04-17 |
| [AgentScope](https://github.com/modelscope/agentscope) | v1.0.18 | 23.8k | Alibaba/ModelScope framework; MCP integration; no deployment layer | 2026-04-17 |
| [Composio](https://github.com/composio-dev/composio) | active | 18k | Tool integration library; potential skill-pack dependency | 2026-04-17 |
| [Archon](https://github.com/coleam00/Archon) | v0.3.6 | 18.1k | YAML-DAG coding workflow; reference design for workspace delivery pipelines | 2026-04-17 |
| [Skills CLI](https://github.com/vercel-labs/skills) | active | 14.2k | Vercel agentskills.io CLI; aligning plugins/ = free distribution channel | 2026-04-17 |
| [Holaboss](https://github.com/holaboss-ai/holaboss-ai) | active | 1.7k | Desktop AI employee; terminology collisions (workspace/SKILL.md) | 2026-04-17 |
| [Tencent AI-Infra-Guard](https://github.com/Tencent/AI-Infra-Guard) | v4.1.3 | 3.5k | Security scanner; use as MCP + plugin registry compliance checklist | 2026-04-17 |
| [Plannotator](https://github.com/backnotprop/plannotator) | v0.17.10 | 4.3k | HITL plan annotation UX; reference for improving approvals API schema | 2026-04-17 |
| [open-multi-agent](https://github.com/JackChen-me/open-multi-agent) | v1.1.0 | 5.7k | TypeScript goal-to-DAG library; ephemeral, no identity | 2026-04-17 |
| [Open Agents (Vercel)](https://github.com/vercel-labs/open-agents) | active | 2.2k | Reference app; snapshot-based VM resumption pattern worth borrowing | 2026-04-17 |
| [GenericAgent](https://github.com/lsdefine/GenericAgent) | v1.0 | 2.1k | Self-evolving skill tree; four-tier memory taxonomy worth borrowing | 2026-04-17 |
| [OpenSRE](https://github.com/Tracer-Cloud/opensre) | active | 900 | AI SRE toolkit; potential DevOps workspace skill-pack source | 2026-04-17 |
| [AMD GAIA](https://github.com/amd/gaia) | v0.17.2 | 1.2k | Hardware-locked (AMD Ryzen AI 300+); not general-purpose | 2026-04-17 |
---
## Watchlist — Escalation Signals
The following events would require immediate threat-level re-assessment:
| Competitor | Watch Signal | Current Level | Escalates To |
|---|---|---|---|
| Paperclip | Ships persistent agent memory | MEDIUM | HIGH — 54.8k ⭐ head-start |
| Paperclip | Ships visual org-chart canvas | MEDIUM | HIGH — direct Canvas competitor |
| Scion | Google productizes as managed GCP service | MEDIUM | HIGH |
| VoltAgent | VoltOps Console adds visual org-chart topology | MEDIUM | HIGH |
| Google ADK | ADK + Vertex AI becomes hosted managed platform | HIGH | CRITICAL |
| OpenAI Agents SDK | Inter-sandbox A2A across process boundaries | HIGH | CRITICAL |
| ClawRun | Adds A2A or multi-agent coordination | MEDIUM | HIGH |
| gstack | Adds multi-session/parallel execution | LOW | HIGH — 70k ⭐ head-start |
| Claude Code Routines | Adds A2A between routine sessions | MEDIUM | HIGH — Anthropic distribution |
---
## Recently Changed (last 30 days)
> PMM cron updates this section automatically when `notable_changes` or `version` fields change.
| Date | Competitor | Change |
|---|---|---|
| 2026-04-17 | **Paperclip** | Threat downgraded HIGH→MEDIUM (deep-dive #571): no A2A, no canvas, brand threat only |
| 2026-04-17 | **Paperclip** | v2026.416.0 — execution policies + chat threads for agent transcripts |
| 2026-04-17 | **Hermes Agent** | v0.10.0 — Tool Gateway (web search, image gen, TTS, browser automation) |
| 2026-04-16 | **LangGraph CLI** | v0.4.22 — deploy source tracking |
| 2026-04-15 | **OpenAI Agents SDK** | v0.14.1 — tracing patch on top of Sandbox Agents beta |
| 2026-04-15 | **Gemini CLI** | v0.38.1 — stability patch |
| 2026-04-14 | **Flowise** | v3.1.2 — security hardening (CORS, credential leaks) |
| 2026-04-14 | **Claude Code Routines** | Launched — Anthropic-hosted cron-triggered Claude Code sessions |
| 2026-04-13 | **Google ADK** | v1.30.0 — Auth Provider + Parameter Manager + Gemma 4 support |
| 2026-04-11 | **VoltAgent** | server-elysia@2.0.7 — A2A agent card URL fix |
| 2026-04-10 | **LangGraph** | v1.1.6 — declarative guardrail nodes (LangGraph 2.0 GA) |
| 2026-04-10 | **Temporal** | v1.30.4 — CVE-2026-5724 security patch |
| 2026-04-10 | **Microsoft Agent Framework** | python-1.0.1 — FileCheckpointStorage security hardening |
| 2026-04-08 | **Scion** | Launched — GCP container-per-agent experimental harness |
| 2026-04-08 | **CrewAI** | v1.14.1 — async checkpoint TUI browser |

View File

@ -0,0 +1,92 @@
# Provisioning Workspaces on Fly Machines (CONTAINER_BACKEND=flyio)
Molecule AI can provision agent workspaces as [Fly Machines](https://fly.io/docs/machines/) instead of local Docker containers. Set `CONTAINER_BACKEND=flyio` on your platform and every `POST /workspaces` call creates a Fly Machine in your app — with tier-based resource limits, env-var injection, and A2A registration handled automatically.
> **Scope note (PR #501):** Workspace images must already be published to GHCR before provisioning. The `delete` and `restart` platform endpoints are not yet fully wired to the Fly provisioner — use `flyctl machine stop/destroy` for teardown until a follow-up PR lands.
## What you'll need
- A Molecule AI platform instance
- A [Fly.io](https://fly.io) account with a Fly app created for workspace machines
- `flyctl` installed locally
- `curl` + `jq`
## Setup
```bash
# 1. Set CONTAINER_BACKEND and Fly credentials on your platform process
# (add to your platform's .env or deployment config)
export CONTAINER_BACKEND=flyio
export FLY_API_TOKEN=<your-fly-deploy-token> # flyctl tokens create deploy
export FLY_WORKSPACE_APP=my-molecule-workspaces # fly app created for this purpose
export FLY_REGION=ord # optional, default: ord
# 2. Restart the platform so it picks up CONTAINER_BACKEND=flyio
# (varies by your deployment — docker restart, systemd reload, etc.)
# 3. Verify the platform is using the Fly provisioner
curl -s http://localhost:8080/healthz | jq .
# 4. Create a workspace — the platform provisions it as a Fly Machine
WS=$(curl -s -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "fly-worker",
"role": "Fly-provisioned inference worker",
"runtime": "hermes",
"tier": 2
}' | jq -r '.id')
echo "Workspace ID: $WS"
# 5. Watch the Fly Machine appear (~15–30s)
flyctl machines list --app $FLY_WORKSPACE_APP
# 6. Poll until the workspace is ready
until curl -s http://localhost:8080/workspaces/$WS | jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 7. Smoke test — send an A2A task
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"What region are you running in?"}]}}}' \
| jq '.result.parts[0].text'
# 8. Inspect the Fly Machine details
flyctl machines show --app $FLY_WORKSPACE_APP
# 9. Teardown (see scope note — use flyctl directly for now)
flyctl machines destroy --app $FLY_WORKSPACE_APP --force
```
## Expected output
Step 5 (`flyctl machines list`) shows the new machine with a `started` state within ~30 seconds. The platform injects your workspace secrets, `PLATFORM_URL`, and workspace ID as environment variables on the machine, then issues an auth token so the agent registers on boot.
Step 7 returns the agent's reply — proof that A2A JSON-RPC is routing through the Fly Machine correctly. The `FLY_REGION` env var is visible inside the container, so asking the agent "What region are you running in?" should return `ord` (or whichever region you set).
## Resource tiers
The Fly provisioner applies tier-based limits automatically — no manual machine sizing needed:
| Tier | RAM | CPUs | Use case |
|------|-----|------|----------|
| T2 | 512 MB | 1 | Light workers, eval agents |
| T3 | 2 GB | 2 | General-purpose orchestrators |
| T4 | 4 GB | 4 | Heavy inference, long-context tasks |
Set `"tier": 2`, `3`, or `4` in your `POST /workspaces` body. Runtime images are resolved from GHCR automatically (`hermes` → `ghcr.io/molecule-ai/workspace-hermes:latest`).
## Why Fly Machines
Fly Machines start in milliseconds and run in 35+ regions. Provisioning agent workspaces on Fly means your inference workers can live close to your users with no infrastructure code changes — just set `FLY_REGION` per workspace. Because the Fly provisioner implements the same `Provisioner` interface as the Docker backend, the rest of the platform is unchanged: same REST API, same A2A protocol, same workspace management UI.
## Related
- PR #501: [feat(platform): Fly Machines provisioner](https://github.com/Molecule-AI/molecule-core/pull/501)
- PR #481: [feat(ci): deploy to Fly after image push](https://github.com/Molecule-AI/molecule-core/pull/481)
- [Fly Machines API docs](https://fly.io/docs/machines/api/)
- [Platform API reference](../api-reference.md)
- Issue [#525](https://github.com/Molecule-AI/molecule-core/issues/525)

View File

@ -0,0 +1,74 @@
# Running a Google ADK Workspace on Molecule AI
Google's Agent Development Kit (ADK) is now a first-class runtime on Molecule AI. This tutorial walks you from zero to a running ADK agent workspace — one that persists per-conversation session state and sits alongside your Claude Code and Gemini CLI workers in the same A2A network.
## What you'll need
- A Molecule AI account with at least one provisioned tenant
- A `GOOGLE_API_KEY` from [aistudio.google.com](https://aistudio.google.com) (or Vertex AI credentials — see below)
- `curl` + `jq`
## Setup
```bash
# 1. Store your Google API key as a global secret
curl -s -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GOOGLE_API_KEY","value":"YOUR-AI-STUDIO-KEY"}' | jq .
# 2. Create a google-adk workspace
WS=$(curl -s -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "adk-agent",
"role": "Google ADK inference worker",
"runtime": "google-adk",
"model": "google:gemini-2.0-flash"
}' | jq -r '.id')
echo "Workspace: $WS"
# 3. Wait for ready (~30s)
until curl -s http://localhost:8080/workspaces/$WS | jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 4. Send your first task
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Summarise the ADK architecture in 3 bullet points."}]}}}' \
| jq '.result.parts[0].text'
# 5. Multi-turn — session state is preserved across calls
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Now give me a one-line TL;DR of what you just said."}]}}}' \
| jq '.result.parts[0].text'
# 6. Vertex AI alternative — set these instead of GOOGLE_API_KEY
# curl -X PUT .../secrets -d '{"key":"GOOGLE_GENAI_USE_VERTEXAI","value":"1"}'
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_PROJECT","value":"my-project"}'
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_LOCATION","value":"us-central1"}'
```
## Expected output
After step 4, ADK streams the Gemini response through its event bus, filters for `is_final_response()` events, and returns the agent's reply as a standard A2A text part. Step 5 should reference the prior answer — the adapter ties each A2A `context_id` to an `InMemorySessionService` session, so conversation state is isolated per task context and survives across calls within the same session.
## How it works
The `google-adk` adapter wraps Google ADK's runner/session model behind the same `AgentExecutor` interface used by every other Molecule AI runtime. On each turn, `GoogleADKA2AExecutor` calls `runner.run_async()` with the incoming message wrapped in a `google.genai.types.Content` object, then drains the event stream until it collects a final-response event. The `google:` model prefix is stripped before being passed to ADK — so `google:gemini-2.0-flash` in your workspace config becomes `gemini-2.0-flash` in the ADK `LlmAgent`. Error class names are sanitized before leaving the executor; raw Google SDK stack traces never reach the A2A caller.
## Mixed-runtime teams
ADK workspaces participate in the same A2A network as Claude Code, Gemini CLI, Hermes, and LangGraph workers. An orchestrator can delegate long-context summarisation to a `google-adk` worker (Gemini 1.5 Pro's 1M token window) while routing tool-use tasks to a `claude-code` worker — with no provider-specific code in the orchestrator itself. Add an ADK peer with `POST /workspaces`, set `GOOGLE_API_KEY`, and it's available for `delegate_task` immediately.
## Related
- PR #550: [feat(adapters): add google-adk runtime adapter](https://github.com/Molecule-AI/molecule-core/pull/550)
- [Google ADK (adk-python)](https://github.com/google/adk-python)
- [Gemini CLI runtime tutorial](./gemini-cli-runtime.md)
- [Platform API reference](../api-reference.md)

View File

@ -0,0 +1,184 @@
# Hermes Multi-Provider Dispatch: Native Anthropic, Gemini, and Multi-Turn History
Hermes is Molecule AI's inference router. Out of the box it proxies every model through an OpenAI-compatible shim. That works for plain text, but the shim does format translation on every round-trip — and it gets the Gemini message format wrong (Gemini expects `role: "model"` and a `parts: [{text}]` wrapper; the shim passes `role: "assistant"` and a flat string). It also flattens multi-turn conversations into a single user blob, losing role attribution across turns.
Phases 2a–2c wire three native dispatch paths keyed on `auth_scheme`. This tutorial shows you how to unlock them.
> **Phase 2d scope note:** Tool calling, vision content blocks, system instructions, and streaming on the native paths are scoped for Phase 2d and are **not yet shipped**. This tutorial covers what is merged today: correct native dispatch + multi-turn history continuity.
## What you'll need
- A Molecule AI account with API access
- `ANTHROPIC_API_KEY` **or** `GEMINI_API_KEY` (or both)
- `curl` + `jq`
## The dispatch table
After Phases 2a / 2b / 2c, Hermes picks an inference path based on which provider is configured:
| `auth_scheme` | Dispatch path | Provider | API |
|---|---|---|---|
| `openai` | `_do_openai_compat` | 13 providers (OpenRouter, Groq, Mistral…) | OpenAI-compat shim |
| `anthropic` | `_do_anthropic_native` | Anthropic | Native Messages API |
| `gemini` | `_do_gemini_native` | Google | Native `generateContent` |
| unknown | `_do_openai_compat` + warning | any | OpenAI-compat shim (forward-compat) |
**Rule of thumb:** set `ANTHROPIC_API_KEY` to get native Anthropic dispatch. Set `GEMINI_API_KEY` to get native Gemini dispatch. Set `NOUS_API_KEY` / `HERMES_API_KEY` / `OPENROUTER_API_KEY` to stay on the compat shim. Molecule AI reads these in priority order: `HERMES_API_KEY` → `OPENROUTER_API_KEY` → `ANTHROPIC_API_KEY` → `GEMINI_API_KEY`. The **first key found wins**, so don't set `HERMES_API_KEY` if you want native dispatch.
---
## Setup
```bash
# 0. Export your platform URL and a workspace to use as orchestrator
export MOLECULE_API=http://localhost:8080
export ORCH_ID=<your-orchestrator-workspace-id>
# 1. Store your Anthropic key as a global secret
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":"sk-ant-YOUR-KEY"}' | jq .
# 2. Create a Hermes workspace — Anthropic native dispatch
ANTHROPIC_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-anthropic",
"role": "Inference worker — native Anthropic path",
"runtime": "hermes",
"model": "anthropic:claude-sonnet-4-5"
}' | jq -r '.id')
echo "Anthropic workspace: $ANTHROPIC_WS"
# 3. Wait for it to be ready (~20–30s)
until curl -s $MOLECULE_API/workspaces/$ANTHROPIC_WS | jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 4. Store your Gemini key as a global secret
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GEMINI_API_KEY","value":"YOUR-GEMINI-KEY"}' | jq .
# 5. Create a Hermes workspace — Gemini native dispatch
GEMINI_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-gemini",
"role": "Inference worker — native Gemini path",
"runtime": "hermes",
"model": "gemini:gemini-2.0-flash"
}' | jq -r '.id')
echo "Gemini workspace: $GEMINI_WS"
# 6. Pin the Gemini workspace to Gemini-only keys (no ANTHROPIC_API_KEY override)
curl -s -X PUT $MOLECULE_API/workspaces/$GEMINI_WS/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":""}' | jq .
# 7. Confirm dispatch — send a single-turn probe to the Anthropic workspace
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"Which API are you using to generate this response?"}]}}
}' | jq '.result.parts[0].text'
# 8. Same probe to the Gemini workspace
curl -s -X POST $MOLECULE_API/workspaces/$GEMINI_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"Which API are you using to generate this response?"}]}}
}' | jq '.result.parts[0].text'
# 9. Multi-turn history — Phase 2c keeps turns as turns (not flattened)
# Send turn 1
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"My name is Alice. Remember that."}]}}
}' | jq '.result.parts[0].text'
# 10. Send turn 2 — history is automatically threaded by Hermes Phase 2c
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"What is my name?"}]}}
}' | jq '.result.parts[0].text'
# Expected: "Alice" — not "I don't know", which the old flattened path could produce
```
## Expected output
**Step 7 (Anthropic workspace):** The agent confirms it is calling the Anthropic Messages API natively. Hermes executed `_do_anthropic_native` — no OpenAI-compat translation layer.
**Step 8 (Gemini workspace):** The agent confirms Google `generateContent`. Hermes called `_do_gemini_native`, which passes `role: "model"` (not `"assistant"`) and the `parts: [{text: ...}]` wrapper the native SDK requires. The compat-shim translation that produced incorrect message format is bypassed.
**Step 10 (multi-turn, Phase 2c):** Returns `"Alice"`. Before Phase 2c, history was flattened into a single user blob — the model could recover the gist but lost clean role attribution. Phase 2c passes turns as turns: OpenAI uses `{role, content}`, Anthropic uses the same wire shape for text-only, Gemini uses `{role: "model", parts: [{text}]}`.
## How dispatch works under the hood
`HermesA2AExecutor._do_inference(user_message, history)` reads `self.provider_cfg.auth_scheme`:
```python
if self.provider_cfg.auth_scheme == "anthropic":
return await self._do_anthropic_native(user_message, history)
elif self.provider_cfg.auth_scheme == "gemini":
return await self._do_gemini_native(user_message, history)
else: # "openai" + unknown (forward-compat fallback)
return await self._do_openai_compat(user_message, history)
```
Fail-loud semantics: if the `anthropic` package isn't installed, `_do_anthropic_native` raises a clear `RuntimeError` before any inference attempt. Same for `google-genai`. Silent fallback to the compat shim would mask format errors — Molecule AI chooses loud failure.
## Building a multi-provider team
The real win surfaces in a mixed-provider agent team. Your orchestrator can fan tasks to an Anthropic worker and a Gemini worker simultaneously, each receiving properly formatted messages through their native API paths:
```bash
# Fan out from the orchestrator — both fire in parallel
curl -s -X POST $MOLECULE_API/workspaces/$ORCH_ID/a2a \
-H "Content-Type: application/json" \
-d "{
\"jsonrpc\":\"2.0\",\"id\":\"fan-1\",\"method\":\"message/send\",
\"params\":{\"message\":{\"role\":\"user\",\"parts\":[{\"kind\":\"text\",
\"text\":\"delegate_task_async $ANTHROPIC_WS 'Draft release notes for v2.1' AND delegate_task_async $GEMINI_WS 'Summarise the last 30 days of support tickets'\"}]}}
}" | jq .
```
Both workers use their native inference paths. No LiteLLM proxy layer. No format translation on every request. The orchestrator gets results back through the same A2A protocol regardless of which underlying model powered each task.
## Capability comparison: Hermes native vs the compat shim
What is shipping today (Phases 2a + 2b + 2c — all merged to main):
| Capability | OpenAI-compat shim | Anthropic native | Gemini native |
|---|---|---|---|
| Plain text (single-turn) | ✅ | ✅ | ✅ |
| Multi-turn history | ⚠️ flattened into one user blob | ✅ role-attributed turns | ✅ `role: "model"` + `parts` wrapper |
| Correct Gemini message format | ❌ wrong role + missing parts wrapper | — | ✅ |
| No compat-shim translation overhead | ❌ every request translated | ✅ | ✅ |
What is on the roadmap for Phase 2d (not yet shipped):
| Capability | Anthropic native | Gemini native |
|---|---|---|
| `tool_use` / `tool_result` blocks | 📋 Phase 2d | 📋 Phase 2d |
| Vision content blocks | 📋 Phase 2d | 📋 Phase 2d |
| System instructions (`system=`) | 📋 Phase 2d | 📋 Phase 2d (`system_instruction=`) |
| Extended thinking | 📋 Phase 2d | — |
| Streaming | 📋 Phase 2d | 📋 Phase 2d |
**Why Molecule AI vs Letta / AG2 / n8n:** Those frameworks handle multi-LLM at the application layer — you write different agent classes per provider. Molecule AI handles it at the infrastructure layer. Your workspace configs change; your orchestration code doesn't. Swap a Gemini worker for an Anthropic worker by changing one secret. No code redeploy.
## Related
- PR #240: [Phase 2a — native Anthropic dispatch](https://github.com/Molecule-AI/molecule-core/pull/240)
- PR #255: [Phase 2b — native Gemini dispatch](https://github.com/Molecule-AI/molecule-core/pull/255)
- PR #267: [Phase 2c — multi-turn history on all paths](https://github.com/Molecule-AI/molecule-core/pull/267)
- [Hermes adapter design](../adapters/hermes-adapter-design.md)
- [Platform API reference](../api-reference.md)
- Issue [#513](https://github.com/Molecule-AI/molecule-core/issues/513)

View File

@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"log"
"sync"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
@ -14,8 +15,17 @@ import (
const broadcastChannel = "events:broadcast"
// sseSubscription is a single in-process SSE subscriber.
// deliverToSSE writes to ch; StreamEvents reads from it.
// sseSubscription is a single in-process SSE subscriber: the workspace it
// watches plus the buffered channel events are delivered on.
type sseSubscription struct {
	workspaceID string                // only events for this workspace are delivered
	ch          chan models.WSMessage // buffered; writers drop (non-blocking) when full
}
// Broadcaster fans platform events out to two delivery paths: the shared
// WebSocket hub and any in-process SSE subscribers registered via
// SubscribeSSE.
//
// Fix: the rendered merge left a duplicate `hub *ws.Hub` field line
// (old + new diff lines both present), which is invalid Go — a struct
// cannot declare the same field twice. Keep the single post-merge field set.
type Broadcaster struct {
	hub    *ws.Hub            // WebSocket fan-out hub
	ssesMu sync.RWMutex       // guards sses
	sses   []*sseSubscription // active in-process SSE subscribers
}
func NewBroadcaster(hub *ws.Hub) *Broadcaster {
@ -59,6 +69,9 @@ func (b *Broadcaster) RecordAndBroadcast(ctx context.Context, eventType string,
// Broadcast to local WebSocket clients
b.hub.Broadcast(msg)
// Fan out to in-process SSE subscribers (e.g. GET /events/stream).
b.deliverToSSE(msg)
return nil
}
@ -79,6 +92,52 @@ func (b *Broadcaster) BroadcastOnly(workspaceID string, eventType string, payloa
}
b.hub.Broadcast(msg)
// Fan out to in-process SSE subscribers.
b.deliverToSSE(msg)
}
// SubscribeSSE registers a per-workspace in-process channel for SSE streaming.
// The caller MUST invoke the returned cancel func when it disconnects so the
// subscription is removed and the channel is not leaked.
// SubscribeSSE registers a per-workspace in-process channel for SSE streaming.
// The caller MUST invoke the returned cancel func when it disconnects so the
// subscription is removed and the channel is not leaked.
func (b *Broadcaster) SubscribeSSE(workspaceID string) (<-chan models.WSMessage, func()) {
	sub := &sseSubscription{
		workspaceID: workspaceID,
		ch:          make(chan models.WSMessage, 64),
	}

	b.ssesMu.Lock()
	b.sses = append(b.sses, sub)
	b.ssesMu.Unlock()

	// Removal closure: drops exactly this subscription; order of the
	// remaining subscribers is preserved.
	unsubscribe := func() {
		b.ssesMu.Lock()
		defer b.ssesMu.Unlock()
		for idx, candidate := range b.sses {
			if candidate == sub {
				b.sses = append(b.sses[:idx], b.sses[idx+1:]...)
				return
			}
		}
	}
	return sub.ch, unsubscribe
}
// deliverToSSE fans msg out to every in-process SSE subscriber watching the
// same workspace. Non-blocking: if a subscriber's buffer is full the event is
// dropped with a log line (the WebSocket path still delivers it).
// deliverToSSE fans msg out to every in-process SSE subscriber watching the
// same workspace. Non-blocking: if a subscriber's buffer is full the event is
// dropped with a log line (the WebSocket path still delivers it).
func (b *Broadcaster) deliverToSSE(msg models.WSMessage) {
	b.ssesMu.RLock()
	defer b.ssesMu.RUnlock()
	for _, sub := range b.sses {
		if sub.workspaceID == msg.WorkspaceID {
			select {
			case sub.ch <- msg:
			default:
				log.Printf("SSE: subscriber buffer full for workspace %s, dropping event %s", msg.WorkspaceID, msg.Event)
			}
		}
	}
}
// Subscribe listens to Redis pub/sub and relays events to the WebSocket hub.

View File

@ -251,6 +251,12 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
if logActivity {
h.logA2ASuccess(ctx, workspaceID, callerID, body, respBody, a2aMethod, resp.StatusCode, durationMs)
}
// Track LLM token usage for cost transparency (#593).
// Fires in a detached goroutine so token accounting never adds latency
// to the critical A2A path.
go extractAndUpsertTokenUsage(context.WithoutCancel(ctx), workspaceID, respBody)
return resp.StatusCode, respBody, nil
}
@ -577,3 +583,65 @@ func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) e
// token" branch so the handler-level guard can detect it without string
// matching (the wsauth errors are typed for the invalid case).
var errInvalidCallerToken = errors.New("missing caller auth token")
// extractAndUpsertTokenUsage parses LLM usage from a raw A2A response body
// and persists it via upsertTokenUsage. Safe to call in a goroutine — logs
// errors but never panics. ctx must already be detached from the request.
// extractAndUpsertTokenUsage parses LLM usage from a raw A2A response body
// and persists it via upsertTokenUsage. Safe to call in a goroutine — logs
// errors but never panics. ctx must already be detached from the request.
func extractAndUpsertTokenUsage(ctx context.Context, workspaceID string, respBody []byte) {
	inTok, outTok := parseUsageFromA2AResponse(respBody)
	if inTok == 0 && outTok == 0 {
		return // no recognisable usage data; nothing to record
	}
	upsertTokenUsage(ctx, workspaceID, inTok, outTok)
}
// parseUsageFromA2AResponse extracts input_tokens / output_tokens from an A2A
// JSON-RPC response. Inspects two locations in order of preference:
// 1. result.usage — the JSON-RPC 2.0 result envelope from workspace agents.
// 2. usage — top-level, for non-JSON-RPC or direct Anthropic-shaped payloads.
//
// Returns (0, 0) when no recognisable usage data is found.
// parseUsageFromA2AResponse extracts input_tokens / output_tokens from an A2A
// JSON-RPC response body. Two locations are inspected, in order of preference:
//
//  1. result.usage — the JSON-RPC 2.0 result envelope from workspace agents.
//  2. usage — top-level, for non-JSON-RPC or direct Anthropic-shaped payloads.
//
// Returns (0, 0) when no recognisable usage data is found.
func parseUsageFromA2AResponse(body []byte) (inputTokens, outputTokens int64) {
	if len(body) == 0 {
		return 0, 0
	}
	var envelope map[string]json.RawMessage
	if json.Unmarshal(body, &envelope) != nil {
		return 0, 0
	}
	// Preferred: usage nested under the JSON-RPC "result" object.
	if rawResult, found := envelope["result"]; found {
		var resultObj map[string]json.RawMessage
		if json.Unmarshal(rawResult, &resultObj) == nil {
			if in, out, ok := readUsageMap(resultObj); ok {
				return in, out
			}
		}
	}
	// Fallback: usage at the top level of the response.
	if in, out, ok := readUsageMap(envelope); ok {
		return in, out
	}
	return 0, 0
}
// readUsageMap extracts input_tokens / output_tokens from the "usage" key of m.
// Returns (0, 0, false) when the key is absent or contains no non-zero values.
func readUsageMap(m map[string]json.RawMessage) (inputTokens, outputTokens int64, ok bool) {
rawUsage, has := m["usage"]
if !has {
return 0, 0, false
}
var usage struct {
InputTokens int64 `json:"input_tokens"`
OutputTokens int64 `json:"output_tokens"`
}
if err := json.Unmarshal(rawUsage, &usage); err != nil {
return 0, 0, false
}
if usage.InputTokens == 0 && usage.OutputTokens == 0 {
return 0, 0, false
}
return usage.InputTokens, usage.OutputTokens, true
}

View File

@ -54,6 +54,13 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
return // response already written
}
// #548 — prevent self-delegation: a workspace delegating to itself
// acquires _run_lock twice on the same mutex, deadlocking permanently.
if sourceID == body.TargetID {
c.JSON(http.StatusBadRequest, gin.H{"error": "self-delegation not permitted"})
return
}
// #124 — idempotency. If the caller supplies an idempotency_key, return
// the existing delegation when (workspace_id, idempotency_key) already
// exists and is not in a failed terminal state.

View File

@ -88,6 +88,37 @@ func TestDelegate_InvalidUUIDTargetID(t *testing.T) {
}
}
// ---------- Delegate: self-delegation → 400 ----------

// TestDelegate_SelfDelegation_Rejected verifies the #548 guard: delegating a
// workspace to itself must be rejected with 400 and a stable error message
// (the handler-side fix for the _run_lock double-acquire deadlock).
func TestDelegate_SelfDelegation_Rejected(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	dh := NewDelegationHandler(wh, broadcaster)
	// Use the same UUID for both source and target to trigger the self-delegation guard.
	selfID := "11111111-2222-3333-4444-555555555555"
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: selfID}}
	body := `{"target_id":"` + selfID + `","task":"do something"}`
	c.Request = httptest.NewRequest("POST", "/workspaces/"+selfID+"/delegate", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	dh.Delegate(c)
	if w.Code != http.StatusBadRequest {
		t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	// Fix: the Unmarshal error was previously discarded, so a malformed body
	// produced a confusing "got <nil>" failure instead of pointing at the JSON.
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("response is not valid JSON: %v — body: %s", err, w.Body.String())
	}
	if resp["error"] != "self-delegation not permitted" {
		t.Errorf("expected 'self-delegation not permitted', got %v", resp["error"])
	}
}
// ---------- Delegate: success → 202 with delegation_id ----------
func TestDelegate_Success(t *testing.T) {

View File

@ -0,0 +1,115 @@
// Package handlers — GitHub App installation-token refresh endpoint.
//
// GET /admin/github-installation-token returns a fresh GitHub App
// installation token on demand. Long-running workspace containers use
// this as a git credential helper and for explicit `gh auth` re-runs
// so they never operate with an expired GH_TOKEN.
//
// # Why this endpoint?
//
// The github-app-auth plugin (PR #506) injects GH_TOKEN + GITHUB_TOKEN
// into a workspace container's env at provision time. Those tokens are
// GitHub App installation tokens with a fixed ~60 min TTL. The plugin
// keeps a server-side in-process cache and proactively refreshes it
// 5 min before expiry, but the workspace env is set once at container
// start and never updated — so any workspace alive >60 min ends up with
// an expired token (issue #547).
//
// The fix is:
//
// 1. Platform side (this file): expose GET /admin/github-installation-token.
// The handler delegates to the registered TokenProvider (typically the
// github-app-auth plugin), whose cache is always fresh. Gated behind
// AdminAuth — any valid workspace bearer token can call it.
//
// 2. Workspace side: a shell credential helper
// (workspace-template/scripts/molecule-git-token-helper.sh) configured
// as the git credential helper. git calls it on every push/fetch;
// it hits this endpoint and emits the fresh token to stdout. A 30-min
// cron also runs `gh auth login --with-token` using the same helper.
//
// # Approach chosen
//
// Option B (pre-flight/on-demand): workspaces poll for a token when
// they need one (credential helper callback). This is preferable over a
// background goroutine pusher (Option A) because:
//
// - The plugin already maintains its own refresh cache — there is no
// token to refresh on the platform side.
// - Pushing a new token into running containers requires docker exec /
// env mutation, which the architecture explicitly rejects (see issue
// #547 "Alternatives considered").
// - On-demand is pull-based, stateless, and trivially testable.
package handlers
import (
"log"
"net/http"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/pkg/provisionhook"
"github.com/gin-gonic/gin"
)
// GitHubTokenHandler serves GET /admin/github-installation-token.
type GitHubTokenHandler struct {
	// registry holds the provision-hook plugin chain; nil when no GitHub
	// App plugin is registered (dev / self-hosted deployments).
	registry *provisionhook.Registry
}
// NewGitHubTokenHandler constructs the handler. reg may be nil when
// no GitHub App plugin is registered (dev / self-hosted deployments);
// the handler then answers 404 on every request.
func NewGitHubTokenHandler(reg *provisionhook.Registry) *GitHubTokenHandler {
	handler := &GitHubTokenHandler{
		registry: reg,
	}
	return handler
}
// GetInstallationToken handles GET /admin/github-installation-token.
//
// Returns:
//
//	200 {"token": "ghs_...", "expires_at": "2026-04-17T22:50:00Z"}
//	404 {"error": "no GitHub App configured"} — GITHUB_APP_ID not set
//	404 {"error": "no token provider registered"} — plugin loaded but
//	    doesn't implement TokenProvider
//	500 {"error": "token refresh failed"} — provider returned error
//
// The 404 vs 403 distinction is intentional: a 404 means the feature is
// simply not configured, not that the caller is forbidden. This matches
// the pattern used by GET /admin/workspaces/:id/test-token.
//
// Callers must retry with exponential back-off on 500 — a transient
// upstream GitHub API error should not permanently block git operations.
func (h *GitHubTokenHandler) GetInstallationToken(c *gin.Context) {
	// Guard clauses: feature entirely absent, or present without a
	// TokenProvider implementation — both read as "not configured" (404).
	if h.registry == nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "no GitHub App configured"})
		return
	}
	tokenProvider := h.registry.FirstTokenProvider()
	if tokenProvider == nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "no token provider registered"})
		return
	}

	token, expiresAt, err := tokenProvider.Token(c.Request.Context())
	if err != nil {
		log.Printf("[github] token refresh failed: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "token refresh failed"})
		return
	}
	if token == "" {
		// A nil error with an empty token is a plugin bug; surface as 500.
		log.Printf("[github] token provider returned empty token")
		c.JSON(http.StatusInternalServerError, gin.H{"error": "token refresh failed: empty token"})
		return
	}

	// Never log the token itself.
	ttlSeconds := time.Until(expiresAt).Seconds()
	log.Printf("[github] served fresh installation token (expires %s, TTL %.0fs)",
		expiresAt.Format(time.RFC3339), ttlSeconds)
	c.JSON(http.StatusOK, gin.H{
		"token":      token,
		"expires_at": expiresAt.UTC().Format(time.RFC3339),
	})
}

View File

@ -0,0 +1,232 @@
package handlers
import (
"context"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/pkg/provisionhook"
"github.com/gin-gonic/gin"
)
// ─── mock helpers ────────────────────────────────────────────────────────────

// mockMutatorOnly implements EnvMutator but NOT TokenProvider.
type mockMutatorOnly struct{ name string }

// Name returns the mock plugin's registration name.
func (m *mockMutatorOnly) Name() string { return m.name }

// MutateEnv is a no-op; present only to satisfy the EnvMutator interface.
func (m *mockMutatorOnly) MutateEnv(_ context.Context, _ string, _ map[string]string) error {
	return nil
}

// mockTokenMutator implements both EnvMutator and TokenProvider.
// Set err to simulate a provider failure; otherwise returns token + expiresAt.
type mockTokenMutator struct {
	name      string
	token     string
	expiresAt time.Time
	err       error
}

// Name returns the mock plugin's registration name.
func (m *mockTokenMutator) Name() string { return m.name }

// MutateEnv is a no-op; present only to satisfy the EnvMutator interface.
func (m *mockTokenMutator) MutateEnv(_ context.Context, _ string, _ map[string]string) error {
	return nil
}

// Token returns the canned (token, expiresAt, err) triple configured on the mock.
func (m *mockTokenMutator) Token(_ context.Context) (string, time.Time, error) {
	return m.token, m.expiresAt, m.err
}
// ─── request helper ──────────────────────────────────────────────────────────

// newGitHubTokenRequest builds a ResponseRecorder and a gin test context
// pre-loaded with a GET /admin/github-installation-token request, ready to
// hand straight to GetInstallationToken.
func newGitHubTokenRequest() (*httptest.ResponseRecorder, *gin.Context) {
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Request = httptest.NewRequest(http.MethodGet, "/admin/github-installation-token", nil)
	return w, c
}
// ─── tests ───────────────────────────────────────────────────────────────────

// TestGitHubToken_NilRegistry — no GitHub App plugin loaded at all.
// Expect 404 so operators can distinguish "not configured" from "forbidden".
func TestGitHubToken_NilRegistry(t *testing.T) {
	h := NewGitHubTokenHandler(nil)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404 for nil registry, got %d: %s", w.Code, w.Body.String())
	}
	// The error body must be machine-readable JSON with a populated "error" key.
	var body map[string]string
	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
		t.Fatalf("response is not valid JSON: %v", err)
	}
	if body["error"] == "" {
		t.Error("expected non-empty error field in response")
	}
}

// TestGitHubToken_NoTokenProvider — plugin registered but doesn't implement
// TokenProvider (e.g. a non-GitHub mutator in the chain).
// Expect 404 — the GitHub App endpoint is not available.
func TestGitHubToken_NoTokenProvider(t *testing.T) {
	reg := provisionhook.NewRegistry()
	reg.Register(&mockMutatorOnly{name: "other-plugin"})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404 when no TokenProvider, got %d: %s", w.Code, w.Body.String())
	}
}

// TestGitHubToken_ProviderError — provider returns an error (e.g. GitHub API
// unreachable). Expect 500 so the workspace credential helper retries.
func TestGitHubToken_ProviderError(t *testing.T) {
	reg := provisionhook.NewRegistry()
	reg.Register(&mockTokenMutator{
		name: "github-app-auth",
		err:  errors.New("github: 503 service unavailable"),
	})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500 on provider error, got %d: %s", w.Code, w.Body.String())
	}
	var body map[string]string
	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
		t.Fatalf("response is not valid JSON: %v", err)
	}
	if body["error"] == "" {
		t.Error("expected non-empty error field in 500 response")
	}
}

// TestGitHubToken_EmptyToken — provider returns no error but an empty token.
// This should never happen in normal operation but is a programming error in
// the plugin; treat it as a refresh failure.
func TestGitHubToken_EmptyToken(t *testing.T) {
	exp := time.Now().Add(55 * time.Minute)
	reg := provisionhook.NewRegistry()
	reg.Register(&mockTokenMutator{
		name:      "github-app-auth",
		token:     "", // empty — plugin bug
		expiresAt: exp,
	})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500 for empty token, got %d: %s", w.Code, w.Body.String())
	}
}
// TestGitHubToken_HappyPath — provider returns a valid token.
// Assert: 200, token present, expires_at is a valid RFC3339 timestamp
// with a positive TTL (i.e. the token is not already expired).
func TestGitHubToken_HappyPath(t *testing.T) {
	// Truncate to whole seconds so RFC3339 round-tripping is lossless.
	exp := time.Now().UTC().Add(55 * time.Minute).Truncate(time.Second)
	reg := provisionhook.NewRegistry()
	reg.Register(&mockTokenMutator{
		name:      "github-app-auth",
		token:     "ghs_TestTokenABC123",
		expiresAt: exp,
	})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var body struct {
		Token     string `json:"token"`
		ExpiresAt string `json:"expires_at"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
		t.Fatalf("response is not valid JSON: %v", err)
	}
	if body.Token != "ghs_TestTokenABC123" {
		t.Errorf("expected token 'ghs_TestTokenABC123', got %q", body.Token)
	}
	parsed, err := time.Parse(time.RFC3339, body.ExpiresAt)
	if err != nil {
		t.Fatalf("expires_at is not valid RFC3339: %q — %v", body.ExpiresAt, err)
	}
	if !parsed.After(time.Now()) {
		t.Errorf("expires_at %s is in the past — handler served an expired token", body.ExpiresAt)
	}
}
// TestGitHubToken_FirstProviderWins — two mutators registered; only the first
// implements TokenProvider. Confirm the first one is used (registration order).
func TestGitHubToken_FirstProviderWins(t *testing.T) {
	exp := time.Now().UTC().Add(55 * time.Minute)
	reg := provisionhook.NewRegistry()
	reg.Register(&mockTokenMutator{
		name:      "first-provider",
		token:     "ghs_First",
		expiresAt: exp,
	})
	reg.Register(&mockTokenMutator{
		name:      "second-provider",
		token:     "ghs_Second",
		expiresAt: exp,
	})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var body map[string]string
	// Fix: the Unmarshal error was previously discarded; a malformed body
	// would fail with a misleading empty-map assertion instead of the cause.
	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
		t.Fatalf("response is not valid JSON: %v — body: %s", err, w.Body.String())
	}
	if body["token"] != "ghs_First" {
		t.Errorf("expected first provider's token 'ghs_First', got %q", body["token"])
	}
}

// TestGitHubToken_NonProviderBeforeProvider — a plain EnvMutator is registered
// first, then a TokenProvider. Confirm the provider is still found (skip over
// non-providers).
func TestGitHubToken_NonProviderBeforeProvider(t *testing.T) {
	exp := time.Now().UTC().Add(55 * time.Minute)
	reg := provisionhook.NewRegistry()
	reg.Register(&mockMutatorOnly{name: "env-injector"})
	reg.Register(&mockTokenMutator{
		name:      "github-app-auth",
		token:     "ghs_FoundBehindOther",
		expiresAt: exp,
	})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var body map[string]string
	// Fix: check the Unmarshal error (errcheck) for the same reason as above.
	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
		t.Fatalf("response is not valid JSON: %v — body: %s", err, w.Body.String())
	}
	if body["token"] != "ghs_FoundBehindOther" {
		t.Errorf("expected 'ghs_FoundBehindOther', got %q", body["token"])
	}
}

View File

@ -28,9 +28,11 @@ func TestWorkspaceCreate_WithParentID(t *testing.T) {
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
parentID := "parent-ws-123"
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Child Agent", nil, 1, "langgraph", sqlmock.AnyArg(), &parentID, nil, "none").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
mock.ExpectExec("INSERT INTO canvas_layouts").
WithArgs(sqlmock.AnyArg(), float64(0), float64(0)).
WillReturnResult(sqlmock.NewResult(0, 1))
@ -61,9 +63,11 @@ func TestWorkspaceCreate_ExplicitClaudeCodeRuntime(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "CC Agent", nil, 2, "claude-code", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
mock.ExpectExec("INSERT INTO canvas_layouts").
WithArgs(sqlmock.AnyArg(), float64(10), float64(20)).
WillReturnResult(sqlmock.NewResult(0, 1))

View File

@ -248,11 +248,17 @@ func TestWorkspaceCreate(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", "/tmp/configs")
// Expect transaction begin for atomic workspace+secrets creation
mock.ExpectBegin()
// Expect workspace INSERT (uuid is dynamic, use AnyArg for id, runtime, awareness_namespace)
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Test Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WillReturnResult(sqlmock.NewResult(0, 1))
// Expect transaction commit (no secrets in this payload)
mock.ExpectCommit()
// Expect canvas_layouts INSERT
mock.ExpectExec("INSERT INTO canvas_layouts").
WithArgs(sqlmock.AnyArg(), float64(100), float64(200)).

View File

@ -0,0 +1,254 @@
package handlers
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"log"
	"net/http"
	"time"

	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
	"github.com/gin-gonic/gin"
)
// resolveOrgID returns the effective org ID for a workspace: the parent_id
// when one exists, or the workspace's own ID when it is the org root.
// Returns an empty string (and nil error) if the workspace is not found.
func resolveOrgID(ctx context.Context, workspaceID string) (string, error) {
	var parentID sql.NullString
	err := db.DB.QueryRowContext(ctx,
		`SELECT parent_id FROM workspaces WHERE id = $1`,
		workspaceID,
	).Scan(&parentID)
	// Fix: use errors.Is rather than == so drivers/middleware that wrap
	// sql.ErrNoRows (Go 1.13+ error wrapping) still hit the not-found path
	// instead of surfacing as a spurious DB error.
	if errors.Is(err, sql.ErrNoRows) {
		return "", nil
	}
	if err != nil {
		return "", err
	}
	if parentID.Valid && parentID.String != "" {
		return parentID.String, nil
	}
	// No parent recorded — this workspace is itself the org root.
	return workspaceID, nil
}
// checkOrgPluginAllowlist returns (true, reason) when the plugin is blocked
// by the org's allowlist, or (false, "") when the install is permitted.
//
// Semantics:
//   - No allowlist rows for this org → allow-all (backward compat).
//   - Allowlist exists and plugin is on it → allowed.
//   - Allowlist exists and plugin is NOT on it → blocked (403).
//   - DB errors → fail-open with a log (don't block installs on DB hiccup).
func checkOrgPluginAllowlist(ctx context.Context, workspaceID, pluginName string) (blocked bool, reason string) {
	orgID, err := resolveOrgID(ctx, workspaceID)
	if err != nil {
		log.Printf("allowlist: resolveOrgID(%s) failed: %v — allowing install", workspaceID, err)
		return false, ""
	}
	if orgID == "" {
		// Workspace not found; let later checks handle it.
		return false, ""
	}

	// Fast path: is the plugin explicitly on the allowlist?
	var onList bool
	err = db.DB.QueryRowContext(ctx, `
		SELECT EXISTS(
			SELECT 1 FROM org_plugin_allowlist
			WHERE org_id = $1 AND plugin_name = $2
		)
	`, orgID, pluginName).Scan(&onList)
	if err != nil {
		log.Printf("allowlist: existence check failed (org=%s plugin=%s): %v — allowing install", orgID, pluginName, err)
		return false, ""
	}
	if onList {
		return false, ""
	}

	// The plugin is absent — but absence only blocks when an allowlist is
	// actually configured. An empty allowlist means allow-all.
	var rowCount int
	if err := db.DB.QueryRowContext(ctx,
		`SELECT COUNT(*) FROM org_plugin_allowlist WHERE org_id = $1`,
		orgID,
	).Scan(&rowCount); err != nil {
		log.Printf("allowlist: count check failed (org=%s): %v — allowing install", orgID, err)
		return false, ""
	}
	if rowCount > 0 {
		return true, fmt.Sprintf("plugin %q is not in the org allowlist", pluginName)
	}
	return false, ""
}
// OrgPluginAllowlistHandler manages the per-org plugin governance registry.
// It is stateless — all state lives in the org_plugin_allowlist table.
type OrgPluginAllowlistHandler struct{}

// NewOrgPluginAllowlistHandler constructs an OrgPluginAllowlistHandler.
func NewOrgPluginAllowlistHandler() *OrgPluginAllowlistHandler {
	return &OrgPluginAllowlistHandler{}
}
// allowlistEntry is the JSON shape for a single allowlist record.
type allowlistEntry struct {
	PluginName string    `json:"plugin_name"`
	EnabledBy  string    `json:"enabled_by"` // workspace ID of the admin who added the entry
	EnabledAt  time.Time `json:"enabled_at"`
}

// putAllowlistRequest is the request body for PUT /orgs/:id/plugins/allowlist.
// Plugins holds the complete desired allowlist; the handler replaces the
// current entries atomically. An empty slice clears the allowlist (allow-all).
type putAllowlistRequest struct {
	Plugins   []string `json:"plugins"`
	EnabledBy string   `json:"enabled_by"` // workspace ID of the admin performing the change
}
// GetAllowlist handles GET /orgs/:id/plugins/allowlist.
//
// Returns the current allowlist for the org workspace identified by :id.
// An empty array means no allowlist is configured (allow-all). Auth: AdminAuth.
func (h *OrgPluginAllowlistHandler) GetAllowlist(c *gin.Context) {
	ctx := c.Request.Context()
	orgID := c.Param("id")

	// Verify the org workspace exists before touching the allowlist table.
	var orgExists bool
	err := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		orgID,
	).Scan(&orgExists)
	if err != nil {
		log.Printf("allowlist: org check failed for %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify org"})
		return
	}
	if !orgExists {
		c.JSON(http.StatusNotFound, gin.H{"error": "org not found"})
		return
	}

	rows, err := db.DB.QueryContext(ctx, `
		SELECT plugin_name, enabled_by, enabled_at
		FROM org_plugin_allowlist
		WHERE org_id = $1
		ORDER BY plugin_name
	`, orgID)
	if err != nil {
		log.Printf("allowlist: query failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch allowlist"})
		return
	}
	defer rows.Close()

	// Start from a non-nil slice so an empty allowlist serialises as []
	// rather than null.
	entries := make([]allowlistEntry, 0)
	for rows.Next() {
		var entry allowlistEntry
		if scanErr := rows.Scan(&entry.PluginName, &entry.EnabledBy, &entry.EnabledAt); scanErr != nil {
			log.Printf("allowlist: scan error for org %s: %v", orgID, scanErr)
			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read allowlist"})
			return
		}
		entries = append(entries, entry)
	}
	if iterErr := rows.Err(); iterErr != nil {
		log.Printf("allowlist: rows error for org %s: %v", orgID, iterErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read allowlist"})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"org_id":    orgID,
		"plugins":   entries,
		"allow_all": len(entries) == 0,
	})
}
// PutAllowlist handles PUT /orgs/:id/plugins/allowlist.
//
// Replaces the org's allowlist atomically with the supplied plugin names.
// Sending an empty plugins array clears the allowlist (reverts to allow-all).
// Auth: AdminAuth.
func (h *OrgPluginAllowlistHandler) PutAllowlist(c *gin.Context) {
	orgID := c.Param("id")
	ctx := c.Request.Context()
	var req putAllowlistRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	// Require an audit identity before doing any work.
	if req.EnabledBy == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "enabled_by is required"})
		return
	}
	// Validate each plugin name for safety before touching the DB.
	for _, name := range req.Plugins {
		if err := validatePluginName(name); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{
				"error":       "invalid plugin name",
				"plugin_name": name,
				"detail":      err.Error(),
			})
			return
		}
	}
	// Verify the org workspace exists.
	var exists bool
	if err := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		orgID,
	).Scan(&exists); err != nil {
		log.Printf("allowlist: org check failed for %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify org"})
		return
	}
	if !exists {
		c.JSON(http.StatusNotFound, gin.H{"error": "org not found"})
		return
	}
	// Replace atomically: delete all current entries, then insert the new set.
	// The DELETE + INSERTs share one transaction so readers never observe a
	// half-replaced allowlist.
	tx, err := db.DB.BeginTx(ctx, nil)
	if err != nil {
		log.Printf("allowlist: begin tx failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to start transaction"})
		return
	}
	defer tx.Rollback() //nolint:errcheck // superseded by Commit on success path
	if _, err := tx.ExecContext(ctx,
		`DELETE FROM org_plugin_allowlist WHERE org_id = $1`,
		orgID,
	); err != nil {
		log.Printf("allowlist: delete failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update allowlist"})
		return
	}
	for _, name := range req.Plugins {
		// ON CONFLICT DO NOTHING makes duplicate names in the request benign.
		if _, err := tx.ExecContext(ctx, `
			INSERT INTO org_plugin_allowlist (org_id, plugin_name, enabled_by)
			VALUES ($1, $2, $3)
			ON CONFLICT (org_id, plugin_name) DO NOTHING
		`, orgID, name, req.EnabledBy); err != nil {
			log.Printf("allowlist: insert %q failed for org %s: %v", name, orgID, err)
			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update allowlist"})
			return
		}
	}
	if err := tx.Commit(); err != nil {
		log.Printf("allowlist: commit failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to commit allowlist update"})
		return
	}
	c.JSON(http.StatusOK, gin.H{
		"org_id":    orgID,
		"plugins":   req.Plugins,
		"allow_all": len(req.Plugins) == 0,
	})
}

View File

@ -0,0 +1,555 @@
package handlers
import (
"bytes"
"context"
"database/sql"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// ─── helpers ───────────────────────────────────────────────────────────────

// newAllowlistGET builds a recorder + gin context carrying a
// GET /orgs/:id/plugins/allowlist request for the given org.
func newAllowlistGET(orgID string) (*httptest.ResponseRecorder, *gin.Context) {
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: orgID}}
	c.Request = httptest.NewRequest(http.MethodGet, "/orgs/"+orgID+"/plugins/allowlist", nil)
	return w, c
}

// newAllowlistPUT builds a recorder + gin context carrying a
// PUT /orgs/:id/plugins/allowlist request whose body is the JSON
// serialisation of `body`.
func newAllowlistPUT(orgID string, body interface{}) (*httptest.ResponseRecorder, *gin.Context) {
	b, _ := json.Marshal(body)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: orgID}}
	c.Request = httptest.NewRequest(http.MethodPut, "/orgs/"+orgID+"/plugins/allowlist",
		bytes.NewReader(b))
	c.Request.Header.Set("Content-Type", "application/json")
	return w, c
}
// ─── GetAllowlist ──────────────────────────────────────────────────────────

// TestGetAllowlist_OrgNotFound — unknown org ID → 404.
func TestGetAllowlist_OrgNotFound(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-missing").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistGET("org-missing")
	h.GetAllowlist(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404, got %d: %s", w.Code, w.Body.String())
	}
}

// TestGetAllowlist_DBErrorOnOrgCheck — DB failure during the org-existence
// check → 500.
func TestGetAllowlist_DBErrorOnOrgCheck(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnError(sql.ErrConnDone)
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistGET("org-1")
	h.GetAllowlist(c)
	if w.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500, got %d: %s", w.Code, w.Body.String())
	}
}

// TestGetAllowlist_Empty — org exists, no allowlist rows → 200 with an empty
// plugins array and allow_all=true.
func TestGetAllowlist_Empty(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectQuery(`SELECT plugin_name, enabled_by, enabled_at`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"plugin_name", "enabled_by", "enabled_at"}))
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistGET("org-1")
	h.GetAllowlist(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp struct {
		OrgID    string           `json:"org_id"`
		Plugins  []allowlistEntry `json:"plugins"`
		AllowAll bool             `json:"allow_all"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("bad JSON: %v", err)
	}
	if resp.OrgID != "org-1" {
		t.Errorf("expected org_id=org-1, got %q", resp.OrgID)
	}
	if len(resp.Plugins) != 0 {
		t.Errorf("expected 0 plugins, got %d", len(resp.Plugins))
	}
	if !resp.AllowAll {
		t.Error("expected allow_all=true for empty list")
	}
}

// TestGetAllowlist_WithEntries — two rows → 200, entries ordered by
// plugin_name, allow_all=false.
func TestGetAllowlist_WithEntries(t *testing.T) {
	mock := setupTestDB(t)
	ts := time.Date(2026, 4, 1, 0, 0, 0, 0, time.UTC)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectQuery(`SELECT plugin_name, enabled_by, enabled_at`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"plugin_name", "enabled_by", "enabled_at"}).
			AddRow("browser-automation", "admin-ws", ts).
			AddRow("superpowers", "admin-ws", ts))
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistGET("org-1")
	h.GetAllowlist(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp struct {
		OrgID    string           `json:"org_id"`
		Plugins  []allowlistEntry `json:"plugins"`
		AllowAll bool             `json:"allow_all"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("bad JSON: %v", err)
	}
	if len(resp.Plugins) != 2 {
		t.Fatalf("expected 2 plugins, got %d", len(resp.Plugins))
	}
	if resp.Plugins[0].PluginName != "browser-automation" {
		t.Errorf("expected first plugin=browser-automation, got %q", resp.Plugins[0].PluginName)
	}
	if resp.AllowAll {
		t.Error("expected allow_all=false when list is non-empty")
	}
}

// TestGetAllowlist_DBErrorOnQuery — org check passes, allowlist query fails
// → 500.
func TestGetAllowlist_DBErrorOnQuery(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectQuery(`SELECT plugin_name, enabled_by, enabled_at`).
		WithArgs("org-1").
		WillReturnError(sql.ErrConnDone)
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistGET("org-1")
	h.GetAllowlist(c)
	if w.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500, got %d: %s", w.Code, w.Body.String())
	}
}
// ─── PutAllowlist ──────────────────────────────────────────────────────────

// TestPutAllowlist_MissingEnabledBy — enabled_by omitted → 400 before any
// DB mutation (no mock expectations are registered beyond setup).
func TestPutAllowlist_MissingEnabledBy(t *testing.T) {
	setupTestDB(t)
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins": []string{"my-plugin"},
		// enabled_by intentionally omitted
	})
	h.PutAllowlist(c)
	if w.Code != http.StatusBadRequest {
		t.Fatalf("expected 400, got %d: %s", w.Code, w.Body.String())
	}
}

// TestPutAllowlist_InvalidPluginName — path-traversal-style plugin name is
// rejected by validatePluginName → 400, again before any DB mutation.
func TestPutAllowlist_InvalidPluginName(t *testing.T) {
	setupTestDB(t)
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins":    []string{"../../evil"},
		"enabled_by": "admin-ws",
	})
	h.PutAllowlist(c)
	if w.Code != http.StatusBadRequest {
		t.Fatalf("expected 400 for invalid plugin name, got %d: %s", w.Code, w.Body.String())
	}
}

// TestPutAllowlist_OrgNotFound — unknown org ID → 404.
func TestPutAllowlist_OrgNotFound(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-missing").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistPUT("org-missing", map[string]interface{}{
		"plugins":    []string{"my-plugin"},
		"enabled_by": "admin-ws",
	})
	h.PutAllowlist(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404, got %d: %s", w.Code, w.Body.String())
	}
}
func TestPutAllowlist_AddPlugins(t *testing.T) {
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT EXISTS`).
WithArgs("org-1").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
mock.ExpectBegin()
mock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
WithArgs("org-1").
WillReturnResult(sqlmock.NewResult(0, 0))
mock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
WithArgs("org-1", "my-plugin", "admin-ws").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
h := NewOrgPluginAllowlistHandler()
w, c := newAllowlistPUT("org-1", map[string]interface{}{
"plugins": []string{"my-plugin"},
"enabled_by": "admin-ws",
})
h.PutAllowlist(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp struct {
OrgID string `json:"org_id"`
Plugins []string `json:"plugins"`
AllowAll bool `json:"allow_all"`
}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("bad JSON: %v", err)
}
if len(resp.Plugins) != 1 || resp.Plugins[0] != "my-plugin" {
t.Errorf("unexpected plugins: %v", resp.Plugins)
}
if resp.AllowAll {
t.Error("expected allow_all=false for non-empty plugins list")
}
}
func TestPutAllowlist_ClearAllowlist(t *testing.T) {
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT EXISTS`).
WithArgs("org-1").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
mock.ExpectBegin()
mock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
WithArgs("org-1").
WillReturnResult(sqlmock.NewResult(0, 3))
// No INSERT expected — empty plugins slice.
mock.ExpectCommit()
h := NewOrgPluginAllowlistHandler()
w, c := newAllowlistPUT("org-1", map[string]interface{}{
"plugins": []string{},
"enabled_by": "admin-ws",
})
h.PutAllowlist(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp struct {
AllowAll bool `json:"allow_all"`
}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("bad JSON: %v", err)
}
if !resp.AllowAll {
t.Error("expected allow_all=true after clearing all plugins")
}
}
// TestPutAllowlist_MultiplePlugins verifies one INSERT per plugin, issued in
// the order the plugins appear in the request payload.
func TestPutAllowlist_MultiplePlugins(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectBegin()
	mock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
		WithArgs("org-1").
		WillReturnResult(sqlmock.NewResult(0, 0))
	mock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
		WithArgs("org-1", "browser-automation", "admin-ws").
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
		WithArgs("org-1", "superpowers", "admin-ws").
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectCommit()
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins":    []string{"browser-automation", "superpowers"},
		"enabled_by": "admin-ws",
	})
	h.PutAllowlist(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
}
// TestPutAllowlist_InsertFails verifies that a DB error during INSERT rolls
// the transaction back and the handler responds 500.
func TestPutAllowlist_InsertFails(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectBegin()
	mock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
		WithArgs("org-1").
		WillReturnResult(sqlmock.NewResult(0, 0))
	mock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
		WithArgs("org-1", "my-plugin", "admin-ws").
		WillReturnError(sql.ErrConnDone)
	mock.ExpectRollback()
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins":    []string{"my-plugin"},
		"enabled_by": "admin-ws",
	})
	h.PutAllowlist(c)
	if w.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500 on insert failure, got %d: %s", w.Code, w.Body.String())
	}
}
// ─── resolveOrgID ──────────────────────────────────────────────────────────
// TestResolveOrgID_OrgRoot: a workspace with a NULL parent_id is its own org
// root, so resolveOrgID returns the workspace's own ID.
func TestResolveOrgID_OrgRoot(t *testing.T) {
	mock := setupTestDB(t)
	// workspace has no parent → it IS the org root
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-root").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	got, err := resolveOrgID(context.Background(), "ws-root")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got != "ws-root" {
		t.Errorf("expected ws-root, got %q", got)
	}
}
// TestResolveOrgID_WithParent: a workspace with a non-NULL parent_id resolves
// to the parent's ID (one-level hierarchy: parent is the org root).
func TestResolveOrgID_WithParent(t *testing.T) {
	mock := setupTestDB(t)
	// workspace has a parent → parent is the org root
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-child").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow("ws-parent"))
	got, err := resolveOrgID(context.Background(), "ws-child")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got != "ws-parent" {
		t.Errorf("expected ws-parent, got %q", got)
	}
}
// TestResolveOrgID_NotFound: sql.ErrNoRows is swallowed — an unknown
// workspace yields ("", nil) rather than an error.
func TestResolveOrgID_NotFound(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-ghost").
		WillReturnError(sql.ErrNoRows)
	got, err := resolveOrgID(context.Background(), "ws-ghost")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got != "" {
		t.Errorf("expected empty string for not-found workspace, got %q", got)
	}
}
// ─── checkOrgPluginAllowlist ───────────────────────────────────────────────
// TestCheckOrgPluginAllowlist_AllowAll_EmptyList: a plugin absent from the
// list is still allowed when the allowlist table is empty (allow-all default).
func TestCheckOrgPluginAllowlist_AllowAll_EmptyList(t *testing.T) {
	mock := setupTestDB(t)
	// resolveOrgID: no parent → ws-1 is org root
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// plugin NOT in list
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "my-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	// count = 0 → allow-all
	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM org_plugin_allowlist`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
	blocked, reason := checkOrgPluginAllowlist(context.Background(), "ws-1", "my-plugin")
	if blocked {
		t.Errorf("expected not blocked (allow-all), got blocked: %s", reason)
	}
}
// TestCheckOrgPluginAllowlist_Allowed_OnList: a plugin found on the allowlist
// short-circuits to allowed — no COUNT query is issued.
func TestCheckOrgPluginAllowlist_Allowed_OnList(t *testing.T) {
	mock := setupTestDB(t)
	// resolveOrgID: no parent
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// plugin IS in the allowlist
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "my-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	blocked, reason := checkOrgPluginAllowlist(context.Background(), "ws-1", "my-plugin")
	if blocked {
		t.Errorf("expected not blocked (on list), got blocked: %s", reason)
	}
}
// TestCheckOrgPluginAllowlist_Blocked_NotOnList: with a non-empty allowlist,
// an unlisted plugin is blocked and a human-readable reason is returned.
func TestCheckOrgPluginAllowlist_Blocked_NotOnList(t *testing.T) {
	mock := setupTestDB(t)
	// resolveOrgID: no parent
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// plugin NOT in the list
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "evil-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	// count > 0 → allowlist is active
	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM org_plugin_allowlist`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(2))
	blocked, reason := checkOrgPluginAllowlist(context.Background(), "ws-1", "evil-plugin")
	if !blocked {
		t.Error("expected plugin to be blocked (not on non-empty allowlist)")
	}
	if reason == "" {
		t.Error("expected non-empty reason when blocked")
	}
}
// TestCheckOrgPluginAllowlist_ChildWorkspace_UsesParentOrg: for a child
// workspace the allowlist lookup keys on the resolved parent org ID.
func TestCheckOrgPluginAllowlist_ChildWorkspace_UsesParentOrg(t *testing.T) {
	mock := setupTestDB(t)
	// resolveOrgID: ws-child has parent ws-parent
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-child").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow("ws-parent"))
	// allowlist check uses parent org ID (ws-parent)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-parent", "my-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	blocked, reason := checkOrgPluginAllowlist(context.Background(), "ws-child", "my-plugin")
	if blocked {
		t.Errorf("expected not blocked (on parent's allowlist), got blocked: %s", reason)
	}
}
// TestCheckOrgPluginAllowlist_FailOpen_OnResolveError: a DB failure while
// resolving the org must not block plugin installs (fail-open policy).
func TestCheckOrgPluginAllowlist_FailOpen_OnResolveError(t *testing.T) {
	mock := setupTestDB(t)
	// DB error during resolveOrgID → fail-open
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnError(sql.ErrConnDone)
	blocked, _ := checkOrgPluginAllowlist(context.Background(), "ws-1", "any-plugin")
	if blocked {
		t.Error("expected fail-open (not blocked) on DB error during resolveOrgID")
	}
}
// TestCheckOrgPluginAllowlist_FailOpen_OnExistsError: a DB failure on the
// membership EXISTS query also fails open.
func TestCheckOrgPluginAllowlist_FailOpen_OnExistsError(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// DB error on EXISTS check → fail-open
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "any-plugin").
		WillReturnError(sql.ErrConnDone)
	blocked, _ := checkOrgPluginAllowlist(context.Background(), "ws-1", "any-plugin")
	if blocked {
		t.Error("expected fail-open (not blocked) on DB error during EXISTS check")
	}
}
// TestCheckOrgPluginAllowlist_FailOpen_OnCountError: a DB failure on the
// COUNT query (allowlist-active check) also fails open.
func TestCheckOrgPluginAllowlist_FailOpen_OnCountError(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "any-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	// DB error on COUNT check → fail-open
	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM org_plugin_allowlist`).
		WithArgs("ws-1").
		WillReturnError(sql.ErrConnDone)
	blocked, _ := checkOrgPluginAllowlist(context.Background(), "ws-1", "any-plugin")
	if blocked {
		t.Error("expected fail-open (not blocked) on DB error during COUNT check")
	}
}

View File

@ -63,6 +63,14 @@ func (h *PluginsHandler) Install(c *gin.Context) {
// has already cleaned it up (and its returned result is nil).
defer os.RemoveAll(result.StagedDir)
// Org plugin allowlist gate (#591).
// If the workspace's org has a non-empty allowlist, the plugin must be
// on it. An empty allowlist means allow-all (backward compat).
if blocked, reason := checkOrgPluginAllowlist(ctx, workspaceID, result.PluginName); blocked {
c.JSON(http.StatusForbidden, gin.H{"error": reason})
return
}
if err := h.deliverToContainer(ctx, workspaceID, result); err != nil {
var he *httpErr
if errors.As(err, &he) {

View File

@ -0,0 +1,107 @@
package handlers
import (
"encoding/json"
"fmt"
"log"
"net/http"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/gin-gonic/gin"
)
// aguiEvent is the AG-UI envelope written to the SSE stream.
// Spec: {"type":"<event_name>","timestamp":<unix_ms>,"data":{...}}
type aguiEvent struct {
	Type      string          `json:"type"`      // AG-UI event name, e.g. "TASK_UPDATED"
	Timestamp int64           `json:"timestamp"` // Unix milliseconds
	Data      json.RawMessage `json:"data"`      // pre-serialized payload, passed through verbatim
}
// SSEHandler streams workspace events as AG-UI-compatible Server-Sent Events.
// It holds only the in-process broadcaster; all per-request state lives in
// StreamEvents.
type SSEHandler struct {
	broadcaster *events.Broadcaster // event source shared across handlers
}
// NewSSEHandler constructs an SSEHandler whose events are sourced from b.
func NewSSEHandler(b *events.Broadcaster) *SSEHandler {
	handler := &SSEHandler{broadcaster: b}
	return handler
}
// StreamEvents handles GET /workspaces/:id/events/stream.
//
// Authentication is enforced by the upstream WorkspaceAuth middleware (bearer
// token bound to :id). This handler only needs to:
//  1. Verify the workspace exists (returns 404 if not).
//  2. Set SSE headers.
//  3. Subscribe to the in-process broadcaster and relay events until the
//     client disconnects (context cancellation).
//
// AG-UI envelope per event:
//
//	data: {"type":"<event>","timestamp":<unix_ms>,"data":{...}}\n\n
func (h *SSEHandler) StreamEvents(c *gin.Context) {
	workspaceID := c.Param("id")
	ctx := c.Request.Context()
	// Verify the workspace exists — 404 early rather than serving an empty stream.
	var exists bool
	if err := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		workspaceID,
	).Scan(&exists); err != nil {
		log.Printf("SSE: workspace existence check failed for %s: %v", workspaceID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify workspace"})
		return
	}
	if !exists {
		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
		return
	}
	// SSE response headers. These must be written before the first flush.
	c.Header("Content-Type", "text/event-stream")
	c.Header("Cache-Control", "no-cache")
	c.Header("Connection", "keep-alive")
	// Instruct nginx / reverse-proxies to disable buffering so events reach
	// the client immediately rather than being held in a proxy buffer.
	c.Header("X-Accel-Buffering", "no")
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		// Should never happen with gin's responseWriter, but guard defensively.
		c.JSON(http.StatusInternalServerError, gin.H{"error": "streaming not supported"})
		return
	}
	// Subscribe before writing anything so no events are missed between the
	// ping and the relay loop.
	ch, cancel := h.broadcaster.SubscribeSSE(workspaceID)
	defer cancel()
	// Send an initial SSE comment so the client knows the stream is live.
	fmt.Fprintf(c.Writer, ": ping\n\n")
	flusher.Flush()
	for {
		select {
		case <-ctx.Done():
			// Client disconnected (or request cancelled) — stop relaying.
			return
		case msg, ok := <-ch:
			if !ok {
				// Broadcaster closed the channel — nothing more to stream.
				return
			}
			env := aguiEvent{
				Type:      msg.Event,
				Timestamp: msg.Timestamp.UnixMilli(),
				Data:      msg.Payload,
			}
			b, err := json.Marshal(env)
			if err != nil {
				// Drop the bad event but keep the stream alive.
				log.Printf("SSE: marshal error for workspace %s event %s: %v", workspaceID, msg.Event, err)
				continue
			}
			// NOTE(review): write errors are ignored here; presumably a dead
			// connection surfaces via ctx.Done() on the next iteration — confirm.
			fmt.Fprintf(c.Writer, "data: %s\n\n", b)
			flusher.Flush()
		}
	}
}

View File

@ -0,0 +1,237 @@
package handlers
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// expectWorkspaceExists queues the workspace-existence EXISTS query that
// StreamEvents issues before anything else.
func expectWorkspaceExists(mock sqlmock.Sqlmock, workspaceID string, exists bool) {
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs(workspaceID).
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(exists))
}
// runSSEHandler starts StreamEvents in a background goroutine with a
// cancellable request context.
//
// Returns:
//   - w: recorder capturing the SSE response body and headers
//   - cancel: cancels the request context, which makes StreamEvents return
//   - done: closed once the handler goroutine has exited
//
// Callers should sleep briefly after this returns so the handler can
// subscribe before any broadcast, then call cancel() and receive on done to
// drain the goroutine.
//
// (Fixes a stale doc comment that referenced a nonexistent waitAfterStart
// parameter and a "drain function" that was never returned; the named return
// previously called `inject` is the context cancel.)
func runSSEHandler(t *testing.T, h *SSEHandler, workspaceID string) (
	w *httptest.ResponseRecorder,
	cancel func(),
	done <-chan struct{},
) {
	t.Helper()
	ctx, cancelCtx := context.WithCancel(context.Background())
	w = httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: workspaceID}}
	c.Request = httptest.NewRequest("GET", "/workspaces/"+workspaceID+"/events/stream", nil).WithContext(ctx)
	doneCh := make(chan struct{})
	go func() {
		defer close(doneCh)
		h.StreamEvents(c)
	}()
	return w, cancelCtx, doneCh
}
// TestSSE_ContentType verifies the handler sets text/event-stream on the response.
func TestSSE_ContentType(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	// Allow the handler to subscribe, then tear it down.
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	// HasPrefix tolerates a charset suffix, e.g. "text/event-stream; charset=utf-8".
	ct := w.Header().Get("Content-Type")
	if !strings.HasPrefix(ct, "text/event-stream") {
		t.Errorf("expected Content-Type text/event-stream, got %q", ct)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_InitialPing verifies the handler emits the ": ping" SSE comment on connect.
func TestSSE_InitialPing(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	// Allow the handler to subscribe and write the ping, then tear it down.
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	body := w.Body.String()
	if !strings.Contains(body, ": ping") {
		t.Errorf("expected SSE ping comment, body was:\n%s", body)
	}
	// Consistency with the other SSE tests: fail if the queued DB
	// expectations were not all consumed.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_AGUIFormat verifies that a broadcast event is wrapped in the AG-UI envelope.
func TestSSE_AGUIFormat(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	// Wait for the handler goroutine to reach its select loop.
	time.Sleep(30 * time.Millisecond)
	b.BroadcastOnly("ws-1", "TASK_UPDATED", map[string]string{"status": "running"})
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	body := w.Body.String()
	// Find the first "data: ..." line.
	var dataLine string
	for _, line := range strings.Split(body, "\n") {
		if strings.HasPrefix(line, "data: ") {
			dataLine = strings.TrimPrefix(line, "data: ")
			break
		}
	}
	if dataLine == "" {
		t.Fatalf("no data: line found in SSE response:\n%s", body)
	}
	// Decode into the AG-UI envelope shape and check each field.
	var env struct {
		Type      string          `json:"type"`
		Timestamp int64           `json:"timestamp"`
		Data      json.RawMessage `json:"data"`
	}
	if err := json.Unmarshal([]byte(dataLine), &env); err != nil {
		t.Fatalf("invalid AG-UI envelope JSON %q: %v", dataLine, err)
	}
	if env.Type != "TASK_UPDATED" {
		t.Errorf("expected type TASK_UPDATED, got %q", env.Type)
	}
	if env.Timestamp <= 0 {
		t.Errorf("expected positive timestamp, got %d", env.Timestamp)
	}
	if len(env.Data) == 0 || string(env.Data) == "null" {
		t.Errorf("expected non-null data field, got %q", string(env.Data))
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_WorkspaceFilter verifies that events for a different workspace are NOT delivered.
func TestSSE_WorkspaceFilter(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	time.Sleep(30 * time.Millisecond)
	// Broadcast to a completely different workspace.
	b.BroadcastOnly("ws-99", "AGENT_MESSAGE", map[string]string{"text": "secret"})
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	// The stream for ws-1 must contain no event payloads at all.
	body := w.Body.String()
	for _, line := range strings.Split(body, "\n") {
		if strings.HasPrefix(line, "data: ") {
			t.Errorf("expected no data: events for different workspace, got: %s", line)
		}
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_WorkspaceNotFound verifies a 404 is returned when the workspace does not exist.
// Runs synchronously — the handler returns before subscribing, so no goroutine needed.
func TestSSE_WorkspaceNotFound(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "missing-ws", false)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "missing-ws"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/missing-ws/events/stream", nil)
	h.StreamEvents(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404 for missing workspace, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_MultipleEventsDelivered verifies multiple sequential broadcasts all
// arrive, in broadcast order.
func TestSSE_MultipleEventsDelivered(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	time.Sleep(30 * time.Millisecond)
	b.BroadcastOnly("ws-1", "AGENT_MESSAGE", map[string]string{"msg": "one"})
	b.BroadcastOnly("ws-1", "TASK_UPDATED", map[string]string{"status": "done"})
	b.BroadcastOnly("ws-1", "A2A_RESPONSE", map[string]string{"result": "ok"})
	time.Sleep(50 * time.Millisecond)
	cancel()
	<-done
	body := w.Body.String()
	var dataLines []string
	for _, line := range strings.Split(body, "\n") {
		if strings.HasPrefix(line, "data: ") {
			dataLines = append(dataLines, line)
		}
	}
	if len(dataLines) != 3 {
		t.Errorf("expected 3 data: lines, got %d:\n%s", len(dataLines), body)
	}
	// Verify event types appear in order.
	expectedTypes := []string{"AGENT_MESSAGE", "TASK_UPDATED", "A2A_RESPONSE"}
	for i, dl := range dataLines {
		var env struct {
			Type string `json:"type"`
		}
		if err := json.Unmarshal([]byte(strings.TrimPrefix(dl, "data: ")), &env); err != nil {
			t.Fatalf("line %d: invalid JSON: %v", i, err)
		}
		if env.Type != expectedTypes[i] {
			t.Errorf("line %d: expected type %s, got %s", i, expectedTypes[i], env.Type)
		}
	}
	// Consistency with the other SSE tests: fail if the queued DB
	// expectations were not all consumed.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}

View File

@ -10,6 +10,7 @@ import (
"path/filepath"
"strings"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/crypto"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware"
@ -59,6 +60,14 @@ func (h *WorkspaceHandler) SetEnvMutators(r *provisionhook.Registry) {
h.envMutators = r
}
// TokenRegistry returns the provisionhook.Registry so the router can
// wire the GET /admin/github-installation-token handler without coupling
// to WorkspaceHandler's internals. Returns nil when no plugin has been
// registered (dev / self-hosted deployments without a GitHub App).
//
// Note: this exposes the same registry set via SetEnvMutators — the two
// accessors share h.envMutators.
func (h *WorkspaceHandler) TokenRegistry() *provisionhook.Registry {
	return h.envMutators
}
// Create handles POST /workspaces
func (h *WorkspaceHandler) Create(c *gin.Context) {
var payload models.CreateWorkspacePayload
@ -129,17 +138,59 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
return
}
// Insert workspace with runtime persisted in DB
_, err := db.DB.ExecContext(ctx, `
// Begin a transaction so the workspace row and any initial secrets are
// committed atomically. A secret-encrypt or DB error rolls back the
// workspace insert so we never leave a workspace row with missing secrets.
tx, txErr := db.DB.BeginTx(ctx, nil)
if txErr != nil {
log.Printf("Create workspace: begin tx error: %v", txErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create workspace"})
return
}
// Insert workspace with runtime persisted in DB (inside transaction)
_, err := tx.ExecContext(ctx, `
INSERT INTO workspaces (id, name, role, tier, runtime, awareness_namespace, status, parent_id, workspace_dir, workspace_access)
VALUES ($1, $2, $3, $4, $5, $6, 'provisioning', $7, $8, $9)
`, id, payload.Name, role, payload.Tier, payload.Runtime, awarenessNamespace, payload.ParentID, workspaceDir, workspaceAccess)
if err != nil {
tx.Rollback() //nolint:errcheck
log.Printf("Create workspace error: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create workspace"})
return
}
// Persist initial secrets from the create payload (inside same transaction).
// nil/empty map is a no-op. Any failure rolls back the workspace insert
// so we never have a workspace row without its intended secrets.
for k, v := range payload.Secrets {
encrypted, encErr := crypto.Encrypt([]byte(v))
if encErr != nil {
tx.Rollback() //nolint:errcheck
log.Printf("Create workspace %s: failed to encrypt secret %q: %v", id, k, encErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encrypt secret: " + k})
return
}
version := crypto.CurrentEncryptionVersion()
if _, dbErr := tx.ExecContext(ctx, `
INSERT INTO workspace_secrets (workspace_id, key, encrypted_value, encryption_version)
VALUES ($1, $2, $3, $4)
ON CONFLICT (workspace_id, key) DO UPDATE
SET encrypted_value = $3, encryption_version = $4, updated_at = now()
`, id, k, encrypted, version); dbErr != nil {
tx.Rollback() //nolint:errcheck
log.Printf("Create workspace %s: failed to persist secret %q: %v", id, k, dbErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save secret: " + k})
return
}
}
if commitErr := tx.Commit(); commitErr != nil {
log.Printf("Create workspace %s: transaction commit failed: %v", id, commitErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create workspace"})
return
}
// Insert canvas layout — non-fatal: workspace can be dragged into position later
if _, err := db.DB.ExecContext(ctx, `
INSERT INTO canvas_layouts (workspace_id, x, y) VALUES ($1, $2, $3)

View File

@ -0,0 +1,125 @@
package handlers
import (
"context"
"database/sql"
"fmt"
"log"
"net/http"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/gin-gonic/gin"
)
// Pricing constants — Claude Sonnet default rates (USD per token).
// Callers with different models should override via env vars in a future phase.
// NOTE(review): rates are hard-coded snapshots; confirm against current
// published pricing before relying on estimated_cost_usd for billing.
const (
	tokenCostPerInputToken  = 0.000003 // $3 / 1M input tokens
	tokenCostPerOutputToken = 0.000015 // $15 / 1M output tokens
)
// MetricsHandler serves GET /workspaces/:id/metrics. Stateless — all data
// comes from the package-level db.DB connection.
type MetricsHandler struct{}
// NewMetricsHandler constructs a MetricsHandler.
func NewMetricsHandler() *MetricsHandler {
	return new(MetricsHandler)
}
// GetMetrics handles GET /workspaces/:id/metrics.
//
// Aggregates LLM token usage for the current UTC day.
// Auth: WorkspaceAuth middleware (bearer token bound to :id).
//
// Response:
//
//	{
//	  "input_tokens": <N>,
//	  "output_tokens": <N>,
//	  "total_calls": <N>,
//	  "estimated_cost_usd": "0.000000",
//	  "period_start": "2026-04-17T00:00:00Z",
//	  "period_end": "2026-04-18T00:00:00Z"
//	}
func (h *MetricsHandler) GetMetrics(c *gin.Context) {
	ctx := c.Request.Context()
	workspaceID := c.Param("id")
	// 404 for unknown workspaces before touching the usage table.
	var found bool
	checkErr := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		workspaceID,
	).Scan(&found)
	if checkErr != nil {
		log.Printf("metrics: workspace check failed for %s: %v", workspaceID, checkErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify workspace"})
		return
	}
	if !found {
		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
		return
	}
	start := todayUTC()
	end := start.Add(24 * time.Hour)
	var (
		inTokens  int64
		outTokens int64
		calls     int64
		costUSD   float64
	)
	// COALESCE guarantees one row of zeros even when no usage exists yet.
	scanErr := db.DB.QueryRowContext(ctx, `
		SELECT
			COALESCE(SUM(input_tokens), 0),
			COALESCE(SUM(output_tokens), 0),
			COALESCE(SUM(call_count), 0),
			COALESCE(SUM(estimated_cost_usd), 0)
		FROM workspace_token_usage
		WHERE workspace_id = $1
		AND period_start = $2
	`, workspaceID, start).Scan(&inTokens, &outTokens, &calls, &costUSD)
	if scanErr != nil && scanErr != sql.ErrNoRows {
		log.Printf("metrics: query failed for workspace %s: %v", workspaceID, scanErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch metrics"})
		return
	}
	c.JSON(http.StatusOK, gin.H{
		"input_tokens":       inTokens,
		"output_tokens":      outTokens,
		"total_calls":        calls,
		"estimated_cost_usd": fmt.Sprintf("%.6f", costUSD),
		"period_start":       start.Format(time.RFC3339),
		"period_end":         end.Format(time.RFC3339),
	})
}
// todayUTC returns the start of the current UTC day (midnight).
func todayUTC() time.Time {
now := time.Now().UTC()
return time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.UTC)
}
// upsertTokenUsage accumulates input/output token counts for workspaceID's
// current UTC day. Cost is estimated using the default per-token pricing
// constants. Always call in a detached goroutine — never block the A2A path.
//
// NOTE(review): callers are told to invoke this from a detached goroutine,
// but the function uses the passed ctx for the DB write — if a caller passes
// the request-scoped context, it may already be cancelled by the time the
// INSERT runs. Confirm callers pass a background (non-request) context.
func upsertTokenUsage(ctx context.Context, workspaceID string, inputTokens, outputTokens int64) {
	// Nothing to record — avoid writing zero rows.
	if inputTokens == 0 && outputTokens == 0 {
		return
	}
	periodStart := todayUTC()
	cost := float64(inputTokens)*tokenCostPerInputToken + float64(outputTokens)*tokenCostPerOutputToken
	// Atomic accumulate: one row per (workspace, UTC day), totals added on conflict.
	_, err := db.DB.ExecContext(ctx, `
		INSERT INTO workspace_token_usage
			(workspace_id, period_start, input_tokens, output_tokens, call_count, estimated_cost_usd, updated_at)
		VALUES ($1, $2, $3, $4, 1, $5, NOW())
		ON CONFLICT (workspace_id, period_start) DO UPDATE SET
			input_tokens = workspace_token_usage.input_tokens + EXCLUDED.input_tokens,
			output_tokens = workspace_token_usage.output_tokens + EXCLUDED.output_tokens,
			call_count = workspace_token_usage.call_count + 1,
			estimated_cost_usd = workspace_token_usage.estimated_cost_usd + EXCLUDED.estimated_cost_usd,
			updated_at = NOW()
	`, workspaceID, periodStart, inputTokens, outputTokens, cost)
	if err != nil {
		// Best-effort accounting: log and move on, never fail the caller.
		log.Printf("upsertTokenUsage: failed for workspace %s: %v", workspaceID, err)
	}
}

View File

@ -0,0 +1,262 @@
package handlers
import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// usageColumns matches the SELECT in GetMetrics — one aggregate per column,
// in the same order the handler scans them.
var usageColumns = []string{
	"sum_input_tokens", "sum_output_tokens", "sum_call_count", "sum_cost",
}
// expectWorkspaceExistsMetrics queues the workspace EXISTS check that
// GetMetrics issues before reading the usage table.
func expectWorkspaceExistsMetrics(mock sqlmock.Sqlmock, workspaceID string, exists bool) {
	rows := sqlmock.NewRows([]string{"exists"}).AddRow(exists)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs(workspaceID).
		WillReturnRows(rows)
}
// TestGetMetrics_HappyPath verifies the handler returns correct aggregated data.
func TestGetMetrics_HappyPath(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExistsMetrics(mock, "ws-1", true)
	// Simulate one row with usage data.
	mock.ExpectQuery(`SELECT\s+COALESCE\(SUM\(input_tokens\)`).
		WithArgs("ws-1", sqlmock.AnyArg()).
		WillReturnRows(sqlmock.NewRows(usageColumns).
			AddRow(int64(1500), int64(300), int64(5), float64(0.009)))
	h := NewMetricsHandler()
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/metrics", nil)
	h.GetMetrics(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	// Decode the full response shape documented on GetMetrics.
	var resp struct {
		InputTokens   int64  `json:"input_tokens"`
		OutputTokens  int64  `json:"output_tokens"`
		TotalCalls    int64  `json:"total_calls"`
		EstimatedCost string `json:"estimated_cost_usd"`
		PeriodStart   string `json:"period_start"`
		PeriodEnd     string `json:"period_end"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("invalid JSON: %v\n%s", err, w.Body.String())
	}
	if resp.InputTokens != 1500 {
		t.Errorf("expected input_tokens=1500, got %d", resp.InputTokens)
	}
	if resp.OutputTokens != 300 {
		t.Errorf("expected output_tokens=300, got %d", resp.OutputTokens)
	}
	if resp.TotalCalls != 5 {
		t.Errorf("expected total_calls=5, got %d", resp.TotalCalls)
	}
	if resp.EstimatedCost == "" {
		t.Error("expected non-empty estimated_cost_usd")
	}
	if resp.PeriodStart == "" {
		t.Error("expected non-empty period_start")
	}
	if resp.PeriodEnd == "" {
		t.Error("expected non-empty period_end")
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestGetMetrics_WorkspaceNotFound verifies a 404 when workspace is absent.
// No usage query should be issued (ExpectationsWereMet would flag one).
func TestGetMetrics_WorkspaceNotFound(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExistsMetrics(mock, "ghost", false)
	h := NewMetricsHandler()
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ghost"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ghost/metrics", nil)
	h.GetMetrics(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestGetMetrics_EmptyPeriod verifies the handler returns zeros when no usage exists yet.
func TestGetMetrics_EmptyPeriod(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExistsMetrics(mock, "ws-new", true)
	// COALESCE returns 0 for each column when no rows match.
	mock.ExpectQuery(`SELECT\s+COALESCE\(SUM\(input_tokens\)`).
		WithArgs("ws-new", sqlmock.AnyArg()).
		WillReturnRows(sqlmock.NewRows(usageColumns).
			AddRow(int64(0), int64(0), int64(0), float64(0)))
	h := NewMetricsHandler()
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-new"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-new/metrics", nil)
	h.GetMetrics(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("invalid JSON: %v", err)
	}
	// Verify period_start and period_end are present and distinct.
	ps, _ := resp["period_start"].(string)
	pe, _ := resp["period_end"].(string)
	if ps == "" || pe == "" {
		t.Errorf("expected non-empty period_start/period_end, got %q / %q", ps, pe)
	}
	if ps == pe {
		t.Errorf("period_start and period_end must differ, both are %q", ps)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestGetMetrics_CostFormat verifies estimated_cost_usd is formatted to 6 decimal places.
func TestGetMetrics_CostFormat(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExistsMetrics(mock, "ws-1", true)
	mock.ExpectQuery(`SELECT\s+COALESCE\(SUM\(input_tokens\)`).
		WithArgs("ws-1", sqlmock.AnyArg()).
		WillReturnRows(sqlmock.NewRows(usageColumns).
			AddRow(int64(1000000), int64(0), int64(1), float64(3.0)))
	h := NewMetricsHandler()
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/metrics", nil)
	h.GetMetrics(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("invalid JSON: %v", err)
	}
	cost, _ := resp["estimated_cost_usd"].(string)
	if len(cost) < 8 {
		// "3.000000" is 8 chars minimum
		t.Errorf("expected at least 8-char cost string, got %q", cost)
	}
	// Consistency with the other metrics tests: fail if queued DB
	// expectations were not all consumed.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// ---- parseUsageFromA2AResponse tests ----

// TestParseUsage_JSONRPCResultEnvelope: token usage nested under a JSON-RPC
// result envelope is extracted.
func TestParseUsage_JSONRPCResultEnvelope(t *testing.T) {
	payload := []byte(`{
		"jsonrpc": "2.0",
		"id": "abc",
		"result": {
			"usage": {
				"input_tokens": 100,
				"output_tokens": 50
			}
		}
	}`)
	gotIn, gotOut := parseUsageFromA2AResponse(payload)
	if gotIn != 100 {
		t.Errorf("expected input_tokens=100, got %d", gotIn)
	}
	if gotOut != 50 {
		t.Errorf("expected output_tokens=50, got %d", gotOut)
	}
}
// TestParseUsage_TopLevelUsage: a top-level "usage" object (no JSON-RPC
// envelope) is also recognized.
func TestParseUsage_TopLevelUsage(t *testing.T) {
	payload := []byte(`{
		"usage": {
			"input_tokens": 200,
			"output_tokens": 75
		}
	}`)
	gotIn, gotOut := parseUsageFromA2AResponse(payload)
	if gotIn != 200 {
		t.Errorf("expected input_tokens=200, got %d", gotIn)
	}
	if gotOut != 75 {
		t.Errorf("expected output_tokens=75, got %d", gotOut)
	}
}
// A well-formed response with no usage field anywhere yields zero counts.
func TestParseUsage_NoUsageField(t *testing.T) {
	raw := []byte(`{"jsonrpc":"2.0","id":"x","result":{"message":"hello"}}`)
	gotIn, gotOut := parseUsageFromA2AResponse(raw)
	if gotIn != 0 || gotOut != 0 {
		t.Errorf("expected (0, 0) with no usage field, got (%d, %d)", gotIn, gotOut)
	}
}
// Explicit zero token counts parse to (0, 0) — indistinguishable from absent.
func TestParseUsage_ZeroTokensIgnored(t *testing.T) {
	raw := []byte(`{"result":{"usage":{"input_tokens":0,"output_tokens":0}}}`)
	gotIn, gotOut := parseUsageFromA2AResponse(raw)
	if gotIn != 0 || gotOut != 0 {
		t.Errorf("expected (0, 0) for zero tokens, got (%d, %d)", gotIn, gotOut)
	}
}
// A zero-length body must be handled gracefully and report no usage.
func TestParseUsage_EmptyBody(t *testing.T) {
	gotIn, gotOut := parseUsageFromA2AResponse([]byte{})
	if gotIn != 0 || gotOut != 0 {
		t.Errorf("expected (0, 0) for empty body, got (%d, %d)", gotIn, gotOut)
	}
}
// Malformed JSON must not panic or surface an error — just report no usage.
func TestParseUsage_InvalidJSON(t *testing.T) {
	gotIn, gotOut := parseUsageFromA2AResponse([]byte("not json"))
	if gotIn != 0 || gotOut != 0 {
		t.Errorf("expected (0, 0) for invalid JSON, got (%d, %d)", gotIn, gotOut)
	}
}
// When both a top-level usage object and result.usage are present, the
// nested result.usage must win.
func TestParseUsage_NestedResultPreferredOverTopLevel(t *testing.T) {
	raw := []byte(`{
	"usage": {"input_tokens": 999, "output_tokens": 999},
	"result": {
		"usage": {"input_tokens": 42, "output_tokens": 21}
	}
}`)
	gotIn, gotOut := parseUsageFromA2AResponse(raw)
	if gotIn != 42 {
		t.Errorf("expected result.usage.input_tokens=42, got %d", gotIn)
	}
	if gotOut != 21 {
		t.Errorf("expected result.usage.output_tokens=21, got %d", gotOut)
	}
}

View File

@ -146,10 +146,12 @@ func TestWorkspaceCreate_DBInsertError(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
// Workspace INSERT fails
// Transaction begins, workspace INSERT fails, transaction is rolled back.
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Failing Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WillReturnError(sql.ErrConnDone)
mock.ExpectRollback()
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -175,10 +177,13 @@ func TestWorkspaceCreate_DefaultsApplied(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
// Transaction wraps the workspace INSERT (no secrets in this request).
mock.ExpectBegin()
// Expect workspace INSERT with defaulted tier=1, runtime="langgraph"
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Default Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
// Expect canvas_layouts INSERT (x=0, y=0 — defaults)
mock.ExpectExec("INSERT INTO canvas_layouts").
@ -215,6 +220,117 @@ func TestWorkspaceCreate_DefaultsApplied(t *testing.T) {
}
}
// TestWorkspaceCreate_WithSecrets_Persists asserts that secrets in the create
// payload are written to workspace_secrets inside the same transaction as the
// workspace row, and that the handler returns 201.
func TestWorkspaceCreate_WithSecrets_Persists(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	// External workspace: simplest code path — no provisioner goroutine.
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	// Ordered protocol: Begin → workspace INSERT → secret INSERT → Commit.
	mock.ExpectBegin()
	mock.ExpectExec("INSERT INTO workspaces").
		WithArgs(sqlmock.AnyArg(), "Hermes Agent", nil, 1, "hermes", sqlmock.AnyArg(), (*string)(nil), nil, "none").
		WillReturnResult(sqlmock.NewResult(0, 1))
	// Secret inserted inside the same transaction.
	mock.ExpectExec("INSERT INTO workspace_secrets").
		WithArgs(sqlmock.AnyArg(), "HERMES_API_KEY", sqlmock.AnyArg(), sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectCommit()
	// canvas_layouts (non-fatal, outside tx)
	mock.ExpectExec("INSERT INTO canvas_layouts").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	// Plaintext secret in the create payload — persisted via the same path as
	// POST /workspaces/:id/secrets (see CreateWorkspacePayload.Secrets).
	body := `{"name":"Hermes Agent","runtime":"hermes","external":true,"secrets":{"HERMES_API_KEY":"sk-test-123"}}`
	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Create(c)
	if w.Code != http.StatusCreated {
		t.Errorf("expected status 201, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestWorkspaceCreate_SecretPersistFails_RollsBack asserts that a DB error
// while persisting a secret causes the entire transaction to roll back and
// the handler to return 500. The workspace row must NOT be committed.
func TestWorkspaceCreate_SecretPersistFails_RollsBack(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	// Ordered protocol: Begin → workspace INSERT (succeeds) → secret INSERT
	// (fails) → Rollback. No Commit may ever be observed by the mock.
	mock.ExpectBegin()
	mock.ExpectExec("INSERT INTO workspaces").
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectExec("INSERT INTO workspace_secrets").
		WillReturnError(sql.ErrConnDone) // DB failure while writing secret
	mock.ExpectRollback() // workspace insert must be rolled back
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"name":"Rollback Agent","secrets":{"OPENAI_API_KEY":"sk-fail"}}`
	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Create(c)
	if w.Code != http.StatusInternalServerError {
		t.Errorf("expected status 500, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestWorkspaceCreate_EmptySecrets_OK asserts that an empty secrets map (or
// no secrets key at all) creates the workspace normally without touching
// workspace_secrets.
func TestWorkspaceCreate_EmptySecrets_OK(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	mock.ExpectBegin()
	mock.ExpectExec("INSERT INTO workspaces").
		WillReturnResult(sqlmock.NewResult(0, 1))
	// No ExpectExec for workspace_secrets — empty map must be a no-op.
	// Any unexpected INSERT would surface via ExpectationsWereMet below.
	mock.ExpectCommit()
	mock.ExpectExec("INSERT INTO canvas_layouts").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	// Explicit empty map — exercises the "secrets key present but empty" path.
	body := `{"name":"No Secrets Agent","external":true,"secrets":{}}`
	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Create(c)
	if w.Code != http.StatusCreated {
		t.Errorf("expected status 201, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// ==================== GET /workspaces (List) ====================
func TestWorkspaceList_Empty(t *testing.T) {

View File

@ -81,6 +81,13 @@ func TenantGuardWithOrgID(configuredOrgID string) gin.HandlerFunc {
c.Next()
return
}
// Tertiary: same-origin Canvas requests on tenant EC2 instances where
// Caddy serves Canvas (:3000) and API (:8080) under the same domain.
// CANVAS_PROXY_URL is set → Referer/Origin matches Host → trusted.
if isSameOriginCanvas(c) {
c.Next()
return
}
// 404 not 403 — existence of this tenant must not be inferable by
// probing other orgs' machines.
c.AbortWithStatus(404)

View File

@ -133,6 +133,64 @@ func TestOrgIDFromReplaySrc(t *testing.T) {
}
}
// Same-origin Canvas bypass: with CANVAS_PROXY_URL active and a Referer that
// matches the request Host, the co-served Canvas must be let through.
func TestTenantGuard_SameOriginCanvasBypass(t *testing.T) {
	saved := canvasProxyActive
	canvasProxyActive = true
	defer func() { canvasProxyActive = saved }()
	router := newGuardedRouter("org-abc")
	rec := httptest.NewRecorder()
	req := httptest.NewRequest("GET", "/workspaces", nil)
	req.Host = "molecule1.moleculesai.app"
	req.Header.Set("Referer", "https://molecule1.moleculesai.app/")
	router.ServeHTTP(rec, req)
	if rec.Code != 200 {
		t.Errorf("same-origin canvas: expected 200, got %d", rec.Code)
	}
}
// Same-origin Canvas bypass must also trigger on a matching Origin header
// (the WebSocket upgrade path, which may carry no Referer).
func TestTenantGuard_SameOriginCanvasViaOrigin(t *testing.T) {
	saved := canvasProxyActive
	canvasProxyActive = true
	defer func() { canvasProxyActive = saved }()
	router := newGuardedRouter("org-abc")
	rec := httptest.NewRecorder()
	req := httptest.NewRequest("GET", "/workspaces", nil)
	req.Host = "molecule1.moleculesai.app"
	req.Header.Set("Origin", "https://molecule1.moleculesai.app")
	router.ServeHTTP(rec, req)
	if rec.Code != 200 {
		t.Errorf("same-origin canvas via Origin: expected 200, got %d", rec.Code)
	}
}
// With CANVAS_PROXY_URL unset the same-origin bypass must stay disabled:
// a matching Referer still yields the tenant guard's 404.
func TestTenantGuard_SameOriginCanvasInactiveWithoutEnv(t *testing.T) {
	saved := canvasProxyActive
	canvasProxyActive = false
	defer func() { canvasProxyActive = saved }()
	router := newGuardedRouter("org-abc")
	rec := httptest.NewRecorder()
	req := httptest.NewRequest("GET", "/workspaces", nil)
	req.Host = "molecule1.moleculesai.app"
	req.Header.Set("Referer", "https://molecule1.moleculesai.app/")
	router.ServeHTTP(rec, req)
	if rec.Code != 404 {
		t.Errorf("same-origin canvas without CANVAS_PROXY_URL: expected 404, got %d", rec.Code)
	}
}
// The allowlist is exact-match, not prefix. "/health/debug" must NOT bypass.
func TestTenantGuard_AllowlistIsExactMatch(t *testing.T) {
gin.SetMode(gin.TestMode)

View File

@ -67,10 +67,17 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc {
// Same lazy-bootstrap contract as WorkspaceAuth: if no live token exists
// anywhere on the platform (fresh install / pre-Phase-30 upgrade), requests
// are let through so existing deployments keep working. Once any workspace
// has a live token every request to these routes MUST present a valid one.
// has a live token every request to these routes MUST present a valid bearer
// token — no Origin-based bypass. (#623)
//
// Any valid workspace bearer token is accepted — the route is not scoped to
// a specific workspace so we only verify the token is live and unrevoked.
//
// NOTE: canvasOriginAllowed / isSameOriginCanvas are intentionally NOT called
// here. The Origin header is trivially forgeable by any container on the
// Docker network; using it as an auth bypass would let an attacker reach
// /settings/secrets, /bundles/import, /events, etc. without a bearer token.
// Those short-circuits belong ONLY in CanvasOrBearer (cosmetic routes).
func AdminAuth(database *sql.DB) gin.HandlerFunc {
return func(c *gin.Context) {
ctx := c.Request.Context()
@ -82,7 +89,7 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
return
}
if hasLive {
// Bearer token path — agents, CLI, and API clients.
// Bearer token is the ONLY accepted credential for admin routes.
tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization"))
if tok != "" {
if err := wsauth.ValidateAnyToken(ctx, database, tok); err != nil {
@ -92,16 +99,6 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
c.Next()
return
}
// Canvas origin path — cross-origin canvas (CORS_ORIGINS match).
if canvasOriginAllowed(c.GetHeader("Origin")) {
c.Next()
return
}
// Same-origin canvas path — tenant image where canvas + API share a host.
if isSameOriginCanvas(c) {
c.Next()
return
}
c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "admin auth required"})
return
}
@ -220,19 +217,25 @@ func isSameOriginCanvas(c *gin.Context) bool {
if !canvasProxyActive {
return false
}
referer := c.GetHeader("Referer")
if referer == "" {
return false
}
host := c.Request.Host
if host == "" {
return false
}
// Referer must start with https://<host>/ or http://<host>/ (trailing
// slash required to prevent hongming-wang.moleculesai.app.evil.com from
// matching hongming-wang.moleculesai.app).
return strings.HasPrefix(referer, "https://"+host+"/") ||
strings.HasPrefix(referer, "http://"+host+"/") ||
referer == "https://"+host ||
referer == "http://"+host
// Check Referer first (standard browser requests).
referer := c.GetHeader("Referer")
if referer != "" {
// Referer must start with https://<host>/ or http://<host>/ (trailing
// slash required to prevent hongming-wang.moleculesai.app.evil.com from
// matching hongming-wang.moleculesai.app).
if strings.HasPrefix(referer, "https://"+host+"/") ||
strings.HasPrefix(referer, "http://"+host+"/") ||
referer == "https://"+host ||
referer == "http://"+host {
return true
}
}
// Fallback: check Origin header (WebSocket upgrade requests may not have
// Referer but always send Origin).
origin := c.GetHeader("Origin")
return origin == "https://"+host || origin == "http://"+host
}

View File

@ -778,3 +778,116 @@ func TestCanvasOriginAllowed_LocalhostDefault(t *testing.T) {
t.Error("random origin should not be allowed")
}
}
// ── Issue #623 regression ─────────────────────────────────────────────────────
// AdminAuth must NOT accept forged Origin headers. Any container on the Docker
// network can set Origin: http://localhost:3000 without a bearer token, which
// previously bypassed AdminAuth on ALL admin-gated routes. (#623, dup #626)
// TestAdminAuth_623_ForgedOrigin_Returns401 — the main regression test:
// a request with a matching CORS origin but no bearer token must be rejected.
func TestAdminAuth_623_ForgedOrigin_Returns401(t *testing.T) {
	mockDB, mock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock: %v", err)
	}
	defer mockDB.Close()
	// Platform has live tokens — AdminAuth is active.
	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
	// CORS_ORIGINS deliberately matches the forged Origin below — the pre-fix
	// canvasOriginAllowed short-circuit would have accepted this request.
	t.Setenv("CORS_ORIGINS", "http://localhost:3000")
	r := gin.New()
	r.GET("/settings/secrets", AdminAuth(mockDB), func(c *gin.Context) {
		c.JSON(http.StatusOK, gin.H{"secrets": []string{"OPENAI_API_KEY"}})
	})
	w := httptest.NewRecorder()
	// #623 attack: forge the canvas Origin header — no bearer token.
	req, _ := http.NewRequest(http.MethodGet, "/settings/secrets", nil)
	req.Header.Set("Origin", "http://localhost:3000")
	r.ServeHTTP(w, req)
	if w.Code != http.StatusUnauthorized {
		t.Errorf("#623 forged Origin bypass: expected 401, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestAdminAuth_623_ForgedCORSOrigin_Returns401 — variant: attacker uses the
// tenant-domain CORS origin from CORS_ORIGINS (not just localhost).
func TestAdminAuth_623_ForgedCORSOrigin_Returns401(t *testing.T) {
	mockDB, mock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock: %v", err)
	}
	defer mockDB.Close()
	// Live tokens exist → AdminAuth enforces bearer auth.
	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
	t.Setenv("CORS_ORIGINS", "https://acme.moleculesai.app")
	r := gin.New()
	r.GET("/admin/secrets", AdminAuth(mockDB), func(c *gin.Context) {
		c.JSON(http.StatusOK, gin.H{"ok": true})
	})
	w := httptest.NewRecorder()
	// Forged tenant-domain Origin, no Authorization header.
	req, _ := http.NewRequest(http.MethodGet, "/admin/secrets", nil)
	req.Header.Set("Origin", "https://acme.moleculesai.app")
	r.ServeHTTP(w, req)
	if w.Code != http.StatusUnauthorized {
		t.Errorf("#623 forged tenant Origin: expected 401, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestAdminAuth_623_ValidBearer_WithOrigin_Passes — bearer + matching Origin
// should still work (the Origin is irrelevant once the bearer validates).
func TestAdminAuth_623_ValidBearer_WithOrigin_Passes(t *testing.T) {
	mockDB, mock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock: %v", err)
	}
	defer mockDB.Close()
	goodToken := "valid-bearer-token-xyz"
	// Tokens are stored hashed — the validation query is keyed on
	// SHA-256(bearer), matching the WithArgs expectation below.
	tokenHash := sha256.Sum256([]byte(goodToken))
	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
	mock.ExpectQuery(validateAnyTokenSelectQuery).
		WithArgs(tokenHash[:]).
		WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("tok-1"))
	// Successful validation also issues an UPDATE on the token row
	// (presumably last-used bookkeeping — confirm in wsauth).
	mock.ExpectExec(validateTokenUpdateQuery).
		WithArgs("tok-1").
		WillReturnResult(sqlmock.NewResult(0, 1))
	t.Setenv("CORS_ORIGINS", "http://localhost:3000")
	r := gin.New()
	r.GET("/settings/secrets", AdminAuth(mockDB), func(c *gin.Context) {
		c.JSON(http.StatusOK, gin.H{"ok": true})
	})
	w := httptest.NewRecorder()
	req, _ := http.NewRequest(http.MethodGet, "/settings/secrets", nil)
	req.Header.Set("Authorization", "Bearer "+goodToken)
	req.Header.Set("Origin", "http://localhost:3000") // present but irrelevant
	r.ServeHTTP(w, req)
	if w.Code != http.StatusOK {
		t.Errorf("bearer+origin: expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}

View File

@ -63,6 +63,10 @@ type CreateWorkspacePayload struct {
WorkspaceDir string `json:"workspace_dir"` // host path to mount as /workspace (empty = isolated volume)
WorkspaceAccess string `json:"workspace_access"` // "none" (default), "read_only", or "read_write" — see #65
ParentID *string `json:"parent_id"`
// Secrets is an optional map of key→plaintext-value pairs to persist as
// workspace secrets at creation time. Stored encrypted (same path as
// POST /workspaces/:id/secrets). Nil/empty map is a no-op.
Secrets map[string]string `json:"secrets"`
Canvas struct {
X float64 `json:"x"`
Y float64 `json:"y"`

View File

@ -0,0 +1,101 @@
package router
import (
"net/http"
"net/http/httptest"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware"
"github.com/gin-gonic/gin"
)
// buildTestTokenEngine returns a minimal Gin engine exposing only the
// test-token route behind AdminAuth — mirroring the registration router.go
// now performs. This lets the HTTP-layer auth gate be exercised without the
// full Setup() dependency graph.
func buildTestTokenEngine(t *testing.T) gin.IRouter {
	t.Helper()
	gin.SetMode(gin.TestMode)
	engine := gin.New()
	handler := handlers.NewAdminTestTokenHandler()
	engine.GET("/admin/workspaces/:id/test-token", middleware.AdminAuth(db.DB), handler.GetTestToken)
	return engine
}
// setupRouterTestDB swaps the package-level db.DB for a sqlmock connection
// and returns its controller. The previous db.DB is restored (and the mock
// connection closed) automatically on test cleanup.
func setupRouterTestDB(t *testing.T) sqlmock.Sqlmock {
	t.Helper()
	conn, ctrl, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock.New: %v", err)
	}
	saved := db.DB
	db.DB = conn
	t.Cleanup(func() {
		db.DB = saved
		conn.Close()
	})
	return ctrl
}
// TestTestTokenRoute_RequiresAdminAuth_WhenTokensExist verifies that once the
// platform has at least one live token, the test-token endpoint returns 401
// for callers that provide no Authorization header. This is the core security
// property added by the fix — without AdminAuth in the router the request
// would reach the handler and mint a new bearer for any workspace UUID.
func TestTestTokenRoute_RequiresAdminAuth_WhenTokensExist(t *testing.T) {
	t.Setenv("MOLECULE_ENV", "development") // enable the handler itself
	mock := setupRouterTestDB(t)
	// HasAnyLiveTokenGlobal: platform has one enrolled workspace.
	mock.ExpectQuery("SELECT COUNT.*FROM workspace_auth_tokens").
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
	r := buildTestTokenEngine(t)
	w := httptest.NewRecorder()
	req := httptest.NewRequest("GET", "/admin/workspaces/ws-target/test-token", nil)
	// No Authorization header — should be rejected by AdminAuth.
	// Assert the engine to http.Handler to serve the request directly.
	r.(http.Handler).ServeHTTP(w, req)
	if w.Code != http.StatusUnauthorized {
		t.Errorf("expected 401 when tokens exist and no auth header, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestTestTokenRoute_FailOpenOnFreshInstall verifies that AdminAuth is
// fail-open on a fresh install (HasAnyLiveTokenGlobal == 0), so the test-token
// bootstrap path still works before the first workspace has registered.
func TestTestTokenRoute_FailOpenOnFreshInstall(t *testing.T) {
	t.Setenv("MOLECULE_ENV", "development")
	mock := setupRouterTestDB(t)
	// HasAnyLiveTokenGlobal: no tokens yet — fresh install.
	mock.ExpectQuery("SELECT COUNT.*FROM workspace_auth_tokens").
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
	// Handler's own DB queries: workspace existence check + token insert.
	// These run only because AdminAuth let the request through.
	mock.ExpectQuery("SELECT id FROM workspaces WHERE id =").
		WithArgs("ws-bootstrap").
		WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-bootstrap"))
	mock.ExpectExec("INSERT INTO workspace_auth_tokens").
		WillReturnResult(sqlmock.NewResult(0, 1))
	r := buildTestTokenEngine(t)
	w := httptest.NewRecorder()
	req := httptest.NewRequest("GET", "/admin/workspaces/ws-bootstrap/test-token", nil)
	r.(http.Handler).ServeHTTP(w, req)
	if w.Code != http.StatusOK {
		t.Errorf("expected 200 on fresh install (fail-open), got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}

View File

@ -279,6 +279,11 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
wsAuth.PUT("/secrets", sech.Set)
wsAuth.DELETE("/secrets/:key", sech.Delete)
wsAuth.GET("/model", sech.GetModel)
// Token usage metrics — cost transparency (#593).
// WorkspaceAuth middleware (on wsAuth) binds the bearer to :id.
mtrh := handlers.NewMetricsHandler()
wsAuth.GET("/metrics", mtrh.GetMetrics)
}
// Global secrets — /settings/secrets is the canonical path; /admin/secrets kept for backward compat.
@ -297,11 +302,24 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
}
// Admin — test token minting (issue #6). Hidden in production via TestTokensEnabled().
// Registered at root (not inside AdminAuth) because it is itself the bootstrap for
// acquiring a token, and it's gated on MOLECULE_ENV / MOLECULE_ENABLE_TEST_TOKENS.
// AdminAuth is a second defence-in-depth layer: on a fresh install with no tokens yet,
// AdminAuth is fail-open (HasAnyLiveTokenGlobal == 0), so the bootstrap still works.
// Once any token exists, callers must present a valid bearer — unauthenticated workspace-
// UUID enumeration is blocked even on non-production instances.
{
tokh := handlers.NewAdminTestTokenHandler()
r.GET("/admin/workspaces/:id/test-token", tokh.GetTestToken)
r.GET("/admin/workspaces/:id/test-token", middleware.AdminAuth(db.DB), tokh.GetTestToken)
}
// Admin — GitHub App installation token refresh (issue #547).
// Long-running workspaces (>60 min) use this endpoint to refresh
// GH_TOKEN without restarting. Returns the current installation token
// from the github-app-auth plugin's in-process cache (which proactively
// refreshes 5 min before expiry). 404 when no GitHub App is configured
// (dev / self-hosted without GITHUB_APP_ID).
{
ghTokH := handlers.NewGitHubTokenHandler(wh.TokenRegistry())
r.GET("/admin/github-installation-token", middleware.AdminAuth(db.DB), ghTokH.GetInstallationToken)
}
// Terminal — shares Docker client with provisioner
@ -390,6 +408,16 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
// depth keeps the route behind AdminAuth regardless.
r.POST("/org/import", middleware.AdminAuth(db.DB), orgh.Import)
// Org plugin allowlist — tool governance (#591).
// Both endpoints are admin-gated: reading the allowlist reveals approved
// tooling policy; writing it enforces org-level install governance.
{
allowlistAdmin := r.Group("", middleware.AdminAuth(db.DB))
aplh := handlers.NewOrgPluginAllowlistHandler()
allowlistAdmin.GET("/orgs/:id/plugins/allowlist", aplh.GetAllowlist)
allowlistAdmin.PUT("/orgs/:id/plugins/allowlist", aplh.PutAllowlist)
}
// Channels (social integrations — Telegram, Slack, Discord, etc.)
chh := handlers.NewChannelHandler(channelMgr)
r.GET("/channels/adapters", chh.ListAdapters)
@ -408,6 +436,11 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
r.POST("/channels/discover", middleware.AdminAuth(db.DB), chh.Discover)
r.POST("/webhooks/:type", chh.Webhook)
// SSE — AG-UI compatible event stream per workspace (#590).
// WorkspaceAuth middleware (on wsAuth) binds the bearer token to :id.
sseh := handlers.NewSSEHandler(broadcaster)
wsAuth.GET("/events/stream", sseh.StreamEvents)
// WebSocket
sh := handlers.NewSocketHandler(hub)
r.GET("/ws", sh.HandleConnect)

View File

@ -0,0 +1 @@
-- Down migration for #593: drop the per-workspace token usage aggregates.
DROP TABLE IF EXISTS workspace_token_usage;

View File

@ -0,0 +1,17 @@
-- Per-workspace LLM token usage tracking (#593 — canvas cost transparency).
-- Stores UTC-day aggregates upserted by the A2A proxy after each LLM call.
-- estimated_cost_usd is computed server-side using fixed per-model rates
-- (default: Claude Sonnet input $3/1M, output $15/1M).
CREATE TABLE IF NOT EXISTS workspace_token_usage (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    workspace_id TEXT NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, -- usage rows die with their workspace
    period_start TIMESTAMPTZ NOT NULL,                   -- start of the UTC-day aggregation bucket
    input_tokens BIGINT NOT NULL DEFAULT 0,
    output_tokens BIGINT NOT NULL DEFAULT 0,
    call_count INTEGER NOT NULL DEFAULT 0,               -- LLM calls folded into this bucket
    estimated_cost_usd NUMERIC(12,6) NOT NULL DEFAULT 0, -- 6-decimal precision matches the API's formatting
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- One row per (workspace, day) — the upsert target for the A2A proxy.
CREATE UNIQUE INDEX IF NOT EXISTS workspace_token_usage_ws_period
    ON workspace_token_usage(workspace_id, period_start);

View File

@ -0,0 +1 @@
-- Down migration for #591: drop the per-org plugin allowlist.
DROP TABLE IF EXISTS org_plugin_allowlist;

View File

@ -0,0 +1,17 @@
-- Per-org plugin allowlist for tool governance (#591).
-- When an org has at least one entry in this table, workspace agents may only
-- install plugins listed here. An empty allowlist means "allow all" (backward
-- compatible with existing deployments).
--
-- org_id references the root/parent workspace that acts as the org anchor.
-- enabled_by records the workspace ID of the admin who added the entry.
CREATE TABLE IF NOT EXISTS org_plugin_allowlist (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    org_id TEXT NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, -- allowlist dies with the org anchor
    plugin_name TEXT NOT NULL,
    enabled_by TEXT NOT NULL, -- workspace ID of the granting admin
    enabled_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- One entry per (org, plugin); duplicate inserts are rejected by this index.
CREATE UNIQUE INDEX IF NOT EXISTS org_plugin_allowlist_org_plugin
    ON org_plugin_allowlist(org_id, plugin_name);

View File

@ -48,6 +48,7 @@ import (
"context"
"fmt"
"sync"
"time"
)
// EnvMutator is implemented by plugins that want to inject env vars
@ -64,6 +65,34 @@ type EnvMutator interface {
MutateEnv(ctx context.Context, workspaceID string, env map[string]string) error
}
// TokenProvider is an optional interface that EnvMutator implementations
// may also satisfy. When a mutator implements TokenProvider the platform
// can serve GET /admin/github-installation-token, allowing long-running
// workspaces to fetch a fresh GitHub token without restarting.
//
// # Why a separate interface?
//
// EnvMutator.MutateEnv is called once at provision time and writes into
// an env map. Calling it again just to read the current token would be
// semantically wrong and potentially unsafe (the env map is a live
// workspace struct). TokenProvider cleanly separates "what do I inject
// at boot?" from "what is the live token right now?".
//
// # Plugin contract
//
// Token must return the current valid token and the time at which it
// will expire. If the plugin's internal cache is past its refresh
// threshold it must block until a new token is obtained before
// returning. Token should never return an expired token — callers rely
// on this guarantee and do not do their own expiry check.
//
// Returning a non-nil error causes the HTTP handler to respond 500 and
// log "[github] token refresh failed: <err>". The workspace will retry
// on its next credential-helper invocation.
type TokenProvider interface {
	// Token returns the current live token and its expiry time, per the
	// plugin contract documented above (never returns an expired token).
	Token(ctx context.Context) (token string, expiresAt time.Time, err error)
}
// Registry holds the ordered list of EnvMutator instances the
// provisioner runs before each workspace boot. Safe for concurrent
// registration + execution.
@ -112,6 +141,26 @@ func (r *Registry) Names() []string {
return names
}
// FirstTokenProvider returns the first registered mutator that also
// implements TokenProvider, or nil when none does. It backs the
// GET /admin/github-installation-token endpoint so long-running workspaces
// can refresh GITHUB_TOKEN without a container restart.
//
// Calling this on a nil registry is safe and yields nil.
func (r *Registry) FirstTokenProvider() TokenProvider {
	if r == nil {
		return nil
	}
	r.mu.RLock()
	defer r.mu.RUnlock()
	for _, mut := range r.mutators {
		if provider, ok := mut.(TokenProvider); ok {
			return provider
		}
	}
	return nil
}
// Run calls every registered mutator in order. The first one to return
// a non-nil error aborts the chain — subsequent mutators do NOT run,
// and the error is returned to the caller (which marks the workspace

View File

@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""Deduplicate hook entries in .claude/settings.json across all workspace containers.
Root cause: molecule_runtime's _deep_merge_hooks() uses unconditional list.extend()
when merging plugin settings-fragment.json files. On every plugin install/reinstall
each hook handler is appended again, producing 3-4x duplicates that cause every
hook to fire 3-4x per event.
This script fixes the live settings.json in every running workspace container via
the shared /proc/<PID>/root filesystem (no docker CLI required), then validates the
output is clean JSON. Safe to re-run — the operation is idempotent (already-clean files are skipped).
Upstream fix needed: molecule_runtime.plugins_registry.builtins._deep_merge_hooks()
should deduplicate by (matcher, frozenset(commands)) before writing. Tracked in
molecule-core issue (filed separately).
Usage:
python3 scripts/dedup_settings_hooks.py [--dry-run]
"""
from __future__ import annotations
import glob
import json
import sys
DRY_RUN = "--dry-run" in sys.argv
def dedup_settings(data: dict) -> tuple[dict, dict[str, tuple[int, int]]]:
    """Return (deduped_data, stats) where stats[event] = (before_count, after_count)."""
    # Settings files without a "hooks" section have nothing to deduplicate.
    if "hooks" not in data:
        return data, {}
    cleaned_hooks: dict = {}
    counts: dict[str, tuple[int, int]] = {}
    for event_name, entries in data["hooks"].items():
        kept: list = []
        observed: set = set()
        for entry in entries:
            # Identity of a hook entry = its matcher plus the (unordered) set
            # of commands it runs; the first occurrence wins.
            identity = (
                entry.get("matcher", ""),
                frozenset(h.get("command", "") for h in entry.get("hooks", [])),
            )
            if identity in observed:
                continue
            observed.add(identity)
            kept.append(entry)
        counts[event_name] = (len(entries), len(kept))
        cleaned_hooks[event_name] = kept
    # Shallow-copy so non-hook keys pass through untouched.
    return {**data, "hooks": cleaned_hooks}, counts
def main() -> None:
    """Dedup settings.json hooks in every running workspace container.

    Walks /proc/<PID>/root to reach each container's filesystem (no docker
    CLI needed), rewrites files whose hook lists shrank, and reports a
    per-PID summary. Exits 1 if any file could not be read or parsed.
    """
    pattern = "/proc/*/root/configs/.claude/settings.json"
    paths = sorted(glob.glob(pattern))
    fixed: list[tuple[str, dict]] = []
    already_clean: list[str] = []
    errors: list[tuple[str, str]] = []
    for path in paths:
        try:
            with open(path) as f:
                data = json.load(f)
            deduped, stats = dedup_settings(data)
            # Only rewrite files where at least one event list actually shrank.
            changed = any(before != after for before, after in stats.values())
            if changed:
                if not DRY_RUN:
                    with open(path, "w") as f:
                        json.dump(deduped, f, indent=2)
                        f.write("\n")
                fixed.append((path, stats))
            else:
                already_clean.append(path)
        except PermissionError as e:
            errors.append((path, f"PermissionError: {e}"))
        except json.JSONDecodeError as e:
            errors.append((path, f"JSONDecodeError: {e}"))
        except Exception as e:
            errors.append((path, str(e)))
    mode = "[DRY RUN] " if DRY_RUN else ""
    print(f"{mode}Fixed: {len(fixed)}")
    for path, stats in fixed:
        # Path shape is /proc/<PID>/root/... — component index 2 is the PID.
        pid = path.split("/")[2]
        # Fix: before/after counts were concatenated with no separator
        # ("{b}{a}" printed e.g. "42" for 4→2); use an explicit arrow.
        summary = ", ".join(f"{ev}: {b}→{a}" for ev, (b, a) in stats.items() if b != a)
        print(f" PID {pid}: {summary}")
    print(f"{mode}Already clean: {len(already_clean)}")
    if errors:
        print(f"Errors: {len(errors)}")
        for path, err in errors:
            print(f" {path}: {err}", file=sys.stderr)
        sys.exit(1)

View File

@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""Verify settings.json hook deduplication across all workspace containers.
Exits 0 if all containers have clean (no-duplicate) hook lists.
Exits 1 if any container still has duplicate hook entries.
Usage:
python3 scripts/verify_settings_hooks.py
"""
from __future__ import annotations
import glob
import json
import sys
def has_duplicates(data: dict) -> tuple[bool, dict[str, tuple[int, int]]]:
    """Report whether any hook event list contains duplicate handlers.

    A handler's identity is its ``matcher`` string plus the frozenset of
    its inner hook commands. Returns ``(found, stats)`` where ``stats``
    maps each event name to ``(total_handlers, unique_handlers)``.
    """
    stats: dict[str, tuple[int, int]] = {}
    found = False
    for event, handlers in data.get("hooks", {}).items():
        unique_keys: set = set()
        for entry in handlers:
            commands = frozenset(
                inner.get("command", "") for inner in entry.get("hooks", [])
            )
            identity = (entry.get("matcher", ""), commands)
            found = found or identity in unique_keys
            unique_keys.add(identity)
        stats[event] = (len(handlers), len(unique_keys))
    return found, stats
def main() -> None:
    """Scan all workspace containers' settings.json files and report whether
    any still contain duplicate hook handlers.

    Exits 1 when any file is dirty or unreadable; exits 0 otherwise.
    """
    settings_glob = "/proc/*/root/configs/.claude/settings.json"
    dirty: list[tuple[str, dict]] = []
    clean_count = 0
    errors: list[tuple[str, str]] = []
    for settings_path in sorted(glob.glob(settings_glob)):
        try:
            with open(settings_path) as fh:
                parsed = json.load(fh)
            found, stats = has_duplicates(parsed)
            if found:
                dirty.append((settings_path, stats))
            else:
                clean_count += 1
        except Exception as exc:
            errors.append((settings_path, str(exc)))
    print(f"Clean: {clean_count} Dirty: {len(dirty)} Errors: {len(errors)}")
    for settings_path, stats in dirty:
        # Path layout is /proc/<pid>/root/... so component index 2 is the PID.
        pid = settings_path.split("/")[2]
        summary = ", ".join(
            f"{ev}: {total} total/{unique} unique"
            for ev, (total, unique) in stats.items()
        )
        print(f" DIRTY PID {pid}: {summary}")
    for settings_path, err in errors:
        print(f" ERROR {settings_path}: {err}", file=sys.stderr)
    if dirty or errors:
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,130 @@
# Google ADK Adapter
Molecule AI workspace adapter for [Google Agent Development Kit (ADK)](https://github.com/google/adk-python) — Google's official multi-agent Python SDK (~19k ⭐, Apache-2.0).
## Overview
This adapter bridges the A2A protocol used by the Molecule AI platform to Google ADK's runner/session model. Agents are backed by Google Gemini models via AI Studio or Vertex AI. Each workspace gets an `LlmAgent` wrapped in a `Runner` with an `InMemorySessionService`; sessions are tied to A2A task context IDs for stable, isolated per-conversation state.
**Runtime key:** `google-adk`
## Installation
The adapter dependencies are installed automatically by `entrypoint.sh` from this directory's `requirements.txt`:
```bash
pip install -r adapters/google-adk/requirements.txt
```
You'll also need a Google API key (AI Studio) or Vertex AI credentials.
## Configuration
### `config.yaml`
```yaml
runtime: google-adk
model: google:gemini-2.0-flash # or gemini-1.5-pro, gemini-2.5-flash, etc.
runtime_config:
agent_name: my-agent # optional, default: molecule-adk-agent
max_output_tokens: 8192 # optional, default: 8192
temperature: 1.0 # optional, default: 1.0
```
### Environment Variables
| Variable | Required | Description |
|----------|----------|-------------|
| `GOOGLE_API_KEY` | Yes (unless Vertex AI) | Google AI Studio API key |
| `GOOGLE_GENAI_USE_VERTEXAI` | No | Set to `"1"` to use Vertex AI instead of AI Studio |
| `GOOGLE_CLOUD_PROJECT` | When using Vertex AI | GCP project ID |
| `GOOGLE_CLOUD_LOCATION` | When using Vertex AI | GCP region, e.g. `"us-central1"` |
## Usage Example
```python
import asyncio
from adapter_base import AdapterConfig
from adapters.google_adk.adapter import GoogleADKAdapter
async def main():
config = AdapterConfig(
model="google:gemini-2.0-flash",
system_prompt="You are a helpful assistant.",
runtime_config={
"agent_name": "demo-agent",
"max_output_tokens": 1024,
"temperature": 0.7,
},
workspace_id="ws-demo",
)
adapter = GoogleADKAdapter()
await adapter.setup(config) # validates keys, loads plugins/skills
executor = await adapter.create_executor(config) # returns GoogleADKA2AExecutor
# executor.execute(context, event_queue) is called by the A2A server per turn
print(f"Adapter: {adapter.display_name()} — model {config.model}")
asyncio.run(main())
```
### Running via A2A
Once the workspace is provisioned, send A2A messages as normal:
```bash
curl -X POST http://localhost:8000 \
-H 'Content-Type: application/json' \
-d '{
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"kind": "text", "text": "What is 2 + 2?"}]
}
}
}'
```
## Supported Models
Any model supported by Google ADK and available through your credential path:
| Model | Notes |
|-------|-------|
| `gemini-2.0-flash` | Recommended — fast, cost-effective |
| `gemini-2.5-flash` | Latest preview, strong reasoning |
| `gemini-1.5-pro` | Higher capability, higher latency |
| `gemini-1.5-flash` | Fast, lower cost |
Use the `google:` prefix in `config.yaml` — the adapter strips it before passing the model name to ADK.
## Architecture
```
A2A Request
    ↓
GoogleADKA2AExecutor.execute()
    ├── extract_message_text()   ← shared_runtime helper
    ├── _ensure_session()        ← create/reuse InMemorySessionService session
    └── _build_content()         ← wrap text in google.genai.types.Content
    ↓
runner.run_async(session_id, user_id, new_message)
    ↓
ADK Event stream → filter is_final_response() → extract text
    ↓
event_queue.enqueue_event(new_agent_text_message(reply))
    ↓
A2A Response
```
## License
Apache-2.0 — same as [google/adk-python](https://github.com/google/adk-python).

View File

@ -0,0 +1,392 @@
"""Google ADK adapter for Molecule AI workspace runtime.
Wraps Google's Agent Development Kit (google-adk v1.x) as a Molecule AI
WorkspaceAdapter, bridging the A2A protocol to Google ADK's runner/session
model.
Google ADK concepts used
------------------------
- ``google.adk.agents.LlmAgent`` An LLM-backed agent with instructions and
optional tools. Declared with ``model``, ``name``, and ``instruction``.
- ``google.adk.runners.Runner`` Drives one or more agents inside a session;
``run_async()`` streams ``Event`` objects, including the final response text.
- ``google.adk.sessions.InMemorySessionService`` Manages session state in
memory. Each ``Runner`` owns a single ``InMemorySessionService`` instance.
Runtime-config keys (all optional)
------------------------------------
``max_output_tokens`` int, default 8192. Forwarded to the ADK ``GenerateContentConfig``.
``temperature`` float, default 1.0.
``agent_name`` str, default ``"molecule-adk-agent"``.
Environment variables
---------------------
``GOOGLE_API_KEY`` Google AI Studio key (required for ``gemini-*`` models).
``GOOGLE_GENAI_USE_VERTEXAI`` set to ``"1"`` to use Vertex AI instead of AI
Studio. In that case supply
``GOOGLE_CLOUD_PROJECT`` and
``GOOGLE_CLOUD_LOCATION`` as well.
"""
from __future__ import annotations
import logging
import os
from typing import TYPE_CHECKING, Any
from a2a.server.agent_execution import AgentExecutor, RequestContext
from a2a.server.events import EventQueue
from a2a.utils import new_agent_text_message
from adapter_base import AdapterConfig, BaseAdapter
if TYPE_CHECKING:
pass
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
_DEFAULT_AGENT_NAME = "molecule-adk-agent"
_DEFAULT_MAX_OUTPUT_TOKENS = 8192
_DEFAULT_TEMPERATURE = 1.0
_NO_TEXT_MSG = "Error: message contained no text content."
_NO_RESPONSE_MSG = "(no response generated)"
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor
# ---------------------------------------------------------------------------
class GoogleADKA2AExecutor(AgentExecutor):
    """A2A executor backed by a Google ADK ``Runner``.

    Each executor instance owns a single ``Runner`` and ``InMemorySessionService``.
    Sessions are created on first use and reused across subsequent turns
    (the session_id is derived from the A2A context_id so each task gets a
    stable, isolated session).

    Parameters
    ----------
    model:
        ADK model identifier, e.g. ``"gemini-2.0-flash"`` or
        ``"gemini-1.5-pro"``.
    system_prompt:
        Optional instruction prepended to every conversation. Passed to
        ``LlmAgent(instruction=...)``.
    agent_name:
        Internal ADK agent name. Defaults to ``_DEFAULT_AGENT_NAME``.
    max_output_tokens:
        Token cap forwarded to ``GenerateContentConfig``.
    temperature:
        Sampling temperature forwarded to ``GenerateContentConfig``.
    heartbeat:
        Optional ``HeartbeatLoop`` instance (unused directly but stored for
        future heartbeat integration).
    _runner:
        Inject a pre-built ``Runner`` for testing only. When provided,
        the real ADK ``Runner`` is never constructed.
    """

    # NOTE(review): max_output_tokens and temperature are stored below but
    # _build_runner never builds a GenerateContentConfig from them, so the
    # "forwarded to GenerateContentConfig" claims above are not yet true in
    # this file — confirm and wire them through, or amend the docstring.

    def __init__(
        self,
        model: str,
        system_prompt: str | None = None,
        agent_name: str = _DEFAULT_AGENT_NAME,
        max_output_tokens: int = _DEFAULT_MAX_OUTPUT_TOKENS,
        temperature: float = _DEFAULT_TEMPERATURE,
        heartbeat: Any = None,
        _runner: Any = None,
    ) -> None:
        self.model = model
        self.system_prompt = system_prompt
        self.agent_name = agent_name
        self.max_output_tokens = max_output_tokens
        self.temperature = temperature
        self._heartbeat = heartbeat
        # Session ids already created/verified in the session service.
        self._sessions_created: set[str] = set()
        if _runner is not None:
            # Test injection — skip building the real ADK objects.
            self._runner = _runner
        else:
            self._runner = self._build_runner()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _build_runner(self) -> Any:  # pragma: no cover — requires real ADK
        """Construct a Google ADK ``Runner`` with an ``LlmAgent``.

        Lazy-imports ``google.adk`` so the rest of the workspace runtime
        doesn't pull in google-adk on startup (it's only needed when this
        executor is actually instantiated by ``GoogleADKAdapter.create_executor``).
        """
        from google.adk.agents import LlmAgent
        from google.adk.runners import Runner
        from google.adk.sessions import InMemorySessionService

        agent = LlmAgent(
            name=self.agent_name,
            model=self.model,
            instruction=self.system_prompt or "",
        )
        session_service = InMemorySessionService()
        # app_name intentionally mirrors agent_name; _ensure_session must use
        # the same value or session lookups will miss.
        runner = Runner(
            agent=agent,
            app_name=self.agent_name,
            session_service=session_service,
        )
        return runner

    async def _ensure_session(self, session_id: str, user_id: str) -> None:
        """Create a session in the service if it doesn't exist yet."""
        # Fast path: this executor already created/verified the session.
        if session_id in self._sessions_created:
            return
        session_service = self._runner.session_service
        existing = await session_service.get_session(
            app_name=self.agent_name,
            user_id=user_id,
            session_id=session_id,
        )
        if existing is None:
            await session_service.create_session(
                app_name=self.agent_name,
                user_id=user_id,
                session_id=session_id,
            )
        self._sessions_created.add(session_id)

    def _extract_text(self, context: RequestContext) -> str:
        """Pull plain text out of the A2A message parts."""
        from shared_runtime import extract_message_text

        return extract_message_text(context)

    def _build_content(self, user_text: str) -> Any:
        """Wrap user text in an ADK-compatible ``Content`` object."""
        from google.genai.types import Content, Part

        return Content(role="user", parts=[Part(text=user_text)])

    # ------------------------------------------------------------------
    # AgentExecutor interface
    # ------------------------------------------------------------------
    async def execute(self, context: RequestContext, event_queue: EventQueue) -> None:
        """Run a single ADK turn and enqueue the reply as an A2A Message.

        Sequence:

        1. Extract user text from A2A message parts.
        2. Ensure an ADK session exists for this context_id.
        3. Call ``runner.run_async()`` and collect all response events.
        4. Concatenate final-response text; fall back to ``_NO_RESPONSE_MSG``
           when the model produces no output.
        5. Enqueue the reply via ``event_queue``.
        """
        user_text = self._extract_text(context)
        if not user_text:
            parts = getattr(getattr(context, "message", None), "parts", None)
            logger.warning("GoogleADKA2AExecutor: no text in message parts: %s", parts)
            await event_queue.enqueue_event(new_agent_text_message(_NO_TEXT_MSG))
            return
        # One ADK session per A2A context; falls back to a shared default when
        # the request carries no context_id.
        session_id = getattr(context, "context_id", None) or "default-session"
        user_id = "molecule-user"
        try:
            await self._ensure_session(session_id, user_id)
            content = self._build_content(user_text)
            response_parts: list[str] = []
            async for event in self._runner.run_async(
                session_id=session_id,
                user_id=user_id,
                new_message=content,
            ):
                # Collect text from final-response events
                if not getattr(event, "is_final_response", lambda: False)():
                    continue
                # NOTE(review): this reads event.response.content.parts; the
                # google-adk Event API commonly exposes final text via
                # event.content.parts — confirm against the pinned SDK version.
                candidate_response = getattr(event, "response", None)
                if candidate_response is None:
                    continue
                # MissingContent() supplies an empty .parts so a None content
                # iterates zero times instead of raising AttributeError.
                for part in getattr(
                    getattr(candidate_response, "content", None) or MissingContent(),
                    "parts", []
                ):
                    text = getattr(part, "text", None)
                    if text:
                        response_parts.append(text)
            final_text = "".join(response_parts).strip() or _NO_RESPONSE_MSG
            await event_queue.enqueue_event(new_agent_text_message(final_text))
        except Exception as exc:
            logger.error(
                "GoogleADKA2AExecutor: execution error [model=%s]: %s",
                self.model,
                type(exc).__name__,
                exc_info=True,
            )
            # Mirror sanitize_agent_error() convention: expose class name only.
            await event_queue.enqueue_event(
                new_agent_text_message(f"Agent error: {type(exc).__name__}")
            )

    async def cancel(self, context: RequestContext, event_queue: EventQueue) -> None:
        """Cancel a running task — emits canceled state per A2A protocol."""
        from a2a.types import TaskState, TaskStatus, TaskStatusUpdateEvent

        await event_queue.enqueue_event(
            TaskStatusUpdateEvent(
                status=TaskStatus(state=TaskState.canceled),
                final=True,
            )
        )
class MissingContent:
    """Sentinel standing in for a ``None`` response content so the
    part-extraction loop in ``GoogleADKA2AExecutor.execute`` iterates zero
    times instead of raising ``AttributeError``.

    ``parts`` is created per instance: the previous class-level mutable
    default (``parts: list = []``) was shared by every instance, so a single
    accidental mutation would have leaked into all future sentinels.
    """

    def __init__(self) -> None:
        # Always empty — consumers only ever iterate over it.
        self.parts: list = []
# ---------------------------------------------------------------------------
# GoogleADKAdapter
# ---------------------------------------------------------------------------
class GoogleADKAdapter(BaseAdapter):
    """Molecule AI workspace adapter for Google ADK (google-adk v1.x).

    Implements the full ``BaseAdapter`` lifecycle:

    - ``setup()`` validates config and runs ``_common_setup()``.
    - ``create_executor()`` returns a ``GoogleADKA2AExecutor`` configured
      from ``AdapterConfig``.
    """

    # Stored by setup(); consumed by create_executor()
    _setup_result: Any = None

    # ------------------------------------------------------------------
    # Identity
    # ------------------------------------------------------------------
    @staticmethod
    def name() -> str:
        """Runtime identifier — matches the ``runtime`` field in config.yaml."""
        return "google-adk"

    @staticmethod
    def display_name() -> str:
        """Human-readable name shown in the Molecule AI UI."""
        return "Google ADK"

    @staticmethod
    def description() -> str:
        """Short description of this adapter's capabilities."""
        return (
            "Google Agent Development Kit (ADK) adapter. "
            "Runs LLM agents via Google Gemini models using the official "
            "google-adk Python SDK (Apache-2.0)."
        )

    @staticmethod
    def get_config_schema() -> dict:
        """JSON Schema for runtime_config fields rendered in the Config tab."""
        return {
            "type": "object",
            "properties": {
                "agent_name": {
                    "type": "string",
                    "default": _DEFAULT_AGENT_NAME,
                    "description": "Internal ADK agent name",
                },
                "max_output_tokens": {
                    "type": "integer",
                    "default": _DEFAULT_MAX_OUTPUT_TOKENS,
                    "description": "Maximum output tokens for the Gemini model",
                },
                "temperature": {
                    "type": "number",
                    "default": _DEFAULT_TEMPERATURE,
                    "minimum": 0.0,
                    "maximum": 2.0,
                    "description": "Sampling temperature",
                },
            },
            "additionalProperties": False,
        }

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    async def setup(self, config: AdapterConfig) -> None:
        """Validate config and run the shared platform setup pipeline.

        Raises ``RuntimeError`` if the required API key is not set and
        Vertex AI mode is not active.

        Args:
            config: ``AdapterConfig`` populated by the workspace runtime.
        """
        # Case-insensitive truthy parse: accepts "1", "true", "True", "TRUE", …
        # (the previous exact-match tuple ("1", "true", "True") silently
        # rejected other casings such as "TRUE").
        use_vertex = (
            os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "").strip().lower()
            in ("1", "true")
        )
        api_key = os.environ.get("GOOGLE_API_KEY", "").strip()
        if not use_vertex and not api_key:
            raise RuntimeError(
                "GoogleADKAdapter requires GOOGLE_API_KEY (for AI Studio) or "
                "GOOGLE_GENAI_USE_VERTEXAI=1 with GOOGLE_CLOUD_PROJECT set."
            )
        logger.info(
            "GoogleADKAdapter.setup: model=%s vertex=%s", config.model, use_vertex
        )
        self._setup_result = await self._common_setup(config)

    async def create_executor(self, config: AdapterConfig) -> GoogleADKA2AExecutor:
        """Build and return a ``GoogleADKA2AExecutor`` for A2A integration.

        Uses the system prompt assembled by ``_common_setup()`` in ``setup()``.
        Runtime-config keys ``agent_name``, ``max_output_tokens``, and
        ``temperature`` are respected when present.

        Args:
            config: ``AdapterConfig`` populated by the workspace runtime.

        Returns:
            A ready-to-use ``GoogleADKA2AExecutor`` instance.
        """
        rc = config.runtime_config or {}
        # Strip provider prefix from model, e.g. "google:gemini-2.0-flash" → "gemini-2.0-flash"
        model = config.model
        if ":" in model:
            model = model.split(":", 1)[1]
        # Prefer the fully assembled prompt from setup(); fall back to the raw
        # config prompt when create_executor is called without setup().
        system_prompt = (
            self._setup_result.system_prompt
            if self._setup_result is not None
            else config.system_prompt or ""
        )
        return GoogleADKA2AExecutor(
            model=model,
            system_prompt=system_prompt,
            agent_name=rc.get("agent_name", _DEFAULT_AGENT_NAME),
            max_output_tokens=int(rc.get("max_output_tokens", _DEFAULT_MAX_OUTPUT_TOKENS)),
            temperature=float(rc.get("temperature", _DEFAULT_TEMPERATURE)),
            heartbeat=config.heartbeat,
        )
# ---------------------------------------------------------------------------
# Module-level alias required by the adapter autodiscovery loader
# ---------------------------------------------------------------------------
# The loader imports each adapter module and looks up the `Adapter` symbol.
Adapter = GoogleADKAdapter

View File

@ -0,0 +1,7 @@
# Google ADK adapter dependencies
# Pin to the latest stable release — update when a new version is verified.
google-adk==1.30.0
# google-adk transitively requires google-genai; declare an explicit minimum
# version for visibility (note: `>=` sets a floor, not a hard pin — tighten to
# `==` if fully reproducible installs are required, matching other adapters).
google-genai>=1.16.0

View File

@ -0,0 +1,996 @@
"""Unit tests for adapters/google-adk/adapter.py.
Coverage targets (100%)
-----------------------
- Module constants: _DEFAULT_AGENT_NAME, _DEFAULT_MAX_OUTPUT_TOKENS, etc.
- MissingContent sentinel class
- GoogleADKA2AExecutor.__init__ field assignment + runner injection
- GoogleADKA2AExecutor._extract_text
- GoogleADKA2AExecutor._build_content
- GoogleADKA2AExecutor._ensure_session first call (create), subsequent call (skip)
- GoogleADKA2AExecutor.execute happy path, empty input, API error,
no final_response events, partial text
- GoogleADKA2AExecutor.cancel TaskStatusUpdateEvent emitted
- GoogleADKAdapter.name / display_name / description / get_config_schema
- GoogleADKAdapter.setup success, missing key, vertex override
- GoogleADKAdapter.create_executor model stripping, defaults, rc overrides
- Adapter alias
All google-adk, google-genai, and shared_runtime calls are mocked.
No live API calls are made.
"""
from __future__ import annotations
import sys
from types import ModuleType
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Stub heavy external modules BEFORE the adapter is imported.
# conftest.py already stubs: a2a, builtin_tools, langchain_core.
# We need to additionally stub: google.adk, google.genai, shared_runtime.
# ---------------------------------------------------------------------------
def _make_a2a_stubs() -> None:
"""Register minimal a2a SDK stubs in sys.modules.
Mirrors what workspace-template/tests/conftest.py does; needed because
this test file lives outside the ``tests/`` directory and conftest.py
is not automatically loaded for it.
"""
if "a2a" in sys.modules:
# Already mocked by conftest — just ensure new_agent_text_message is passthrough
a2a_utils = sys.modules.get("a2a.utils")
if a2a_utils and callable(getattr(a2a_utils, "new_agent_text_message", None)):
a2a_utils.new_agent_text_message = lambda text, **kwargs: text
return
agent_execution_mod = ModuleType("a2a.server.agent_execution")
class AgentExecutor:
pass
class RequestContext:
pass
agent_execution_mod.AgentExecutor = AgentExecutor
agent_execution_mod.RequestContext = RequestContext
events_mod = ModuleType("a2a.server.events")
class EventQueue:
pass
events_mod.EventQueue = EventQueue
tasks_mod = ModuleType("a2a.server.tasks")
types_mod = ModuleType("a2a.types")
class TextPart:
def __init__(self, text=""):
self.text = text
class Part:
def __init__(self, root=None):
self.root = root
types_mod.TextPart = TextPart
types_mod.Part = Part
utils_mod = ModuleType("a2a.utils")
# Passthrough so tests can assert on the plain text string, matching the
# hermes_executor test convention from conftest.py.
utils_mod.new_agent_text_message = lambda text, **kwargs: text
a2a_mod = ModuleType("a2a")
a2a_server_mod = ModuleType("a2a.server")
sys.modules["a2a"] = a2a_mod
sys.modules["a2a.server"] = a2a_server_mod
sys.modules["a2a.server.agent_execution"] = agent_execution_mod
sys.modules["a2a.server.events"] = events_mod
sys.modules["a2a.server.tasks"] = tasks_mod
sys.modules["a2a.types"] = types_mod
sys.modules["a2a.utils"] = utils_mod
def _make_google_adk_stubs() -> None:
"""Register minimal google.adk and google.genai stubs in sys.modules."""
# google (top-level namespace package)
google_mod = sys.modules.get("google") or ModuleType("google")
google_mod.__path__ = []
sys.modules.setdefault("google", google_mod)
# google.genai
google_genai_mod = ModuleType("google.genai")
google_genai_mod.__path__ = []
google_genai_types_mod = ModuleType("google.genai.types")
class _Content:
def __init__(self, role="user", parts=None):
self.role = role
self.parts = parts or []
class _Part:
def __init__(self, text=""):
self.text = text
google_genai_types_mod.Content = _Content
google_genai_types_mod.Part = _Part
sys.modules["google.genai"] = google_genai_mod
sys.modules["google.genai.types"] = google_genai_types_mod
# google.adk
google_adk_mod = ModuleType("google.adk")
google_adk_mod.__path__ = []
# google.adk.agents
google_adk_agents_mod = ModuleType("google.adk.agents")
class _LlmAgent:
def __init__(self, name="", model="", instruction="", tools=None):
self.name = name
self.model = model
self.instruction = instruction
self.tools = tools or []
google_adk_agents_mod.LlmAgent = _LlmAgent
# google.adk.runners
google_adk_runners_mod = ModuleType("google.adk.runners")
class _Runner:
def __init__(self, agent=None, app_name="", session_service=None):
self.agent = agent
self.app_name = app_name
self.session_service = session_service
async def run_async(self, session_id, user_id, new_message):
# Stub — tests override this via mock runner
return
yield # make it an async generator
google_adk_runners_mod.Runner = _Runner
# google.adk.sessions
google_adk_sessions_mod = ModuleType("google.adk.sessions")
class _InMemorySessionService:
def __init__(self):
self._sessions: dict = {}
async def get_session(self, app_name, user_id, session_id):
return self._sessions.get((app_name, user_id, session_id))
async def create_session(self, app_name, user_id, session_id):
self._sessions[(app_name, user_id, session_id)] = {"id": session_id}
return self._sessions[(app_name, user_id, session_id)]
google_adk_sessions_mod.InMemorySessionService = _InMemorySessionService
sys.modules["google.adk"] = google_adk_mod
sys.modules["google.adk.agents"] = google_adk_agents_mod
sys.modules["google.adk.runners"] = google_adk_runners_mod
sys.modules["google.adk.sessions"] = google_adk_sessions_mod
def _make_shared_runtime_stub() -> None:
"""Register shared_runtime stub with extract_message_text."""
if "shared_runtime" not in sys.modules:
mod = ModuleType("shared_runtime")
def _extract_message_text(ctx) -> str:
parts = getattr(getattr(ctx, "message", None), "parts", None)
if parts is None:
parts = ctx
texts = []
for p in parts or []:
t = getattr(p, "text", None) or getattr(
getattr(p, "root", None), "text", None
) or ""
if t:
texts.append(t)
return " ".join(texts).strip()
mod.extract_message_text = _extract_message_text
sys.modules["shared_runtime"] = mod
def _make_adapter_base_stub() -> None:
"""Register adapter_base stub in sys.modules."""
if "adapter_base" not in sys.modules:
mod = ModuleType("adapter_base")
from dataclasses import dataclass, field
from abc import ABC, abstractmethod
@dataclass
class AdapterConfig:
model: str = "google:gemini-2.0-flash"
system_prompt: str | None = None
tools: list = field(default_factory=list)
runtime_config: dict = field(default_factory=dict)
config_path: str = "/configs"
workspace_id: str = ""
prompt_files: list = field(default_factory=list)
a2a_port: int = 8000
heartbeat: object = None
class BaseAdapter(ABC):
@staticmethod
@abstractmethod
def name() -> str: ... # pragma: no cover
@staticmethod
@abstractmethod
def display_name() -> str: ... # pragma: no cover
@staticmethod
@abstractmethod
def description() -> str: ... # pragma: no cover
@staticmethod
def get_config_schema() -> dict:
return {}
def memory_filename(self) -> str:
return "CLAUDE.md"
def register_tool_hook(self, name, fn): return None # noqa
async def transcript_lines(self, since=0, limit=100): return {"supported": False} # noqa
def register_subagent_hook(self, name, spec): return None # noqa
def append_to_memory_hook(self, config, filename, content): pass # noqa
async def install_plugins_via_registry(self, config, plugins): return [] # noqa
async def inject_plugins(self, config, plugins):
await self.install_plugins_via_registry(config, plugins)
async def _common_setup(self, config):
from types import SimpleNamespace
return SimpleNamespace(
system_prompt="mocked system prompt",
loaded_skills=[],
langchain_tools=[],
is_coordinator=False,
children=[],
)
@abstractmethod
async def setup(self, config) -> None: ... # pragma: no cover
@abstractmethod
async def create_executor(self, config): ... # pragma: no cover
mod.AdapterConfig = AdapterConfig
mod.BaseAdapter = BaseAdapter
mod.SetupResult = None
sys.modules["adapter_base"] = mod
# Install all stubs before importing the module under test
# Order matters: a2a must be stubbed before adapter.py is imported so that
# `from a2a.utils import new_agent_text_message` resolves to the passthrough.
_make_a2a_stubs()
_make_google_adk_stubs()
_make_shared_runtime_stub()
_make_adapter_base_stub()
# Now safe to import the adapter
import sys as _sys
import os as _os

# Make this file's directory importable so `import adapter` resolves no matter
# which working directory pytest was launched from.
_adapter_dir = _os.path.dirname(_os.path.abspath(__file__))
if _adapter_dir not in _sys.path:
    _sys.path.insert(0, _adapter_dir)
from adapter import (  # noqa: E402
    Adapter,
    GoogleADKA2AExecutor,
    GoogleADKAdapter,
    MissingContent,
    _DEFAULT_AGENT_NAME,
    _DEFAULT_MAX_OUTPUT_TOKENS,
    _DEFAULT_TEMPERATURE,
    _NO_RESPONSE_MSG,
    _NO_TEXT_MSG,
)
# ---------------------------------------------------------------------------
# Fixtures and helpers
# ---------------------------------------------------------------------------
def _make_context(text: str, context_id: str = "ctx-test") -> MagicMock:
"""Return a mock RequestContext with the given text in message.parts."""
part = MagicMock()
part.text = text
ctx = MagicMock()
ctx.message.parts = [part]
ctx.context_id = context_id
return ctx
def _make_empty_context() -> MagicMock:
"""Return a context whose message parts contain no text."""
part = MagicMock(spec=[])
part.root = MagicMock(spec=[])
ctx = MagicMock()
ctx.message.parts = [part]
ctx.context_id = "ctx-empty"
return ctx
def _make_event(is_final: bool, text: str | None = None) -> MagicMock:
"""Build a mock ADK Event that optionally is a final response."""
event = MagicMock()
event.is_final_response = MagicMock(return_value=is_final)
if text is not None:
part = MagicMock()
part.text = text
event.response = MagicMock()
event.response.content = MagicMock()
event.response.content.parts = [part]
else:
event.response = None
return event
async def _async_gen(*events):
"""Yield events one by one as an async generator."""
for e in events:
yield e
def _make_runner(events=None) -> MagicMock:
    """Return a mock Runner whose run_async yields ``events``.

    The session service reports no existing session and records
    create_session calls; run_async returns a single (one-shot) async
    generator over the given events.
    """
    mock_runner = MagicMock()
    mock_runner.session_service = AsyncMock()
    mock_runner.session_service.get_session = AsyncMock(return_value=None)
    mock_runner.session_service.create_session = AsyncMock(return_value={"id": "s1"})
    mock_runner.run_async = MagicMock(return_value=_async_gen(*(events or [])))
    return mock_runner
def _make_executor(
    model: str = "gemini-2.0-flash",
    system_prompt: str | None = "You are helpful.",
    runner: MagicMock | None = None,
) -> GoogleADKA2AExecutor:
    """Create a GoogleADKA2AExecutor wired to an injected mock runner."""
    # Falsy-or fallback kept deliberately: a missing runner gets a fresh mock.
    return GoogleADKA2AExecutor(
        model=model,
        system_prompt=system_prompt,
        _runner=runner or _make_runner(),
    )
def _make_adapter_config(**kwargs) -> object:
    """Return an AdapterConfig with sensible defaults, overridable via kwargs."""
    from adapter_base import AdapterConfig

    base = {
        "model": "google:gemini-2.0-flash",
        "system_prompt": "Test prompt.",
        "runtime_config": {},
        "workspace_id": "ws-test",
    }
    base.update(kwargs)
    return AdapterConfig(**base)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
def test_default_agent_name():
    # Pin the default agent name — it doubles as the ADK app_name.
    assert _DEFAULT_AGENT_NAME == "molecule-adk-agent"


def test_default_max_output_tokens():
    assert _DEFAULT_MAX_OUTPUT_TOKENS == 8192


def test_default_temperature():
    assert _DEFAULT_TEMPERATURE == 1.0


def test_no_text_msg_constant():
    # Loose substring checks so the exact wording can evolve without churn.
    assert "no text" in _NO_TEXT_MSG.lower()


def test_no_response_msg_constant():
    assert "no response" in _NO_RESPONSE_MSG.lower()


# ---------------------------------------------------------------------------
# MissingContent sentinel
# ---------------------------------------------------------------------------
def test_missing_content_has_empty_parts():
    # The sentinel must expose an iterable, empty `parts`.
    mc = MissingContent()
    assert mc.parts == []
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — construction
# ---------------------------------------------------------------------------
def test_constructor_stores_fields():
    runner = _make_runner()
    executor = GoogleADKA2AExecutor(
        model="gemini-1.5-pro",
        system_prompt="Hello",
        agent_name="my-agent",
        max_output_tokens=4096,
        temperature=0.5,
        _runner=runner,
    )
    # Every constructor argument must land on the matching attribute.
    assert executor.model == "gemini-1.5-pro"
    assert executor.system_prompt == "Hello"
    assert executor.agent_name == "my-agent"
    assert executor.max_output_tokens == 4096
    assert executor.temperature == 0.5
    assert executor._runner is runner
    assert executor._sessions_created == set()


def test_constructor_defaults():
    # Omitting optional kwargs falls back to the module-level defaults.
    executor = GoogleADKA2AExecutor(model="gemini-2.0-flash", _runner=_make_runner())
    assert executor.system_prompt is None
    assert executor.agent_name == _DEFAULT_AGENT_NAME
    assert executor.max_output_tokens == _DEFAULT_MAX_OUTPUT_TOKENS
    assert executor.temperature == _DEFAULT_TEMPERATURE
    assert executor._heartbeat is None


def test_constructor_uses_injected_runner():
    # _runner injection must bypass _build_runner (no real ADK objects built).
    stub = MagicMock()
    stub.session_service = MagicMock()
    executor = GoogleADKA2AExecutor(model="gemini-2.0-flash", _runner=stub)
    assert executor._runner is stub
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — _extract_text
# ---------------------------------------------------------------------------
def test_extract_text_returns_message_text():
    executor = _make_executor()
    ctx = _make_context("Hello world")
    result = executor._extract_text(ctx)
    assert result == "Hello world"


def test_extract_text_empty_context():
    # Parts exposing neither .text nor .root.text yield "" — not an error.
    executor = _make_executor()
    ctx = _make_empty_context()
    result = executor._extract_text(ctx)
    assert result == ""


# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — _build_content
# ---------------------------------------------------------------------------
def test_build_content_creates_content_object():
    # ADK expects role="user" and exactly one text part per user turn.
    executor = _make_executor()
    content = executor._build_content("test message")
    assert content.role == "user"
    assert len(content.parts) == 1
    assert content.parts[0].text == "test message"


def test_build_content_empty_string():
    executor = _make_executor()
    content = executor._build_content("")
    assert content.parts[0].text == ""
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — _ensure_session
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_ensure_session_creates_when_not_exists():
    # When the session service has no session yet, one is created and tracked.
    stub_runner = _make_runner()
    stub_runner.session_service.get_session = AsyncMock(return_value=None)
    ex = GoogleADKA2AExecutor(
        model="gemini-2.0-flash", agent_name="test-agent", _runner=stub_runner
    )
    await ex._ensure_session("session-1", "user-1")
    stub_runner.session_service.create_session.assert_called_once_with(
        app_name="test-agent",
        user_id="user-1",
        session_id="session-1",
    )
    assert "session-1" in ex._sessions_created
@pytest.mark.asyncio
async def test_ensure_session_skips_if_already_tracked():
    # A session id already in the local cache short-circuits all service calls.
    stub_runner = _make_runner()
    ex = GoogleADKA2AExecutor(model="gemini-2.0-flash", _runner=stub_runner)
    ex._sessions_created.add("session-x")
    await ex._ensure_session("session-x", "user-1")
    # Neither get_session nor create_session should be called
    stub_runner.session_service.get_session.assert_not_called()
    stub_runner.session_service.create_session.assert_not_called()
@pytest.mark.asyncio
async def test_ensure_session_skips_create_when_existing():
    # An already-existing backend session is tracked locally without re-creating it.
    stub_runner = _make_runner()
    stub_runner.session_service.get_session = AsyncMock(return_value={"id": "s1"})
    ex = GoogleADKA2AExecutor(
        model="gemini-2.0-flash", agent_name="test-agent", _runner=stub_runner
    )
    await ex._ensure_session("session-existing", "user-1")
    stub_runner.session_service.create_session.assert_not_called()
    assert "session-existing" in ex._sessions_created
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — execute: happy path
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_execute_returns_response_text():
    # The final event's text is enqueued verbatim.
    final_evt = _make_event(is_final=True, text="The answer is 42.")
    ex = _make_executor(runner=_make_runner(events=[final_evt]))
    queue = AsyncMock()
    await ex.execute(_make_context("What is 6×7?"), queue)
    queue.enqueue_event.assert_called_once_with("The answer is 42.")
@pytest.mark.asyncio
async def test_execute_concatenates_multiple_final_parts():
    # Text from every part of the final response is joined in order.
    first = MagicMock()
    second = MagicMock()
    first.text = "Hello "
    second.text = "world"
    evt = MagicMock()
    evt.is_final_response = MagicMock(return_value=True)
    evt.response = MagicMock()
    evt.response.content = MagicMock()
    evt.response.content.parts = [first, second]
    ex = _make_executor(runner=_make_runner(events=[evt]))
    queue = AsyncMock()
    await ex.execute(_make_context("Hi"), queue)
    queue.enqueue_event.assert_called_once_with("Hello world")
@pytest.mark.asyncio
async def test_execute_skips_non_final_events():
    # Intermediate (non-final) events contribute nothing to the reply.
    events = [
        _make_event(is_final=False, text="intermediate"),
        _make_event(is_final=True, text="final answer"),
    ]
    ex = _make_executor(runner=_make_runner(events=events))
    queue = AsyncMock()
    await ex.execute(_make_context("question"), queue)
    assert queue.enqueue_event.call_args[0][0] == "final answer"
@pytest.mark.asyncio
async def test_execute_fallback_when_no_final_response_events():
    # With no final event at all, the canned no-response message is sent.
    ex = _make_executor(runner=_make_runner(events=[_make_event(is_final=False)]))
    queue = AsyncMock()
    await ex.execute(_make_context("hello"), queue)
    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
@pytest.mark.asyncio
async def test_execute_fallback_when_response_is_none():
    # A final event whose response object is missing triggers the fallback.
    evt = MagicMock()
    evt.is_final_response = MagicMock(return_value=True)
    evt.response = None  # no response object
    ex = _make_executor(runner=_make_runner(events=[evt]))
    queue = AsyncMock()
    await ex.execute(_make_context("ping"), queue)
    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
@pytest.mark.asyncio
async def test_execute_fallback_when_parts_have_no_text():
    # Parts carrying no text collapse to the fallback message.
    textless = MagicMock()
    textless.text = None  # no text on the part
    evt = MagicMock()
    evt.is_final_response = MagicMock(return_value=True)
    evt.response = MagicMock()
    evt.response.content = MagicMock()
    evt.response.content.parts = [textless]
    ex = _make_executor(runner=_make_runner(events=[evt]))
    queue = AsyncMock()
    await ex.execute(_make_context("ping"), queue)
    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
@pytest.mark.asyncio
async def test_execute_fallback_when_response_content_is_none():
    # content=None on a final event also produces the fallback message.
    evt = MagicMock()
    evt.is_final_response = MagicMock(return_value=True)
    evt.response = MagicMock()
    evt.response.content = None  # content is None → MissingContent sentinel
    ex = _make_executor(runner=_make_runner(events=[evt]))
    queue = AsyncMock()
    await ex.execute(_make_context("ping"), queue)
    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
@pytest.mark.asyncio
async def test_execute_uses_context_id_as_session_id():
    # The A2A context id doubles as the ADK session id.
    stub_runner = _make_runner(events=[_make_event(is_final=True, text="ok")])
    ex = _make_executor(runner=stub_runner)
    await ex.execute(_make_context("hello", context_id="ctx-abc-123"), AsyncMock())
    stub_runner.run_async.assert_called_once()
    kwargs = stub_runner.run_async.call_args[1]
    assert kwargs["session_id"] == "ctx-abc-123"
    assert kwargs["user_id"] == "molecule-user"
@pytest.mark.asyncio
async def test_execute_falls_back_to_default_session_id_when_context_id_is_none():
    # A missing context id maps to the static default session id.
    stub_runner = _make_runner(events=[_make_event(is_final=True, text="ok")])
    ex = _make_executor(runner=stub_runner)
    request_ctx = _make_context("hello")
    request_ctx.context_id = None  # override
    await ex.execute(request_ctx, AsyncMock())
    assert stub_runner.run_async.call_args[1]["session_id"] == "default-session"
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — execute: empty input
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_execute_empty_input_returns_error():
    # Empty user input is rejected before the model is ever invoked.
    stub_runner = _make_runner()
    ex = _make_executor(runner=stub_runner)
    queue = AsyncMock()
    await ex.execute(_make_empty_context(), queue)
    queue.enqueue_event.assert_called_once_with(_NO_TEXT_MSG)
    stub_runner.run_async.assert_not_called()
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — execute: error handling
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_execute_api_error_returns_sanitized_message():
    # Exception details (which may embed secrets) are reduced to the class name.
    stub_runner = _make_runner()

    class _FakeAPIError(Exception):
        pass

    async def _boom(*args, **kwargs):
        raise _FakeAPIError("api_key=secret token_limit_exceeded")
        yield  # make it an async generator

    stub_runner.run_async = MagicMock(return_value=_boom())
    ex = _make_executor(runner=stub_runner)
    queue = AsyncMock()
    await ex.execute(_make_context("hello"), queue)
    sent = queue.enqueue_event.call_args[0][0]
    assert sent == "Agent error: _FakeAPIError"
    assert "secret" not in sent
@pytest.mark.asyncio
async def test_execute_api_error_is_logged(caplog):
    # Failures during execution are logged at ERROR on the "adapter" logger.
    import logging

    stub_runner = _make_runner()

    async def _boom(*args, **kwargs):
        raise ValueError("bad request")
        yield  # make it an async generator

    stub_runner.run_async = MagicMock(return_value=_boom())
    ex = _make_executor(runner=stub_runner)
    with caplog.at_level(logging.ERROR, logger="adapter"):
        await ex.execute(_make_context("hello"), AsyncMock())
    assert any("execution error" in rec.message.lower() for rec in caplog.records)
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — cancel
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_cancel_emits_canceled_event():
    """cancel() must enqueue a final TaskStatusUpdateEvent in the canceled state.

    Fix over the original: the original assigned stub classes onto the
    a2a.types module and never restored them, leaking the stubs into every
    test that runs afterwards. The patching is now wrapped in try/finally
    with the prior attributes saved and restored.
    """
    executor = _make_executor()
    import a2a.types as a2a_types

    class _TaskState:
        canceled = "canceled"

    class _TaskStatus:
        def __init__(self, state):
            self.state = state

    class _TaskStatusUpdateEvent:
        def __init__(self, status, final):
            self.status = status
            self.final = final

    patched = ("TaskState", "TaskStatus", "TaskStatusUpdateEvent")
    _absent = object()  # sentinel: attribute did not exist before patching
    saved = {name: getattr(a2a_types, name, _absent) for name in patched}
    a2a_types.TaskState = _TaskState
    a2a_types.TaskStatus = _TaskStatus
    a2a_types.TaskStatusUpdateEvent = _TaskStatusUpdateEvent
    try:
        eq = AsyncMock()
        ctx = MagicMock()
        await executor.cancel(ctx, eq)
        eq.enqueue_event.assert_called_once()
        event = eq.enqueue_event.call_args[0][0]
        assert isinstance(event, _TaskStatusUpdateEvent)
        assert event.status.state == "canceled"
        assert event.final is True
    finally:
        # Restore a2a.types so the stubs do not leak into other tests.
        for name, value in saved.items():
            if value is _absent:
                delattr(a2a_types, name)
            else:
                setattr(a2a_types, name, value)
# ---------------------------------------------------------------------------
# GoogleADKAdapter — identity methods
# ---------------------------------------------------------------------------
def test_adapter_name():
    # Registry key for this adapter.
    assert GoogleADKAdapter.name() == "google-adk"
def test_adapter_display_name():
    # Human-readable name must mention Google ADK.
    assert "Google ADK" in GoogleADKAdapter.display_name()
def test_adapter_description():
    # Description should reference the underlying framework.
    text = GoogleADKAdapter.description()
    assert "ADK" in text or "Google" in text
def test_adapter_get_config_schema():
    # The JSON schema exposes every tunable runtime knob.
    schema = GoogleADKAdapter.get_config_schema()
    assert schema["type"] == "object"
    for prop in ("agent_name", "max_output_tokens", "temperature"):
        assert prop in schema["properties"]
# ---------------------------------------------------------------------------
# GoogleADKAdapter — setup
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_setup_succeeds_with_api_key(monkeypatch):
    # An API key alone (Vertex flag absent) is sufficient for setup.
    monkeypatch.setenv("GOOGLE_API_KEY", "fake-api-key")
    monkeypatch.delenv("GOOGLE_GENAI_USE_VERTEXAI", raising=False)
    adp = GoogleADKAdapter()
    await adp.setup(_make_adapter_config())
    assert adp._setup_result is not None
    assert adp._setup_result.system_prompt == "mocked system prompt"
@pytest.mark.asyncio
async def test_setup_succeeds_with_vertex_ai(monkeypatch):
    # The Vertex AI env flag ("1") works without any API key.
    monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
    monkeypatch.setenv("GOOGLE_GENAI_USE_VERTEXAI", "1")
    adp = GoogleADKAdapter()
    await adp.setup(_make_adapter_config())
    assert adp._setup_result is not None
@pytest.mark.asyncio
async def test_setup_succeeds_with_vertex_ai_true_string(monkeypatch):
    # The Vertex AI flag is also accepted as the string "True".
    monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
    monkeypatch.setenv("GOOGLE_GENAI_USE_VERTEXAI", "True")
    adp = GoogleADKAdapter()
    await adp.setup(_make_adapter_config())
    assert adp._setup_result is not None
@pytest.mark.asyncio
async def test_setup_raises_without_credentials(monkeypatch):
    # Neither credential source present → setup must fail loudly.
    monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
    monkeypatch.delenv("GOOGLE_GENAI_USE_VERTEXAI", raising=False)
    adp = GoogleADKAdapter()
    with pytest.raises(RuntimeError, match="GOOGLE_API_KEY"):
        await adp.setup(_make_adapter_config())
# ---------------------------------------------------------------------------
# GoogleADKAdapter — create_executor
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_create_executor_strips_google_prefix(monkeypatch):
    # A "google:" provider prefix is stripped from the model id.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config(model="google:gemini-2.0-flash")
    await adp.setup(cfg)
    assert (await adp.create_executor(cfg)).model == "gemini-2.0-flash"
@pytest.mark.asyncio
async def test_create_executor_no_prefix_passthrough(monkeypatch):
    # An unprefixed model id passes through unchanged.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config(model="gemini-1.5-pro")
    await adp.setup(cfg)
    assert (await adp.create_executor(cfg)).model == "gemini-1.5-pro"
@pytest.mark.asyncio
async def test_create_executor_uses_setup_system_prompt(monkeypatch):
    # The system prompt assembled during setup() flows into the executor.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config()
    await adp.setup(cfg)
    assert (await adp.create_executor(cfg)).system_prompt == "mocked system prompt"
@pytest.mark.asyncio
async def test_create_executor_runtime_config_overrides(monkeypatch):
    # Values in runtime_config override the executor defaults.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    overrides = {
        "agent_name": "custom-agent",
        "max_output_tokens": 512,
        "temperature": 0.3,
    }
    cfg = _make_adapter_config(runtime_config=overrides)
    await adp.setup(cfg)
    ex = await adp.create_executor(cfg)
    assert ex.agent_name == "custom-agent"
    assert ex.max_output_tokens == 512
    assert ex.temperature == 0.3
@pytest.mark.asyncio
async def test_create_executor_defaults_without_runtime_config(monkeypatch):
    # An empty runtime_config leaves every knob at its module default.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config(runtime_config={})
    await adp.setup(cfg)
    ex = await adp.create_executor(cfg)
    assert ex.agent_name == _DEFAULT_AGENT_NAME
    assert ex.max_output_tokens == _DEFAULT_MAX_OUTPUT_TOKENS
    assert ex.temperature == _DEFAULT_TEMPERATURE
@pytest.mark.asyncio
async def test_create_executor_without_setup_uses_config_system_prompt(monkeypatch):
    """create_executor without prior setup falls back to config.system_prompt."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config(system_prompt="fallback prompt")
    # Intentionally skip setup() — _setup_result remains None
    ex = await adp.create_executor(cfg)
    assert ex.system_prompt == "fallback prompt"
@pytest.mark.asyncio
async def test_create_executor_without_setup_no_system_prompt(monkeypatch):
    """create_executor without setup and no system_prompt → empty string."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config(system_prompt=None)
    # Skip setup()
    assert (await adp.create_executor(cfg)).system_prompt == ""
@pytest.mark.asyncio
async def test_create_executor_heartbeat_passed(monkeypatch):
    # The heartbeat object from the config reaches the executor untouched.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    beat = MagicMock()
    cfg = _make_adapter_config(heartbeat=beat)
    await adp.setup(cfg)
    assert (await adp.create_executor(cfg))._heartbeat is beat
# ---------------------------------------------------------------------------
# Adapter alias
# ---------------------------------------------------------------------------
def test_adapter_alias_is_google_adk_adapter():
    # The module-level Adapter alias points at the concrete class.
    assert Adapter is GoogleADKAdapter

View File

@ -55,6 +55,31 @@ else:
echo "=== Molecule AI Workspace ==="
echo "Runtime: $RUNTIME"
# ──────────────────────────────────────────────────────────
# GitHub credential helper — issue #547
# ──────────────────────────────────────────────────────────
# GitHub App installation tokens expire after ~60 min. The platform
# exposes GET /admin/github-installation-token (backed by the plugin's
# in-process refreshing cache) so workspaces can always get a valid
# token without restarting.
#
# Register molecule-git-token-helper.sh as the git credential helper for
# github.com. git calls it on every push/fetch; it hits the platform
# endpoint and emits a fresh token. Falls through to any existing
# credential helper (e.g. operator .env PAT) if the platform is
# unreachable.
#
# Idempotent — safe to re-run on restart.
HELPER_SCRIPT="/workspace-template/scripts/molecule-git-token-helper.sh"
if [ -f "${HELPER_SCRIPT}" ]; then
    # The leading "!" tells git to run the value as a shell command.
    # Errors from git config are swallowed (|| true) so a read-only
    # ~/.gitconfig cannot abort workspace startup.
    git config --global \
        "credential.https://github.com.helper" \
        "!${HELPER_SCRIPT}" 2>/dev/null || true
    echo "[entrypoint] git credential helper registered (molecule-git-token-helper)"
else
    # Helper missing: startup proceeds, but pushes/fetches will use stale tokens.
    echo "[entrypoint] WARNING: molecule-git-token-helper.sh not found at ${HELPER_SCRIPT} — GitHub tokens may expire after 60 min"
fi
# NOTE: Adapter-specific deps are now pre-installed in each adapter's Docker image
# (standalone template repos). Each image installs molecule-ai-workspace-runtime
# from PyPI plus the adapter-specific requirements. No per-runtime pip install needed here.

View File

@ -319,9 +319,25 @@ def _deep_merge_hooks(existing: dict, fragment: dict) -> dict:
out.setdefault("hooks", {})
for event, handlers in fragment.get("hooks", {}).items():
out["hooks"].setdefault(event, [])
out["hooks"][event].extend(handlers)
for key, val in fragment.items():
if key == "hooks":
# Build a set of already-present handler fingerprints so that
# re-installing the same plugin fragment does not append duplicates.
# Key: (matcher, frozenset-of-commands) — same logic the issue spec
# describes. Two handlers are considered identical when they watch the
# same matcher pattern and invoke exactly the same set of commands.
seen: set[tuple[str, frozenset[str]]] = {
(h.get("matcher", ""), frozenset(c.get("command", "") for c in h.get("hooks", [])))
for h in out["hooks"][event]
}
for handler in handlers:
hkey = (
handler.get("matcher", ""),
frozenset(c.get("command", "") for c in handler.get("hooks", [])),
)
if hkey not in seen:
seen.add(hkey)
out["hooks"][event].append(handler)
for top_key, val in fragment.items():
if top_key == "hooks":
continue
out.setdefault(key, val)
out.setdefault(top_key, val)
return out

View File

@ -0,0 +1,112 @@
#!/bin/bash
# molecule-git-token-helper.sh — git credential helper for GitHub App tokens
#
# Fetches a fresh GitHub App installation token from the Molecule AI
# platform endpoint GET /admin/github-installation-token on every git
# push/fetch, so workspace containers never use an expired GH_TOKEN after
# the ~60 min GitHub App token TTL.
#
# # Setup (called once at provision time or initial_prompt)
#
#     git config --global \
#         "credential.https://github.com.helper" \
#         "!/workspace-template/scripts/molecule-git-token-helper.sh"
#
# # How git calls this helper
#
# git passes the action as the first positional arg. The protocol is:
#     get   → output credentials on stdout (we handle this)
#     store → persist credentials (no-op — we never cache)
#     erase → revoke credentials (no-op — platform manages lifecycle)
#
# On `get`, git reads key=value pairs terminated by an empty line.
# We must emit at minimum:
#     username=x-access-token
#     password=<token>
#     (blank line)
#
# # Auth
#
# The platform endpoint requires a valid workspace bearer token. The
# token is stored at ${CONFIGS_DIR}/.auth_token (written by platform_auth.py
# on first /registry/register). Workspace env var PLATFORM_URL defaults
# to http://platform:8080.
#
# # Fallback
#
# If the platform endpoint is unreachable (e.g. network partition) or
# returns non-200, the script exits 1 without printing credentials so git
# will fall through to the next helper in the chain (if any). This
# preserves the operator's fallback PAT from .env if present.
#
# # gh CLI re-auth (30-min cron)
#
# To also fix `gh` CLI auth, run this from a workspace cron prompt:
#
#     token=$(bash /workspace-template/scripts/molecule-git-token-helper.sh _fetch_token)
#     echo "$token" | gh auth login --with-token
#
# (The _fetch_token private action returns only the raw token string.)
#
set -euo pipefail

PLATFORM_URL="${PLATFORM_URL:-http://platform:8080}"
CONFIGS_DIR="${CONFIGS_DIR:-/configs}"
TOKEN_FILE="${CONFIGS_DIR}/.auth_token"
ENDPOINT="${PLATFORM_URL}/admin/github-installation-token"

# _fetch_token — internal helper; also callable directly from cron.
# Outputs the raw token string on success; exits non-zero on failure.
_fetch_token() {
    if [ ! -f "${TOKEN_FILE}" ]; then
        echo "[molecule-git-token-helper] .auth_token not found at ${TOKEN_FILE}" >&2
        exit 1
    fi
    # Strip all whitespace (incl. trailing newline) from the stored token.
    bearer=$(tr -d '[:space:]' < "${TOKEN_FILE}")
    if [ -z "${bearer}" ]; then
        echo "[molecule-git-token-helper] .auth_token is empty" >&2
        exit 1
    fi
    # -f → non-2xx is a failure; -S → with -s, still print the error to
    # stderr, which 2>&1 folds into ${response} so the log line below is
    # actually informative (plain -s left it empty on failure).
    response=$(curl -sfS \
        -H "Authorization: Bearer ${bearer}" \
        -H "Accept: application/json" \
        --max-time 10 \
        "${ENDPOINT}" 2>&1) || {
        echo "[molecule-git-token-helper] platform request failed: ${response}" >&2
        exit 1
    }
    # Parse {"token":"ghs_...","expires_at":"..."} with sed (no jq dependency).
    token=$(echo "${response}" | sed -n 's/.*"token":"\([^"]*\)".*/\1/p')
    if [ -z "${token}" ]; then
        echo "[molecule-git-token-helper] empty token in platform response: ${response}" >&2
        exit 1
    fi
    echo "${token}"
}

ACTION="${1:-get}"
case "${ACTION}" in
    get)
        token=$(_fetch_token) || exit 1
        # Emit git credential protocol response.
        printf 'username=x-access-token\n'
        printf 'password=%s\n' "${token}"
        printf '\n'
        ;;
    store|erase)
        # No-op — the platform manages token lifecycle.
        ;;
    _fetch_token)
        # Private action for cron-based gh auth login --with-token.
        _fetch_token
        ;;
    *)
        echo "[molecule-git-token-helper] unknown action: ${ACTION}" >&2
        exit 1
        ;;
esac

View File

@ -7,6 +7,7 @@ Covers:
- Empty rules directory doesn't write an empty block
- README.md / CHANGELOG.md are skipped at the root (not treated as fragments)
- Uninstall is safe on a plugin that was never installed
- _deep_merge_hooks deduplication (issue #566)
"""
from __future__ import annotations
@ -393,3 +394,90 @@ async def test_setup_sh_absent_no_warning(tmp_path: Path):
result = await AgentskillsAdaptor("p", "claude_code").install(_make_ctx(configs, plugin))
assert result.warnings == []
# ---------------------------------------------------------------------------
# _deep_merge_hooks deduplication — issue #566
# ---------------------------------------------------------------------------
from plugins_registry.builtins import _deep_merge_hooks # noqa: E402
def _make_fragment(event: str, matcher: str, command: str) -> dict:
"""Build a minimal settings-fragment dict for one hook handler."""
return {
"hooks": {
event: [
{
"matcher": matcher,
"hooks": [{"type": "command", "command": command}],
}
]
}
}
def test_deep_merge_hooks_first_install_adds_handler():
    """Merging into an empty dict adds the handler exactly once."""
    merged = _deep_merge_hooks({}, _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh"))
    entries = merged["hooks"]["PreToolUse"]
    assert len(entries) == 1
    assert entries[0]["matcher"] == "Bash"
def test_deep_merge_hooks_dedup_on_reinstall():
    """Merging the same fragment twice must not duplicate the handler."""
    frag = _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh")
    merged = _deep_merge_hooks(_deep_merge_hooks({}, frag), frag)
    assert len(merged["hooks"]["PreToolUse"]) == 1, (
        "Re-installing the same fragment must not append a duplicate handler"
    )
def test_deep_merge_hooks_dedup_three_reinstalls():
    """Issue #566 reported 34× duplication — verify three installs still yield one entry."""
    frag = _make_fragment("PostToolUse", "Write", "/hooks/format.sh")
    merged: dict = {}
    for _ in range(3):
        merged = _deep_merge_hooks(merged, frag)
    assert len(merged["hooks"]["PostToolUse"]) == 1
def test_deep_merge_hooks_different_matchers_both_kept():
    """Two handlers with different matchers must co-exist — dedup must not over-filter."""
    merged = _deep_merge_hooks({}, _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh"))
    merged = _deep_merge_hooks(merged, _make_fragment("PreToolUse", "Edit", "/hooks/lint.sh"))
    assert len(merged["hooks"]["PreToolUse"]) == 2
def test_deep_merge_hooks_different_commands_both_kept():
    """Same matcher but different commands → both handlers must be kept."""
    merged = _deep_merge_hooks({}, _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh"))
    merged = _deep_merge_hooks(merged, _make_fragment("PreToolUse", "Bash", "/hooks/security.sh"))
    assert len(merged["hooks"]["PreToolUse"]) == 2
def test_deep_merge_hooks_existing_user_hooks_preserved():
    """Existing hooks in settings.json that don't match the fragment must survive."""
    user_settings = {
        "hooks": {
            "PreToolUse": [
                {"matcher": "Bash", "hooks": [{"type": "command", "command": "/user/custom.sh"}]}
            ]
        }
    }
    fragment = _make_fragment("PreToolUse", "Edit", "/hooks/lint.sh")
    merged = _deep_merge_hooks(user_settings, fragment)
    assert {h["matcher"] for h in merged["hooks"]["PreToolUse"]} == {"Bash", "Edit"}
def test_deep_merge_hooks_top_level_keys_merged():
"""Non-hook top-level keys in the fragment are merged into the output."""
existing = {"someKey": "old"}
fragment = {"someKey": "new", "anotherKey": "value", "hooks": {}}
result = _deep_merge_hooks(existing, fragment)
# setdefault semantics: existing keys win, new keys are added
assert result["someKey"] == "old"
assert result["anotherKey"] == "value"