fix(merge): combine response_format (#498) and tools (#497) in hermes_executor

Both PRs restructured the same chat.completions.create() call to use a
create_kwargs dict. Resolved by keeping both __init__ params and both
conditionals in the create_kwargs block.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Molecule AI · triage-operator 2026-04-17 07:03:22 +00:00
commit af00a6c128
87 changed files with 10969 additions and 228 deletions

View File

@ -1,39 +1,25 @@
name: publish-platform-image
# Builds and pushes the tenant-platform Docker image to GHCR whenever a
# commit lands on main. The private molecule-controlplane provisioner sets
# TENANT_IMAGE=ghcr.io/molecule-ai/platform:<tag> to spawn tenant Fly
# Machines from this image. See molecule-controlplane README for the pairing.
# Builds and pushes the platform Docker images to GHCR whenever a commit
# lands on main. EC2 tenant instances pull the tenant image from GHCR.
on:
push:
branches: [main]
paths:
# Only rebuild when something platform-relevant changes — saves GHA
# minutes on docs-only / canvas-only / MCP-only PRs.
- 'platform/**'
- 'canvas/**'
- 'manifest.json'
- '.github/workflows/publish-platform-image.yml'
# Templates now live in standalone repos — template changes no longer
# trigger a platform rebuild. Use workflow_dispatch to manually rebuild
# if a template repo update needs to be baked into the image.
# Manual trigger for re-publishing a tag after a non-platform merge.
workflow_dispatch:
permissions:
contents: read
packages: write # required to push to ghcr.io/${{ github.repository_owner }}/*
packages: write
env:
# GHCR accepts mixed-case, but most tooling lowercases — keep us consistent.
IMAGE_NAME: ghcr.io/molecule-ai/platform
# Fly registry mirror — tenant machines provisioned by the private
# `molecule-controlplane` pull from here (private GHCR image can't be
# pulled by Fly machines without auth plumbing we don't want to add).
# Fly auto-authenticates same-org machines against registry.fly.io, so
# mirroring keeps GHCR private while tenants still boot.
FLY_IMAGE_NAME: registry.fly.io/molecule-tenant
TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant
jobs:
build-and-push:
@ -42,83 +28,33 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Configure registry auth (write auths map; do NOT call docker login)
# `docker login` on macOS unconditionally writes credentials to the
# osxkeychain credential helper, even when DOCKER_CONFIG/config.json
# declares `credsStore: ""` and even when invoked with `--config`.
# Verified locally 2026-04-16 — after a successful login, Docker
# rewrites the same config file to:
# { "auths": { "ghcr.io": {} }, "credsStore": "osxkeychain" }
# i.e. the auth lives in the Keychain, not the config file. The
# Mac mini runner is a launchd user agent with a locked Keychain,
# so storage fails with `User interaction is not allowed (-25308)`.
#
# Six prior PRs (#273, #319, #322, #341, #484, #486) all kept calling
# `docker login` and tried to coerce credsStore — none worked.
# The only reliable fix is to skip `docker login` entirely and write
# the auth strings directly. `docker/build-push-action@v5` and the
# daemon honor the `auths` map for push without needing login.
#
# Fly registry username MUST be literal "x" (verified 2026-04-15) —
# any other value returns 401. FLY_API_TOKEN lives in GitHub Actions
# secrets AND in `fly secrets` on molecule-cp; see
# docs/runbooks/saas-secrets.md before rotating.
- name: Configure GHCR auth
shell: bash
env:
GHCR_USER: ${{ github.actor }}
GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
FLY_TOKEN: ${{ secrets.FLY_API_TOKEN }}
run: |
set -eu
mkdir -p "${RUNNER_TEMP}/docker-config"
GHCR_AUTH=$(printf '%s:%s' "${GHCR_USER}" "${GHCR_TOKEN}" | base64)
FLY_AUTH=$(printf '%s:%s' 'x' "${FLY_TOKEN}" | base64)
umask 077
cat > "${RUNNER_TEMP}/docker-config/config.json" <<JSON
{
"auths": {
"ghcr.io": { "auth": "${GHCR_AUTH}" },
"registry.fly.io": { "auth": "${FLY_AUTH}" }
}
}
JSON
printf '{"auths":{"ghcr.io":{"auth":"%s"}}}' "${GHCR_AUTH}" > "${RUNNER_TEMP}/docker-config/config.json"
echo "DOCKER_CONFIG=${RUNNER_TEMP}/docker-config" >> "${GITHUB_ENV}"
# Diagnostics that don't leak the tokens.
echo "=== docker ==="
command -v docker || echo "(docker not in PATH)"
docker --version 2>&1 || true
ls -la /usr/local/bin/docker /opt/homebrew/bin/docker 2>&1 || true
echo "=== auths registries (no values) ==="
grep -o '"[a-zA-Z0-9.-]*\.io"' "${RUNNER_TEMP}/docker-config/config.json" || true
- name: Set up QEMU
# Required on the Apple-silicon self-hosted runner — Fly tenant machines
# pull linux/amd64, and buildx needs binfmt handlers in Docker Desktop's
# VM to emulate amd64 during the build.
uses: docker/setup-qemu-action@v3
with:
platforms: linux/amd64
- name: Set up Docker Buildx
# Buildx enables cache-from/cache-to via GHA cache and multi-arch
# builds without local docker daemon wrangling.
uses: docker/setup-buildx-action@v3
- name: Compute tags
id: tags
# Emit two tags per build: `latest` (floating, always the main tip)
# and the short commit SHA (immutable, pin-friendly). Control plane
# can deploy `latest` today and pin to :sha in Phase H hardening.
run: |
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
- name: Build & push to GHCR
# Split from the Fly mirror so a registry.fly.io outage doesn't block
# GHCR (or vice versa) — each registry's failure mode is isolated.
# GHA cache is shared because both steps re-use the same Dockerfile
# context + build args.
# Explicit linux/amd64 target: the runner is Apple-silicon (arm64),
# but Fly tenant machines are amd64. QEMU handles the emulation.
- name: Build & push platform image to GHCR
uses: docker/build-push-action@v5
with:
context: .
@ -133,13 +69,9 @@ jobs:
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.description=Molecule AI tenant platform (one instance per org)
org.opencontainers.image.description=Molecule AI platform (Go API server)
- name: Build & push tenant image to Fly registry
# Tenant image = Go platform + Canvas (Next.js) in one container.
# Uses Dockerfile.tenant which includes the canvas build + reverse proxy.
# Continues even if GHCR push failed.
if: always()
- name: Build & push tenant image to GHCR
uses: docker/build-push-action@v5
with:
context: .
@ -147,31 +79,11 @@ jobs:
platforms: linux/amd64
push: true
tags: |
${{ env.FLY_IMAGE_NAME }}:latest
${{ env.FLY_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
${{ env.TENANT_IMAGE_NAME }}:latest
${{ env.TENANT_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
cache-from: type=gha
cache-to: type=gha,mode=max
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.description=Molecule AI tenant platform + canvas (one instance per org)
- name: Install flyctl
uses: superfly/flyctl-actions/setup-flyctl@master
- name: Deploy to Fly tenant machines
env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
run: |
MACHINES=$(flyctl machines list -a molecule-tenant --json | jq -r '.[] | select(.state == "started" or .state == "stopped") | .id')
if [ -z "$MACHINES" ]; then
echo "No tenant machines found — skipping deploy (control plane provisions on demand)"
exit 0
fi
for id in $MACHINES; do
echo "Updating machine $id to sha-${{ steps.tags.outputs.sha }}..."
flyctl machines update "$id" \
--image "${{ env.FLY_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}" \
-a molecule-tenant \
--yes
done
echo "All tenant machines updated to sha-${{ steps.tags.outputs.sha }}"
org.opencontainers.image.description=Molecule AI tenant platform + canvas (one EC2 instance per org)

4
.gitignore vendored
View File

@ -44,6 +44,10 @@ venv/
*.egg-info/
.pytest_cache/
# Brand monitor runtime state (never commit)
brand-monitor/.surge_state.json
brand-monitor/.monitor_state.json
# Docker
*.log

View File

@ -1,15 +1,20 @@
"use client";
import { useEffect } from "react";
import { useEffect, useState } from "react";
import { Canvas } from "@/components/Canvas";
import { Legend } from "@/components/Legend";
import { CommunicationOverlay } from "@/components/CommunicationOverlay";
import { Spinner } from "@/components/Spinner";
import { connectSocket, disconnectSocket } from "@/store/socket";
import { useCanvasStore } from "@/store/canvas";
import { api } from "@/lib/api";
import type { WorkspaceData } from "@/store/socket";
export default function Home() {
const hydrationError = useCanvasStore((s) => s.hydrationError);
const setHydrationError = useCanvasStore((s) => s.setHydrationError);
const [hydrating, setHydrating] = useState(true);
useEffect(() => {
connectSocket();
@ -23,8 +28,13 @@ export default function Home() {
useCanvasStore.getState().setViewport(viewport);
}
}).catch((err) => {
// Initial hydration failed — socket reconnect will retry
// Initial hydration failed — show error banner to user
console.error("Canvas: initial hydration failed", err);
useCanvasStore.getState().setHydrationError(
err instanceof Error && err.message ? err.message : "Failed to load canvas"
);
}).finally(() => {
setHydrating(false);
});
return () => {
@ -32,11 +42,39 @@ export default function Home() {
};
}, []);
if (hydrating) {
return (
<div className="fixed inset-0 flex items-center justify-center bg-zinc-950">
<div className="flex flex-col items-center gap-3">
<Spinner size="lg" />
<span className="text-xs text-zinc-500">Loading canvas...</span>
</div>
</div>
);
}
return (
<>
<Canvas />
<Legend />
<CommunicationOverlay />
{hydrationError && (
<div
role="alert"
className="fixed inset-0 flex flex-col items-center justify-center bg-zinc-950 text-zinc-300 gap-4 z-[9999]"
>
<p className="text-zinc-400 text-sm">{hydrationError}</p>
<button
onClick={() => {
setHydrationError(null);
window.location.reload();
}}
className="px-4 py-2 bg-blue-600 hover:bg-blue-500 text-white rounded-md text-sm"
>
Retry
</button>
</div>
)}
</>
);
}

View File

@ -235,6 +235,14 @@ export function ContextMenu() {
closeContextMenu();
}, [contextMenu, nestNode, closeContextMenu]);
const handleZoomToTeam = useCallback(() => {
if (!contextMenu) return;
window.dispatchEvent(
new CustomEvent("molecule:zoom-to-team", { detail: { nodeId: contextMenu.nodeId } })
);
closeContextMenu();
}, [contextMenu, closeContextMenu]);
if (!contextMenu) return null;
const isOfflineOrFailed = contextMenu.nodeData.status === "offline" || contextMenu.nodeData.status === "failed";
@ -253,7 +261,10 @@ export function ContextMenu() {
? [{ label: "Extract from Team", icon: "⤴", action: handleRemoveFromTeam }]
: []),
...(hasChildren
? [{ label: "Collapse Team", icon: "◁", action: handleCollapse }]
? [
{ label: "Collapse Team", icon: "◁", action: handleCollapse },
{ label: "Zoom to Team", icon: "⊕", action: handleZoomToTeam },
]
: [{ label: "Expand to Team", icon: "▷", action: handleExpand }]),
{ label: "", icon: "", action: () => {}, divider: true },
...(isPaused

View File

@ -1,6 +1,6 @@
"use client";
import { useState, useEffect } from "react";
import { useState, useEffect, useRef, useCallback, useId } from "react";
import * as Dialog from "@radix-ui/react-dialog";
import { api } from "@/lib/api";
@ -42,6 +42,7 @@ export function CreateWorkspaceButton() {
const [tier, setTier] = useState(1);
const [template, setTemplate] = useState("");
const [parentId, setParentId] = useState("");
const [budgetLimit, setBudgetLimit] = useState("");
const [creating, setCreating] = useState(false);
const [error, setError] = useState<string | null>(null);
const [workspaces, setWorkspaces] = useState<WorkspaceOption[]>([]);
@ -50,6 +51,33 @@ export function CreateWorkspaceButton() {
const [hermesProvider, setHermesProvider] = useState("anthropic");
const [hermesApiKey, setHermesApiKey] = useState("");
// Refs for roving tabIndex on the tier radio group (WCAG 2.1 arrow-key nav)
const radioRefs = useRef<Array<HTMLButtonElement | null>>([]);
const TIERS = [
{ value: 1, label: "T1", desc: "Sandboxed" },
{ value: 2, label: "T2", desc: "Standard" },
{ value: 3, label: "T3", desc: "Full Access" },
];
const handleRadioKeyDown = useCallback(
(e: React.KeyboardEvent, currentIndex: number) => {
if (e.key === "ArrowDown" || e.key === "ArrowRight") {
e.preventDefault();
const next = (currentIndex + 1) % TIERS.length;
setTier(TIERS[next].value);
radioRefs.current[next]?.focus();
} else if (e.key === "ArrowUp" || e.key === "ArrowLeft") {
e.preventDefault();
const prev = (currentIndex - 1 + TIERS.length) % TIERS.length;
setTier(TIERS[prev].value);
radioRefs.current[prev]?.focus();
}
},
// TIERS is stable (module-level constant pattern), setTier is stable from useState
// eslint-disable-next-line react-hooks/exhaustive-deps
[]
);
const isHermes = template.trim().toLowerCase() === "hermes";
// Reset form and load workspaces whenever dialog opens
@ -60,6 +88,7 @@ export function CreateWorkspaceButton() {
setTier(1);
setTemplate("");
setParentId("");
setBudgetLimit("");
setError(null);
setHermesProvider("anthropic");
setHermesApiKey("");
@ -86,12 +115,17 @@ export function CreateWorkspaceButton() {
: undefined;
try {
const parsedBudget = budgetLimit.trim()
? parseFloat(budgetLimit)
: null;
await api.post("/workspaces", {
name: name.trim(),
role: role.trim() || undefined,
template: template.trim() || undefined,
tier,
parent_id: parentId || undefined,
budget_limit: parsedBudget,
canvas: { x: Math.random() * 400 + 100, y: Math.random() * 300 + 100 },
...(isHermes && provider
? { secrets: { [provider.envVar]: hermesApiKey.trim() } }
@ -155,6 +189,14 @@ export function CreateWorkspaceButton() {
onChange={setRole}
placeholder="e.g. SEO Specialist"
/>
<InputField
label="Budget limit (USD)"
value={budgetLimit}
onChange={setBudgetLimit}
placeholder="e.g. 100"
type="number"
helper="Leave blank for unlimited"
/>
<InputField
label="Template"
value={template}
@ -172,16 +214,15 @@ export function CreateWorkspaceButton() {
<div className="col-span-3 text-[11px] text-zinc-400 mb-1">
Tier
</div>
{[
{ value: 1, label: "T1", desc: "Sandboxed" },
{ value: 2, label: "T2", desc: "Standard" },
{ value: 3, label: "T3", desc: "Full Access" },
].map((t) => (
{TIERS.map((t, idx) => (
<button
key={t.value}
ref={(el) => { radioRefs.current[idx] = el; }}
role="radio"
aria-checked={tier === t.value}
tabIndex={tier === t.value ? 0 : -1}
onClick={() => setTier(t.value)}
onKeyDown={(e) => handleRadioKeyDown(e, idx)}
className={`py-2 rounded-lg text-center transition-colors ${
tier === t.value
? "bg-blue-600/20 border border-blue-500/50 text-blue-300"
@ -315,6 +356,8 @@ function InputField({
placeholder,
required,
mono,
type = "text",
helper,
}: {
label: string;
value: string;
@ -322,10 +365,16 @@ function InputField({
placeholder?: string;
required?: boolean;
mono?: boolean;
type?: string;
helper?: string;
}) {
// useId() generates a stable, unique ID for the label↔input association,
// satisfying WCAG 2.1 SC 1.3.1 (Info and Relationships, Level A).
const inputId = useId();
return (
<div>
<label className="text-[11px] text-zinc-400 block mb-1">
<label htmlFor={inputId} className="text-[11px] text-zinc-400 block mb-1">
{label}{" "}
{required && (
<>
@ -337,11 +386,18 @@ function InputField({
)}
</label>
<input
id={inputId}
type={type}
value={value}
onChange={(e) => onChange(e.target.value)}
placeholder={placeholder}
className={`w-full bg-zinc-800/60 border border-zinc-700/50 rounded-lg px-3 py-2 text-sm text-zinc-100 placeholder-zinc-600 focus:outline-none focus:border-blue-500/60 focus:ring-1 focus:ring-blue-500/20 transition-colors ${mono ? "font-mono text-xs" : ""}`}
min={type === "number" ? "0" : undefined}
step={type === "number" ? "0.01" : undefined}
className={`w-full bg-zinc-800/60 border border-zinc-700/50 rounded-lg px-3 py-2 text-sm text-zinc-100 placeholder-zinc-500 focus:outline-none focus:border-blue-500/60 focus:ring-1 focus:ring-blue-500/20 transition-colors ${mono ? "font-mono text-xs" : ""}`}
/>
{helper && (
<p className="mt-1 text-xs text-zinc-500">{helper}</p>
)}
</div>
);
}

View File

@ -173,7 +173,7 @@ export function SidePanel() {
else if (e.key === "End") { e.preventDefault(); next = TABS.length - 1; }
if (next !== null) {
setPanelTab(TABS[next].id);
requestAnimationFrame(() => { document.getElementById(`tab-${TABS[next!].id}`)?.focus(); });
requestAnimationFrame(() => { const el = document.getElementById(`tab-${TABS[next!].id}`); el?.focus(); el?.scrollIntoView({ block: "nearest", inline: "nearest" }); });
}
}}
>

View File

@ -0,0 +1,137 @@
'use client';
import { useState, useEffect } from "react";
import { api } from "@/lib/api";
// Props for <WorkspaceUsage/>: the workspace whose /metrics endpoint is
// fetched once per mount and on every workspaceId change.
export interface WorkspaceUsageProps {
  workspaceId: string;
}

// Response shape of GET /workspaces/:id/metrics as consumed by this component.
interface WorkspaceMetrics {
  input_tokens: number;
  output_tokens: number;
  total_calls: number; // fetched but not rendered by this component
  estimated_cost_usd: string; // decimal string — parseFloat'd before display
  period_start: string; // date string fed to new Date() in formatPeriod
  period_end: string; // date string fed to new Date() in formatPeriod
}
// Panel showing token usage and estimated cost for one workspace.
// Fetches GET /workspaces/:id/metrics on mount and whenever the id changes;
// renders skeleton rows while loading and an inline error message on failure.
export function WorkspaceUsage({ workspaceId }: WorkspaceUsageProps) {
  const [metrics, setMetrics] = useState<WorkspaceMetrics | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // `cancelled` guards against setState after unmount or a workspace switch.
    let cancelled = false;
    setLoading(true);
    setError(null);
    api
      .get<WorkspaceMetrics>(`/workspaces/${workspaceId}/metrics`)
      .then((data) => {
        if (!cancelled) setMetrics(data);
      })
      .catch((e) => {
        if (!cancelled) {
          setError(e instanceof Error ? e.message : "Failed to load metrics");
        }
      })
      .finally(() => {
        if (!cancelled) setLoading(false);
      });
    return () => {
      cancelled = true;
    };
  }, [workspaceId]);

  // Resolve the stats area up front instead of nesting ternaries in the JSX.
  const renderStats = () => {
    if (loading) {
      return (
        <>
          <SkeletonRow />
          <SkeletonRow />
          <SkeletonRow />
        </>
      );
    }
    if (error) {
      return (
        <p className="text-xs text-red-400" data-testid="usage-error">
          {error}
        </p>
      );
    }
    if (!metrics) return null;
    return (
      <>
        <StatRow
          label="Input tokens"
          value={`${metrics.input_tokens.toLocaleString()} tokens`}
          testId="usage-input-tokens"
        />
        <StatRow
          label="Output tokens"
          value={`${metrics.output_tokens.toLocaleString()} tokens`}
          testId="usage-output-tokens"
        />
        <StatRow
          label="Estimated cost"
          value={`$${parseFloat(metrics.estimated_cost_usd).toFixed(6)}`}
          testId="usage-estimated-cost"
        />
      </>
    );
  };

  return (
    <div
      className="rounded-md border border-zinc-700 bg-zinc-900 p-3 space-y-2"
      data-testid="workspace-usage"
    >
      <div className="flex items-center justify-between">
        <h4 className="text-xs font-semibold text-zinc-400 uppercase tracking-wider">
          Usage
        </h4>
        {!loading && metrics && (
          <span
            className="text-[10px] text-zinc-600 font-mono"
            data-testid="usage-period"
          >
            {formatPeriod(metrics.period_start, metrics.period_end)}
          </span>
        )}
      </div>
      <div className="space-y-1.5" data-testid="usage-stats">
        {renderStats()}
      </div>
    </div>
  );
}
// Render a "start end" period label, each date as e.g. "Apr 17" in the
// user's current locale.
// NOTE(review): the two dates are joined by a plain space ("Apr 1 Apr 30");
// possibly an en dash ("–") was intended — confirm against design/tests.
function formatPeriod(start: string, end: string): string {
  const short = (iso: string) =>
    new Date(iso).toLocaleDateString(undefined, {
      month: "short",
      day: "numeric",
    });
  return [short(start), short(end)].join(" ");
}
// Pulsing placeholder sized to roughly match one label/value StatRow.
function SkeletonRow() {
  return (
    <div
      data-testid="usage-skeleton-row"
      className="flex justify-between items-center animate-pulse"
    >
      <div className="h-3 w-20 rounded bg-zinc-700" />
      <div className="h-3 w-16 rounded bg-zinc-700" />
    </div>
  );
}
function StatRow({
label,
value,
testId,
}: {
label: string;
value: string;
testId?: string;
}) {
return (
<div className="flex justify-between items-center" data-testid={testId}>
<span className="text-xs text-zinc-500">{label}</span>
<span className="text-xs text-zinc-400 font-mono">{value}</span>
</div>
);
}

View File

@ -0,0 +1,230 @@
// @vitest-environment jsdom
/**
* DetailsTab integration tests for issue #541.
*
* Budget-specific logic (stats, progress bar, PATCH /budget, 402 handling) is
* fully covered by BudgetSection.test.tsx this file focuses on:
* 1. BudgetSection being mounted inside DetailsTab
* 2. The workspace edit form (name / role / tier) no longer carrying
* budget_limit that concern lives in BudgetSection now
* 3. PATCH /workspaces/:id body integrity (no accidental budget_limit leak)
*/
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";

// ── Mocks ─────────────────────────────────────────────────────────────────────

// Vitest hoists every vi.mock() call above the import statements below, and
// the "@/store/canvas" factory runs while the DetailsTab import is being
// evaluated — i.e. BEFORE any top-level `const` in this file is initialized.
// A plain `const mockUpdateNodeData = vi.fn()` referenced from that factory
// therefore throws "Cannot access 'mockUpdateNodeData' before initialization".
// vi.hoisted() is the supported fix: its callback is hoisted alongside the
// mocks, so the fn exists by the time the factory executes.
const { mockUpdateNodeData } = vi.hoisted(() => ({
  mockUpdateNodeData: vi.fn(),
}));

vi.mock("@/lib/api", () => ({
  api: {
    get: vi.fn(),
    patch: vi.fn(),
    del: vi.fn(),
    post: vi.fn(),
  },
}));

vi.mock("@/store/canvas", () => ({
  useCanvasStore: vi.fn((selector: (s: unknown) => unknown) =>
    selector({
      updateNodeData: mockUpdateNodeData,
      removeNode: vi.fn(),
      selectNode: vi.fn(),
    })
  ),
}));

vi.mock("../StatusDot", () => ({ StatusDot: () => null }));

// Mock BudgetSection — it has its own test suite (BudgetSection.test.tsx).
// Without this mock its internal api.get would fire against the shared mock
// and cause type errors when the return is not a valid BudgetData object.
vi.mock("../tabs/BudgetSection", () => ({
  BudgetSection: ({ workspaceId }: { workspaceId: string }) => (
    <div data-testid="budget-section-stub" data-ws={workspaceId} />
  ),
}));

// Mock WorkspaceUsage — it has its own test suite (WorkspaceUsage.test.tsx).
// Without this mock its internal api.get call races against the shared mock
// and crashes when the return value is not a valid WorkspaceMetrics object.
vi.mock("../WorkspaceUsage", () => ({
  WorkspaceUsage: ({ workspaceId }: { workspaceId: string }) => (
    <div data-testid="workspace-usage-stub" data-ws={workspaceId} />
  ),
}));

import { api } from "@/lib/api";
import { DetailsTab } from "../tabs/DetailsTab";

// Typed handles to the mocked api methods; tests program responses per case.
const mockPatch = vi.mocked(api.patch);
const mockGet = vi.mocked(api.get);
// ── Helpers ───────────────────────────────────────────────────────────────────

// Canonical node-data payload for DetailsTab; individual tests override fields.
function makeData(overrides: Record<string, unknown> = {}) {
  const base = {
    name: "Test Agent",
    role: "Researcher",
    tier: 1,
    status: "online",
    agentCard: null,
    activeTasks: 0,
    collapsed: false,
    lastErrorRate: 0,
    lastSampleError: "",
    url: "http://localhost:8080",
    parentId: null,
    currentTask: "",
    runtime: "langgraph",
    needsRestart: false,
    budgetLimit: null,
    budgetUsed: null,
  };
  return { ...base, ...overrides };
}
beforeEach(() => {
  // Fresh mock state per test, with benign default responses so renders
  // that trigger fetches don't reject unexpectedly.
  vi.clearAllMocks();
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  mockGet.mockResolvedValue([] as any);
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  mockPatch.mockResolvedValue({} as any);
});

afterEach(() => cleanup());
// Click the "Edit" button and wait until the form's "Save" button appears.
async function openEdit() {
  const buttons = screen.getAllByRole("button");
  const editBtn = buttons.find((b) => b.textContent === "Edit");
  fireEvent.click(editBtn!);
  await waitFor(() => {
    const hasSave = screen.getAllByRole("button").some((b) => b.textContent === "Save");
    expect(hasSave).toBe(true);
  });
}
// ── BudgetSection mounting ────────────────────────────────────────────────────

describe("DetailsTab — BudgetSection integration", () => {
  it("renders BudgetSection with the correct workspaceId", () => {
    render(<DetailsTab workspaceId="ws-42" data={makeData()} />);
    // The stub records the id it was handed via its data-ws attribute.
    const stub = screen.getByTestId("budget-section-stub");
    expect(stub.getAttribute("data-ws")).toBe("ws-42");
    expect(stub).toBeTruthy();
  });
});
// ── Workspace edit form (no budget_limit) ──────────────────────────────────────

// Pins the #541 refactor: budget editing moved out of the generic workspace
// edit form and into BudgetSection, so neither the budget input nor a
// budget_limit field may appear in this form or its PATCH body.
describe("DetailsTab — workspace edit form does not include budget_limit", () => {
  it("does NOT show a 'Budget limit (USD)' input in the edit form", async () => {
    render(<DetailsTab workspaceId="ws-1" data={makeData()} />);
    await openEdit();
    // Budget limit (USD) was the old inline field label — must be absent now
    expect(screen.queryByPlaceholderText("Leave blank for unlimited")).toBeNull();
    expect(screen.queryByText("Budget limit (USD)")).toBeNull();
  });

  it("PATCH /workspaces/:id body does NOT include budget_limit", async () => {
    render(<DetailsTab workspaceId="ws-1" data={makeData({ name: "My Agent" })} />);
    await openEdit();
    const saveBtn = screen.getAllByRole("button").find((b) => b.textContent === "Save");
    fireEvent.click(saveBtn!);
    await waitFor(() => expect(mockPatch).toHaveBeenCalled());
    // mock.calls[0][1] — the second positional argument of api.patch (the body).
    const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
    expect(Object.prototype.hasOwnProperty.call(body, "budget_limit")).toBe(false);
  });

  it("PATCH /workspaces/:id body includes name, role, and tier", async () => {
    render(
      <DetailsTab
        workspaceId="ws-1"
        data={makeData({ name: "Alpha", role: "Writer", tier: 2 })}
      />
    );
    await openEdit();
    const saveBtn = screen.getAllByRole("button").find((b) => b.textContent === "Save");
    fireEvent.click(saveBtn!);
    await waitFor(() => expect(mockPatch).toHaveBeenCalled());
    const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
    expect(body.name).toBe("Alpha");
    expect(body.role).toBe("Writer");
    expect(body.tier).toBe(2);
  });

  it("Cancel reverts name, role, tier without touching budget state", async () => {
    render(
      <DetailsTab
        workspaceId="ws-1"
        data={makeData({ name: "Original", role: "Dev" })}
      />
    );
    await openEdit();
    // Modify name
    // The name input is located by its current value ("Original").
    fireEvent.change(
      screen.getAllByRole("textbox").find((i) => (i as HTMLInputElement).value === "Original")!,
      { target: { value: "Modified" } }
    );
    const cancelBtn = screen.getAllByRole("button").find((b) => b.textContent === "Cancel");
    fireEvent.click(cancelBtn!);
    // Should be back in read view — no Save button visible
    expect(screen.queryAllByRole("button").some((b) => b.textContent === "Save")).toBe(false);
    // Workspace info unchanged in read view
    expect(screen.getByText("Original")).toBeTruthy();
  });

  it("updateNodeData is called with name/role/tier but NOT budgetLimit on save", async () => {
    render(
      <DetailsTab
        workspaceId="ws-1"
        data={makeData({ name: "Bot", role: "Analyst", tier: 1 })}
      />
    );
    await openEdit();
    const saveBtn = screen.getAllByRole("button").find((b) => b.textContent === "Save");
    fireEvent.click(saveBtn!);
    await waitFor(() => expect(mockUpdateNodeData).toHaveBeenCalled());
    // mock.calls[0][1] — the second positional argument passed to updateNodeData.
    const updateArgs = mockUpdateNodeData.mock.calls[0][1] as Record<string, unknown>;
    expect(updateArgs.name).toBe("Bot");
    expect(updateArgs.role).toBe("Analyst");
    expect(updateArgs.tier).toBe(1);
    expect(Object.prototype.hasOwnProperty.call(updateArgs, "budgetLimit")).toBe(false);
  });
});
// ── budget-exceeded-badge removed from DetailsTab ────────────────────────────

describe("DetailsTab — no inline budget-exceeded-badge", () => {
  it("does NOT render budget-exceeded-badge even when budgetUsed > budgetLimit (BudgetSection owns that)", () => {
    const data = makeData({ budgetLimit: 10, budgetUsed: 99 });
    render(<DetailsTab workspaceId="ws-1" data={data} />);
    // The old inline badge is gone — BudgetSection.tsx owns the exceeded state
    expect(screen.queryByTestId("budget-exceeded-badge")).toBeNull();
  });

  it("does NOT render inline Budget limit row in read view", () => {
    const data = makeData({ budgetLimit: 100 });
    render(<DetailsTab workspaceId="ws-1" data={data} />);
    // "$100.00" and "Unlimited" are rendered by BudgetSection now
    expect(screen.queryByText("Unlimited")).toBeNull();
    expect(screen.queryByText("$100.00")).toBeNull();
  });
});

View File

@ -0,0 +1,389 @@
// @vitest-environment jsdom
/**
* Tests for BudgetSection (issue #541).
*
* Covers:
* - Loading state
* - Stats row: used / limit, "Unlimited" when null
* - Progress bar: correct percentage, capped at 100%, absent when no limit
* - Budget remaining text
* - Input pre-fill (existing limit / blank when null)
* - Save: PATCH with number, PATCH with null (blank input)
* - 402 on GET → exceeded banner, no fetch-error text
* - 402 on PATCH → exceeded banner
* - Non-402 fetch error → error text
* - Non-402 save error → save error alert
* - Section header and subheading
* - Fetch error does not show stats
*/
// Vitest primitives plus Testing Library helpers for DOM-level assertions.
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import {
  render,
  screen,
  fireEvent,
  waitFor,
  cleanup,
  act,
} from "@testing-library/react";

// ── Mock api ──────────────────────────────────────────────────────────────────

// vi.mock() is hoisted above the imports below, so both the `api` import and
// BudgetSection's own internal `api` reference resolve to this mock.
vi.mock("@/lib/api", () => ({
  api: {
    get: vi.fn(),
    patch: vi.fn(),
  },
}));

import { api } from "@/lib/api";
import { BudgetSection } from "../tabs/BudgetSection";

// Typed handles to the mocked methods; each test programs its own responses.
const mockGet = vi.mocked(api.get);
const mockPatch = vi.mocked(api.patch);
// ── Helpers ───────────────────────────────────────────────────────────────────

// Canonical GET/PATCH /budget payload; tests override individual fields.
function budgetResponse(overrides: Partial<{
  budget_limit: number | null;
  budget_used: number;
  budget_remaining: number | null;
}> = {}) {
  const base = {
    budget_limit: 1000,
    budget_used: 250,
    budget_remaining: 750,
  };
  return { ...base, ...overrides };
}

// Errors mirroring the api client's "METHOD path: status text" message shape.
function make402Error(): Error {
  return new Error("API GET /workspaces/ws-1/budget: 402 Payment Required");
}

function make402PatchError(): Error {
  return new Error("API PATCH /workspaces/ws-1/budget: 402 Payment Required");
}

function makeGenericError(msg = "network timeout"): Error {
  return new Error(`API GET /workspaces/ws-1/budget: 500 ${msg}`);
}
beforeEach(() => {
  // Every test programs its own mock responses from a clean slate.
  vi.clearAllMocks();
});

afterEach(() => cleanup());

// ── Rendering helpers ─────────────────────────────────────────────────────────

// Mount BudgetSection with one mocked GET response and wait for the loading
// indicator to clear before handing control back to the test.
async function renderLoaded(budgetData = budgetResponse()) {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  mockGet.mockResolvedValueOnce(budgetData as any);
  render(<BudgetSection workspaceId="ws-1" />);
  await waitFor(() => {
    expect(screen.queryByTestId("budget-loading")).toBeNull();
  });
}
// ── Loading state ─────────────────────────────────────────────────────────────

describe("BudgetSection — loading state", () => {
  it("shows loading indicator while fetch is in flight", () => {
    // A promise that never settles keeps the component in its loading branch.
    const pending = new Promise(() => {});
    mockGet.mockReturnValue(pending);
    render(<BudgetSection workspaceId="ws-1" />);
    const indicator = screen.getByTestId("budget-loading");
    expect(indicator).toBeTruthy();
    expect(screen.getByText("Loading…")).toBeTruthy();
  });

  it("hides loading indicator after fetch resolves", async () => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    mockGet.mockResolvedValueOnce(budgetResponse() as any);
    render(<BudgetSection workspaceId="ws-1" />);
    await waitFor(() => {
      expect(screen.queryByTestId("budget-loading")).toBeNull();
    });
  });
});
// ── Section header ────────────────────────────────────────────────────────────

describe("BudgetSection — header and subheading", () => {
  it("renders 'Budget' as the section heading", async () => {
    await renderLoaded();
    const heading = screen.getByText("Budget");
    expect(heading).toBeTruthy();
  });

  it("renders the subheading 'Limit total message credits for this workspace'", async () => {
    await renderLoaded();
    const sub = screen.getByText("Limit total message credits for this workspace");
    expect(sub).toBeTruthy();
  });

  it("renders 'Budget limit (credits)' label for the input", async () => {
    await renderLoaded();
    expect(screen.getByText("Budget limit (credits)")).toBeTruthy();
  });
});
// ── Stats row ─────────────────────────────────────────────────────────────────

describe("BudgetSection — stats row", () => {
  // Shared lookup: text content of a stats element by its test id.
  const textOf = (testId: string) => screen.getByTestId(testId).textContent;

  it("shows budget_used in the stats row", async () => {
    await renderLoaded(budgetResponse({ budget_used: 350, budget_limit: 1000 }));
    expect(textOf("budget-used-value")).toBe("350");
  });

  it("shows budget_limit in the stats row", async () => {
    await renderLoaded(budgetResponse({ budget_used: 100, budget_limit: 500 }));
    expect(textOf("budget-limit-value")).toBe("500");
  });

  it("shows 'Unlimited' when budget_limit is null", async () => {
    await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
    expect(textOf("budget-limit-value")).toBe("Unlimited");
  });

  it("shows budget_remaining when present", async () => {
    await renderLoaded(budgetResponse({ budget_remaining: 750 }));
    const remaining = textOf("budget-remaining");
    expect(remaining).toContain("750");
    expect(remaining).toContain("credits remaining");
  });

  it("hides budget_remaining row when null", async () => {
    await renderLoaded(budgetResponse({ budget_remaining: null }));
    expect(screen.queryByTestId("budget-remaining")).toBeNull();
  });
});
// ── Progress bar ──────────────────────────────────────────────────────────────

describe("BudgetSection — progress bar", () => {
  // Shared lookup: inline width style of the progress fill element.
  const fillWidth = () =>
    (screen.getByTestId("budget-progress-fill") as HTMLDivElement).style.width;

  it("renders the progress bar when budget_limit is set", async () => {
    await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
    expect(screen.getByRole("progressbar")).toBeTruthy();
  });

  it("does NOT render progress bar when budget_limit is null", async () => {
    await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
    expect(screen.queryByRole("progressbar")).toBeNull();
  });

  it("fills to the correct percentage (25%)", async () => {
    await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
    expect(fillWidth()).toBe("25%");
  });

  it("fills to the correct percentage (50%)", async () => {
    await renderLoaded(budgetResponse({ budget_used: 500, budget_limit: 1000 }));
    expect(fillWidth()).toBe("50%");
  });

  it("caps fill at 100% when budget_used exceeds budget_limit", async () => {
    await renderLoaded(budgetResponse({ budget_used: 1500, budget_limit: 1000 }));
    expect(fillWidth()).toBe("100%");
  });

  it("progress bar has aria-valuenow equal to the calculated percentage", async () => {
    await renderLoaded(budgetResponse({ budget_used: 300, budget_limit: 1000 }));
    const bar = screen.getByRole("progressbar");
    expect(bar.getAttribute("aria-valuenow")).toBe("30");
  });
});
// ── Input pre-fill ────────────────────────────────────────────────────────────
describe("BudgetSection — input pre-fill", () => {
it("pre-fills input with existing budget_limit", async () => {
await renderLoaded(budgetResponse({ budget_limit: 500 }));
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
expect(input.value).toBe("500");
});
it("leaves input empty when budget_limit is null", async () => {
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
expect(input.value).toBe("");
});
});
// ── Save — PATCH calls ────────────────────────────────────────────────────────
describe("BudgetSection — save", () => {
it("calls PATCH /workspaces/:id/budget with budget_limit as integer", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 800 }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "800" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
expect(mockPatch.mock.calls[0][0]).toBe("/workspaces/ws-1/budget");
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBe(800);
});
it("sends budget_limit: 0 (not null) when input is '0' — zero-credit budget", async () => {
// Regression for QA bug report: `parseInt("0") || null` would yield null.
// The correct form `raw !== "" ? parseInt(raw, 10) : null` must return 0.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 0, budget_used: 0, budget_remaining: 0 }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "0" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBe(0);
expect(body.budget_limit).not.toBeNull();
});
it("sends budget_limit: null when input is blank", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: null, budget_remaining: null }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBeNull();
});
it("updates displayed stats after successful save", async () => {
const updated = budgetResponse({ budget_limit: 2000, budget_used: 500, budget_remaining: 1500 });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(updated as any);
await renderLoaded(budgetResponse({ budget_limit: 1000, budget_used: 250 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "2000" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-limit-value").textContent).toBe("2,000")
);
});
it("shows save error message on non-402 PATCH failure", async () => {
mockPatch.mockRejectedValueOnce(
new Error("API PATCH /workspaces/ws-1/budget: 500 server error")
);
await renderLoaded();
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-save-error")).toBeTruthy()
);
expect(screen.getByTestId("budget-save-error").textContent).toContain("500");
});
});
// ── 402 handling ──────────────────────────────────────────────────────────────
describe("BudgetSection — 402 handling", () => {
it("shows exceeded banner when GET returns 402", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
expect(screen.getByText("Budget exceeded — messages blocked")).toBeTruthy();
});
it("does NOT show fetch error text when GET returns 402 (only banner)", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.queryByTestId("budget-loading")).toBeNull()
);
expect(screen.queryByTestId("budget-fetch-error")).toBeNull();
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
});
it("shows exceeded banner when PATCH returns 402", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValueOnce(budgetResponse() as any);
mockPatch.mockRejectedValueOnce(make402PatchError());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
// Should NOT also show the save-error alert
expect(screen.queryByTestId("budget-save-error")).toBeNull();
});
it("clears exceeded banner after a successful save", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
// Now a successful PATCH (limit was raised)
const updated = budgetResponse({ budget_limit: 5000, budget_used: 250, budget_remaining: 4750 });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(updated as any);
await act(async () => {
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "5000" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
});
await waitFor(() =>
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull()
);
});
});
// ── Non-402 fetch error ───────────────────────────────────────────────────────
describe("BudgetSection — non-402 fetch errors", () => {
it("shows fetch error text on non-402 GET failure", async () => {
mockGet.mockRejectedValueOnce(makeGenericError("internal server error"));
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.getByTestId("budget-fetch-error")).toBeTruthy()
);
expect(screen.getByTestId("budget-fetch-error").textContent).toContain("500");
});
it("does NOT show stats row on fetch error", async () => {
mockGet.mockRejectedValueOnce(makeGenericError());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
expect(screen.queryByTestId("budget-stats-row")).toBeNull();
});
it("does NOT show exceeded banner on non-402 fetch error", async () => {
mockGet.mockRejectedValueOnce(makeGenericError());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
});
});

View File

@ -0,0 +1,230 @@
// @vitest-environment jsdom
/**
* Tests for issue #608 effort + task_budget fields in workspace config.
*
* Covers:
* 1. toYaml serialization (effort + task_budget YAML keys)
* 2. parseYaml round-trip (YAML ConfigData)
* 3. DEFAULT_CONFIG shape (new fields present with zero/empty defaults)
* 4. ConfigTab source assertions (section rendered conditionally)
* 5. React rendering of the section for claude-code and claude model configs
*/
import React from "react";
import { describe, it, expect, vi, afterEach } from "vitest";
import { render, screen, cleanup } from "@testing-library/react";
// ── Module-level mocks ───────────────────────────────────────────────────────
// vi.mock calls are hoisted by Vitest above the imports below, so ConfigTab
// resolves these stubs instead of the real modules.
vi.mock("@/lib/api", () => ({
  api: { get: vi.fn(), put: vi.fn(), patch: vi.fn(), post: vi.fn() },
}));
vi.mock("@/store/canvas", () => ({
  useCanvasStore: vi.fn(() => ({
    restartWorkspace: vi.fn(),
    updateNodeData: vi.fn(),
  })),
}));
// SecretsSection is stubbed out — its own behavior is tested elsewhere.
vi.mock("../tabs/config/secrets-section", () => ({
  SecretsSection: () => <div data-testid="secrets-stub" />,
}));
// ── Imports ──────────────────────────────────────────────────────────────────
import { toYaml, parseYaml } from "../tabs/config/yaml-utils";
import { DEFAULT_CONFIG, type ConfigData } from "../tabs/config/form-inputs";
import { ConfigTab } from "../tabs/ConfigTab";
import { api } from "@/lib/api";
// Reset DOM and mock call history between tests.
afterEach(() => {
  cleanup();
  vi.clearAllMocks();
});
// ── 1. toYaml serialization ──────────────────────────────────────────────────
describe("toYaml — effort field", () => {
it("omits effort when empty string", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, effort: "" };
expect(toYaml(cfg)).not.toContain("effort:");
});
it("omits effort when undefined", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, effort: undefined };
expect(toYaml(cfg)).not.toContain("effort:");
});
it("serializes effort: low", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, effort: "low" };
const yaml = toYaml(cfg);
expect(yaml).toContain("effort: low");
});
it("serializes effort: medium", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, effort: "medium" };
expect(toYaml(cfg)).toContain("effort: medium");
});
it("serializes effort: high", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, effort: "high" };
expect(toYaml(cfg)).toContain("effort: high");
});
it("serializes effort: xhigh", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, effort: "xhigh" };
expect(toYaml(cfg)).toContain("effort: xhigh");
});
});
describe("toYaml — task_budget field", () => {
it("omits task_budget when 0", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, task_budget: 0 };
expect(toYaml(cfg)).not.toContain("task_budget:");
});
it("omits task_budget when undefined", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, task_budget: undefined };
expect(toYaml(cfg)).not.toContain("task_budget:");
});
it("serializes task_budget: 10000", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, task_budget: 10000 };
expect(toYaml(cfg)).toContain("task_budget: 10000");
});
it("serializes task_budget: 50000", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, task_budget: 50000 };
expect(toYaml(cfg)).toContain("task_budget: 50000");
});
});
describe("toYaml — effort and task_budget together", () => {
it("serializes both when set", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, effort: "xhigh", task_budget: 32000 };
const yaml = toYaml(cfg);
expect(yaml).toContain("effort: xhigh");
expect(yaml).toContain("task_budget: 32000");
});
it("effort appears before task_budget in output", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, effort: "high", task_budget: 8000 };
const yaml = toYaml(cfg);
const effortIdx = yaml.indexOf("effort:");
const budgetIdx = yaml.indexOf("task_budget:");
expect(effortIdx).toBeGreaterThan(-1);
expect(budgetIdx).toBeGreaterThan(-1);
expect(effortIdx).toBeLessThan(budgetIdx);
});
});
// ── 2. parseYaml round-trip ──────────────────────────────────────────────────
describe("parseYaml — effort + task_budget round-trip", () => {
it("parses effort from YAML", () => {
const yaml = "name: Test\neffort: high\n";
const parsed = parseYaml(yaml);
expect(parsed.effort).toBe("high");
});
it("parses task_budget from YAML as integer", () => {
const yaml = "name: Test\ntask_budget: 16000\n";
const parsed = parseYaml(yaml);
expect(parsed.task_budget).toBe(16000);
});
it("round-trips effort: xhigh through toYaml → parseYaml", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, effort: "xhigh" };
const yaml = toYaml(cfg);
const parsed = parseYaml(yaml);
expect(parsed.effort).toBe("xhigh");
});
it("round-trips task_budget: 50000 through toYaml → parseYaml", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, task_budget: 50000 };
const yaml = toYaml(cfg);
const parsed = parseYaml(yaml);
expect(parsed.task_budget).toBe(50000);
});
it("round-trips both fields together", () => {
const cfg: ConfigData = { ...DEFAULT_CONFIG, effort: "low", task_budget: 1000 };
const yaml = toYaml(cfg);
const parsed = parseYaml(yaml);
expect(parsed.effort).toBe("low");
expect(parsed.task_budget).toBe(1000);
});
});
// ── 3. DEFAULT_CONFIG shape ──────────────────────────────────────────────────
describe("DEFAULT_CONFIG", () => {
it("has effort defaulting to empty string", () => {
expect(DEFAULT_CONFIG.effort).toBe("");
});
it("has task_budget defaulting to 0", () => {
expect(DEFAULT_CONFIG.task_budget).toBe(0);
});
});
// ── 4. ConfigTab source assertions ──────────────────────────────────────────
describe("ConfigTab source — Claude Settings section", () => {
it("ConfigTab.tsx contains the effort-select data-testid", async () => {
const { readFileSync } = await import("fs");
const { join } = await import("path");
const src = readFileSync(join(__dirname, "../../components/tabs/ConfigTab.tsx"), "utf8");
expect(src).toContain('data-testid="effort-select"');
expect(src).toContain('data-testid="task-budget-input"');
});
it("ConfigTab.tsx effort dropdown has all four Claude values", async () => {
const { readFileSync } = await import("fs");
const { join } = await import("path");
const src = readFileSync(join(__dirname, "../../components/tabs/ConfigTab.tsx"), "utf8");
expect(src).toContain('"low"');
expect(src).toContain('"medium"');
expect(src).toContain('"high"');
expect(src).toContain('"xhigh"');
});
it("ConfigTab.tsx section is guarded by claude-code runtime check", async () => {
const { readFileSync } = await import("fs");
const { join } = await import("path");
const src = readFileSync(join(__dirname, "../../components/tabs/ConfigTab.tsx"), "utf8");
expect(src).toContain('config.runtime === "claude-code"');
expect(src).toContain('"claude"');
});
});
// ── 5. React rendering ───────────────────────────────────────────────────────
describe("ConfigTab — Claude Settings section rendering", () => {
function setupMock(configYaml: string) {
vi.mocked(api.get).mockResolvedValue({ content: configYaml } as never);
}
it("shows Claude Settings section for claude-code runtime", async () => {
setupMock("name: Bot\nruntime: claude-code\n");
render(<ConfigTab workspaceId="ws-1" />);
// Section title appears once loading resolves
const section = await screen.findByText("Claude Settings");
expect(section).toBeTruthy();
});
it("shows Claude Settings section when model contains claude", async () => {
setupMock("name: Bot\nmodel: anthropic:claude-opus-4-7\n");
render(<ConfigTab workspaceId="ws-1" />);
const section = await screen.findByText("Claude Settings");
expect(section).toBeTruthy();
});
it("does NOT show Claude Settings section for non-claude runtime/model", async () => {
setupMock("name: Bot\nruntime: crewai\nmodel: openai:gpt-4o\n");
render(<ConfigTab workspaceId="ws-1" />);
// Wait for load (config.yaml fetch resolves) then check absence
await screen.findByText("General"); // loaded
expect(screen.queryByText("Claude Settings")).toBeNull();
});
});

View File

@ -163,4 +163,50 @@ describe("ContextMenu — keyboard accessibility", () => {
const { container } = render(<ContextMenu />);
expect(container.firstChild).toBeNull();
});
  // ── Zoom to Team (#557) ───────────────────────────────────────────────────
  // The menu item is offered only when the node has children; the fixtures
  // mark a child via data.parentId pointing at the selected node ("ws-1").
  it("does NOT show 'Zoom to Team' when node has no children", () => {
    mockStore.nodes = []; // no children
    render(<ContextMenu />);
    const items = screen.getAllByRole("menuitem");
    const labels = items.map((el) => el.textContent ?? "");
    expect(labels.some((l) => l.includes("Zoom to Team"))).toBe(false);
  });
  it("shows 'Zoom to Team' when the node has children", () => {
    mockStore.nodes = [{ id: "child-1", data: { parentId: "ws-1" } }];
    render(<ContextMenu />);
    const items = screen.getAllByRole("menuitem");
    const labels = items.map((el) => el.textContent ?? "");
    expect(labels.some((l) => l.includes("Zoom to Team"))).toBe(true);
  });
it("clicking 'Zoom to Team' dispatches molecule:zoom-to-team event", () => {
mockStore.nodes = [{ id: "child-1", data: { parentId: "ws-1" } }];
const dispatched: CustomEvent[] = [];
window.addEventListener("molecule:zoom-to-team", (e) => {
dispatched.push(e as CustomEvent);
});
render(<ContextMenu />);
const items = screen.getAllByRole("menuitem");
const zoomItem = items.find((el) => el.textContent?.includes("Zoom to Team"))!;
expect(zoomItem).toBeTruthy();
fireEvent.click(zoomItem);
expect(dispatched).toHaveLength(1);
expect(dispatched[0].detail.nodeId).toBe("ws-1");
window.removeEventListener("molecule:zoom-to-team", () => {});
});
it("clicking 'Zoom to Team' closes the context menu", () => {
mockStore.nodes = [{ id: "child-1", data: { parentId: "ws-1" } }];
render(<ContextMenu />);
const items = screen.getAllByRole("menuitem");
const zoomItem = items.find((el) => el.textContent?.includes("Zoom to Team"))!;
fireEvent.click(zoomItem);
expect(closeContextMenu).toHaveBeenCalled();
});
});

View File

@ -89,4 +89,144 @@ describe("CreateWorkspaceDialog — accessibility", () => {
expect(t2?.getAttribute("aria-checked")).toBe("true")
);
});
  // ── Arrow-key navigation (WCAG 2.1 radio group) — Issue #556 ──────────────
  // Roving tabindex: only the selected radio is tabbable; arrow keys move the
  // selection (Down/Right forward, Up/Left backward) and wrap at both ends.
  it("selected radio has tabIndex=0, others have tabIndex=-1 (roving tabIndex)", async () => {
    await openDialog();
    const radios = screen.getAllByRole("radio");
    const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
    const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
    const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
    // T1 is default selected
    expect(t1.getAttribute("tabindex")).toBe("0");
    expect(t2.getAttribute("tabindex")).toBe("-1");
    expect(t3.getAttribute("tabindex")).toBe("-1");
  });
  it("ArrowDown moves selection from T1 to T2", async () => {
    await openDialog();
    const radios = screen.getAllByRole("radio");
    const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
    const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
    t1.focus();
    fireEvent.keyDown(t1, { key: "ArrowDown" });
    await waitFor(() => expect(t2.getAttribute("aria-checked")).toBe("true"));
    expect(t1.getAttribute("aria-checked")).toBe("false");
  });
  it("ArrowRight moves selection from T2 to T3", async () => {
    await openDialog();
    const radios = screen.getAllByRole("radio");
    const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
    const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
    fireEvent.click(t2); // select T2 first
    await waitFor(() => expect(t2.getAttribute("aria-checked")).toBe("true"));
    t2.focus();
    fireEvent.keyDown(t2, { key: "ArrowRight" });
    await waitFor(() => expect(t3.getAttribute("aria-checked")).toBe("true"));
  });
  it("ArrowDown wraps from T3 back to T1", async () => {
    await openDialog();
    const radios = screen.getAllByRole("radio");
    const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
    const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
    fireEvent.click(t3); // select T3 first
    await waitFor(() => expect(t3.getAttribute("aria-checked")).toBe("true"));
    t3.focus();
    fireEvent.keyDown(t3, { key: "ArrowDown" });
    await waitFor(() => expect(t1.getAttribute("aria-checked")).toBe("true"));
  });
  it("ArrowUp moves selection from T2 to T1", async () => {
    await openDialog();
    const radios = screen.getAllByRole("radio");
    const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
    const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
    fireEvent.click(t2);
    await waitFor(() => expect(t2.getAttribute("aria-checked")).toBe("true"));
    t2.focus();
    fireEvent.keyDown(t2, { key: "ArrowUp" });
    await waitFor(() => expect(t1.getAttribute("aria-checked")).toBe("true"));
  });
  it("ArrowLeft wraps from T1 back to T3", async () => {
    await openDialog();
    const radios = screen.getAllByRole("radio");
    const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
    const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
    t1.focus();
    fireEvent.keyDown(t1, { key: "ArrowLeft" });
    await waitFor(() => expect(t3.getAttribute("aria-checked")).toBe("true"));
  });
});
// ── WCAG 2.1 SC 1.3.1 — Programmatic label association (Issue #558) ──────────
//
// Every <input> rendered by the InputField helper must have a matching <label>
// via htmlFor/id so screen readers announce the field name, not just the
// placeholder. useId() in InputField generates stable unique IDs per render.
describe("CreateWorkspaceDialog — WCAG SC 1.3.1 label/input association", () => {
it("Name input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const nameInput = screen.getByPlaceholderText("e.g. SEO Agent") as HTMLInputElement;
expect(nameInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${nameInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Name");
});
it("Role input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const roleInput = screen.getByPlaceholderText("e.g. SEO Specialist") as HTMLInputElement;
expect(roleInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${roleInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Role");
});
it("Budget limit input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const budgetInput = screen.getByPlaceholderText("e.g. 100") as HTMLInputElement;
expect(budgetInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${budgetInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Budget limit");
});
it("Template input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const templateInput = screen.getByPlaceholderText(
"e.g. seo-agent (from workspace-configs-templates/)"
) as HTMLInputElement;
expect(templateInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${templateInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Template");
});
it("each InputField generates a distinct id (no id collisions)", async () => {
await openDialog();
const inputs = [
screen.getByPlaceholderText("e.g. SEO Agent"),
screen.getByPlaceholderText("e.g. SEO Specialist"),
screen.getByPlaceholderText("e.g. 100"),
screen.getByPlaceholderText("e.g. seo-agent (from workspace-configs-templates/)"),
] as HTMLInputElement[];
const ids = inputs.map((i) => i.id).filter(Boolean);
const unique = new Set(ids);
expect(unique.size).toBe(ids.length); // no duplicates
expect(ids.length).toBe(4);
});
it("Name label text contains the required asterisk indicator", async () => {
await openDialog();
const nameInput = screen.getByPlaceholderText("e.g. SEO Agent") as HTMLInputElement;
const label = document.querySelector(`label[for="${nameInput.id}"]`);
// aria-hidden asterisk * is present for visual required indicator
expect(label?.querySelector("[aria-hidden='true']")?.textContent).toBe("*");
});
});

View File

@ -299,3 +299,85 @@ describe("CreateWorkspaceDialog — Hermes provider picker", () => {
);
});
});
// ---------------------------------------------------------------------------
// budget_limit field tests (#541)
// ---------------------------------------------------------------------------
describe("CreateWorkspaceDialog — budget_limit field", () => {
it("renders a Budget limit (USD) input", async () => {
await openDialog();
const budgetInput = screen.getByPlaceholderText("e.g. 100");
expect(budgetInput).toBeTruthy();
});
it("renders helper text 'Leave blank for unlimited'", async () => {
await openDialog();
expect(screen.getByText("Leave blank for unlimited")).toBeTruthy();
});
it("sends budget_limit as a number when a value is entered", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Budget Agent" },
});
fireEvent.change(screen.getByPlaceholderText("e.g. 100"), {
target: { value: "250" },
});
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBe(250);
});
it("sends budget_limit as null when the field is left blank", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Unlimited Agent" },
});
// Leave budget_limit empty
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBeNull();
});
it("sends budget_limit as a float when a decimal value is entered", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Float Budget Agent" },
});
fireEvent.change(screen.getByPlaceholderText("e.g. 100"), {
target: { value: "49.99" },
});
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBeCloseTo(49.99);
});
it("resets budget_limit to empty when dialog is reopened", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. 100"), {
target: { value: "500" },
});
// Close dialog
const cancelBtn = screen.getAllByRole("button").find((b) =>
b.textContent === "Cancel"
);
fireEvent.click(cancelBtn!);
cleanup();
// Re-open
await openDialog();
const budgetInput = screen.getByPlaceholderText("e.g. 100") as HTMLInputElement;
expect(budgetInput.value).toBe("");
});
});

View File

@ -0,0 +1,148 @@
// @vitest-environment jsdom
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, screen, waitFor, cleanup } from "@testing-library/react";
// Mock api before importing the component
vi.mock("@/lib/api", () => ({
  api: {
    get: vi.fn(),
  },
}));
import { api } from "@/lib/api";
import { WorkspaceUsage } from "../WorkspaceUsage";
const mockGet = vi.mocked(api.get);
// Canned happy-path payload for GET /workspaces/:id/metrics.
const METRICS_RESPONSE = {
  input_tokens: 12345,
  output_tokens: 678,
  total_calls: 42,
  estimated_cost_usd: "0.123456",
  period_start: "2026-04-17T00:00:00Z",
  period_end: "2026-04-18T00:00:00Z",
};
// Reset mock call history before, and unmount components after, each test.
beforeEach(() => {
  vi.clearAllMocks();
});
afterEach(() => {
  cleanup();
});
describe("WorkspaceUsage", () => {
it("renders the outer container without crashing", () => {
// Keep fetch pending so we can check initial state
mockGet.mockReturnValue(new Promise(() => {}));
const { container } = render(<WorkspaceUsage workspaceId="ws-1" />);
expect(container.firstChild).toBeTruthy();
});
it("renders the Usage heading", () => {
mockGet.mockReturnValue(new Promise(() => {}));
render(<WorkspaceUsage workspaceId="ws-1" />);
expect(screen.getByText("Usage")).toBeTruthy();
});
it("shows skeleton rows while loading", () => {
mockGet.mockReturnValue(new Promise(() => {}));
render(<WorkspaceUsage workspaceId="ws-1" />);
const skeletons = screen.getAllByTestId("usage-skeleton-row");
expect(skeletons.length).toBe(3);
});
it("calls GET /workspaces/:id/metrics with the correct workspaceId", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue(METRICS_RESPONSE as any);
render(<WorkspaceUsage workspaceId="ws-abc-123" />);
await waitFor(() => expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-abc-123/metrics"));
});
it("displays input tokens formatted with toLocaleString after load", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue(METRICS_RESPONSE as any);
render(<WorkspaceUsage workspaceId="ws-1" />);
await waitFor(() => {
const row = screen.getByTestId("usage-input-tokens");
expect(row).toBeTruthy();
// 12345 formatted — locale-dependent but always has digits + "tokens"
expect(row.textContent).toContain("tokens");
expect(row.textContent).toContain("12");
});
});
it("displays output tokens formatted with toLocaleString after load", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue(METRICS_RESPONSE as any);
render(<WorkspaceUsage workspaceId="ws-1" />);
await waitFor(() => {
const row = screen.getByTestId("usage-output-tokens");
expect(row).toBeTruthy();
expect(row.textContent).toContain("tokens");
expect(row.textContent).toContain("678");
});
});
it("displays estimated cost formatted as $X.XXXXXX after load", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue(METRICS_RESPONSE as any);
render(<WorkspaceUsage workspaceId="ws-1" />);
await waitFor(() => {
const row = screen.getByTestId("usage-estimated-cost");
expect(row).toBeTruthy();
expect(row.textContent).toBe("Estimated cost$0.123456");
});
});
it("shows the stat rows and hides skeletons after successful load", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue(METRICS_RESPONSE as any);
render(<WorkspaceUsage workspaceId="ws-1" />);
await waitFor(() => {
expect(screen.queryAllByTestId("usage-skeleton-row").length).toBe(0);
expect(screen.getByTestId("usage-input-tokens")).toBeTruthy();
expect(screen.getByTestId("usage-output-tokens")).toBeTruthy();
expect(screen.getByTestId("usage-estimated-cost")).toBeTruthy();
});
});
it("shows error message when fetch fails", async () => {
mockGet.mockRejectedValue(new Error("API GET /workspaces/ws-1/metrics: 403 Forbidden"));
render(<WorkspaceUsage workspaceId="ws-1" />);
await waitFor(() => {
const err = screen.getByTestId("usage-error");
expect(err).toBeTruthy();
expect(err.textContent).toContain("403");
});
});
it("does not show stat rows on error", async () => {
mockGet.mockRejectedValue(new Error("network error"));
render(<WorkspaceUsage workspaceId="ws-1" />);
await waitFor(() => {
expect(screen.queryByTestId("usage-input-tokens")).toBeNull();
expect(screen.queryByTestId("usage-output-tokens")).toBeNull();
expect(screen.queryByTestId("usage-estimated-cost")).toBeNull();
});
});
it("re-fetches when workspaceId prop changes", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue(METRICS_RESPONSE as any);
const { rerender } = render(<WorkspaceUsage workspaceId="ws-1" />);
await waitFor(() => expect(mockGet).toHaveBeenCalledTimes(1));
rerender(<WorkspaceUsage workspaceId="ws-2" />);
await waitFor(() => {
expect(mockGet).toHaveBeenCalledTimes(2);
expect(mockGet).toHaveBeenLastCalledWith("/workspaces/ws-2/metrics");
});
});
it("renders the usage-stats container in all states", () => {
mockGet.mockReturnValue(new Promise(() => {}));
render(<WorkspaceUsage workspaceId="ws-1" />);
expect(screen.getByTestId("usage-stats")).toBeTruthy();
});
});

View File

@ -6,6 +6,24 @@ import React from "react";
import { render, screen, fireEvent, cleanup } from "@testing-library/react";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
// vi.mock is hoisted to module top level by Vitest regardless of where it appears
// in the source. Placing it here explicitly matches that runtime behaviour and
// silences the "not at top level" warning (closes #632).
vi.mock("../../../store/canvas", () => ({
useCanvasStore: Object.assign(
vi.fn(() => null),
{
getState: () => ({
selectedNodeId: null,
nodes: [],
contextMenu: null,
closeContextMenu: vi.fn(),
selectNode: vi.fn(),
}),
}
),
}));
afterEach(() => cleanup());
// ─── Z key handler unit tests (no React needed) ─────────────────────────────
@ -25,22 +43,6 @@ describe("Z key → molecule:zoom-to-team", () => {
});
it("does NOT fire when no node is selected", () => {
// Simulate store: no selection
vi.mock("../../../store/canvas", () => ({
useCanvasStore: Object.assign(
vi.fn(() => null),
{
getState: () => ({
selectedNodeId: null,
nodes: [],
contextMenu: null,
closeContextMenu: vi.fn(),
selectNode: vi.fn(),
}),
}
),
}));
fireEvent.keyDown(window, { key: "Z" });
expect(dispatchedEvents).toHaveLength(0);
});

View File

@ -0,0 +1,253 @@
'use client';
import { useState, useEffect, useCallback } from "react";
import { api } from "@/lib/api";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
// Shape of GET/PATCH /workspaces/:id/budget responses.
interface BudgetData {
  // Credit ceiling for the workspace; null means unlimited (rendered as "Unlimited").
  budget_limit: number | null;
  // Credits consumed so far.
  budget_used: number;
  // Credits left before the limit; null when no limit is set.
  budget_remaining: number | null;
}

interface Props {
  // Id of the workspace whose budget is shown/edited; used to build API paths.
  workspaceId: string;
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Returns true when the given value is an Error whose message carries an
 * HTTP 402 status code, i.e. contains ": 402 " or ends with ": 402"
 * (the format produced by the api client's error messages).
 */
function isApiError402(e: unknown): boolean {
  if (!(e instanceof Error)) {
    return false;
  }
  return /: 402( |$)/.test(e.message);
}
// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------
/**
 * BudgetSection — dedicated "Budget" section in the workspace details panel.
 *
 * - Fetches GET /workspaces/:id/budget on mount for live usage stats
 * - Shows a progress bar (budget_used / budget_limit, blue-500, capped 100%)
 * - Allows updating budget_limit via PATCH /workspaces/:id/budget
 * - Shows a 402-specific "Budget exceeded" amber banner for any blocked state
 *
 * @param workspaceId - id of the workspace whose budget is shown/edited.
 */
export function BudgetSection({ workspaceId }: Props) {
  const [budget, setBudget] = useState<BudgetData | null>(null);
  const [loading, setLoading] = useState(true);
  const [fetchError, setFetchError] = useState<string | null>(null);
  // Text value of the limit input; "" represents "unlimited".
  const [limitInput, setLimitInput] = useState("");
  const [saving, setSaving] = useState(false);
  const [saveError, setSaveError] = useState<string | null>(null);
  /** True when a 402 has been seen from any API call in this section. */
  const [budgetExceeded, setBudgetExceeded] = useState(false);

  // ── Fetch current budget data ─────────────────────────────────────────────
  const loadBudget = useCallback(async () => {
    setLoading(true);
    setFetchError(null);
    try {
      const data = await api.get<BudgetData>(`/workspaces/${workspaceId}/budget`);
      setBudget(data);
      // Seed the input with the current limit so editing starts from live state.
      setLimitInput(data.budget_limit != null ? String(data.budget_limit) : "");
    } catch (e) {
      if (isApiError402(e)) {
        // A 402 means the budget is exhausted, not that the fetch is broken —
        // flip the banner instead of surfacing a fetch error.
        setBudgetExceeded(true);
      } else {
        setFetchError(e instanceof Error ? e.message : "Failed to load budget");
      }
    } finally {
      setLoading(false);
    }
  }, [workspaceId]);

  // Runs on mount and again whenever workspaceId changes (loadBudget is
  // recreated by useCallback when its workspaceId dependency changes).
  useEffect(() => {
    loadBudget();
  }, [loadBudget]);

  // ── Save handler ──────────────────────────────────────────────────────────
  const handleSave = async () => {
    setSaving(true);
    setSaveError(null);
    const raw = limitInput.trim();
    // Use explicit empty-string check (not falsy check) so that a
    // user-entered "0" is sent as budget_limit: 0, not null (unlimited).
    // NOTE(review): parseInt on a non-numeric string yields NaN; the
    // type="number" input normally prevents this — confirm server validation.
    const parsedLimit = raw !== "" ? parseInt(raw, 10) : null;
    try {
      const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, {
        budget_limit: parsedLimit,
      });
      setBudget(updated);
      setLimitInput(updated.budget_limit != null ? String(updated.budget_limit) : "");
      // Clear exceeded state if the save succeeded (limit was raised or removed)
      setBudgetExceeded(false);
    } catch (e) {
      if (isApiError402(e)) {
        setBudgetExceeded(true);
      } else {
        setSaveError(e instanceof Error ? e.message : "Failed to save budget");
      }
    } finally {
      setSaving(false);
    }
  };

  // ── Progress calculation ──────────────────────────────────────────────────
  // Integer percentage of the budget consumed, capped at 100; 0 when no
  // positive limit is set (the bar is hidden in that case anyway).
  const progressPct =
    budget && budget.budget_limit != null && budget.budget_limit > 0
      ? Math.min(100, Math.round((budget.budget_used / budget.budget_limit) * 100))
      : 0;

  // ── Render ────────────────────────────────────────────────────────────────
  return (
    <div className="space-y-3" data-testid="budget-section">
      {/* Section header */}
      <div>
        <h3 className="text-xs font-semibold text-zinc-400 uppercase tracking-wider">
          Budget
        </h3>
        <p className="text-[11px] text-zinc-400 mt-0.5">
          Limit total message credits for this workspace
        </p>
      </div>
      {/* 402 exceeded banner */}
      {budgetExceeded && (
        <div
          role="alert"
          data-testid="budget-exceeded-banner"
          className="flex items-center gap-2 px-3 py-2 rounded-lg bg-zinc-950 border border-amber-700/50 text-amber-400 text-xs font-medium"
        >
          {/* Inline warning-triangle icon (decorative only) */}
          <svg
            width="13"
            height="13"
            viewBox="0 0 13 13"
            fill="none"
            aria-hidden="true"
            className="shrink-0"
          >
            <path
              d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z"
              stroke="currentColor"
              strokeWidth="1.4"
              strokeLinejoin="round"
            />
            <path
              d="M6.5 5.5V7.5M6.5 9.5h.01"
              stroke="currentColor"
              strokeWidth="1.4"
              strokeLinecap="round"
            />
          </svg>
          {/* NOTE(review): copy reads "Budget exceeded messages blocked" — a
              separator (em dash?) may have been lost; confirm intended copy. */}
          Budget exceeded messages blocked
        </div>
      )}
      {/* Usage stats — three mutually exclusive states: loading / error / data */}
      {loading ? (
        <p className="text-xs text-zinc-500" data-testid="budget-loading">
          Loading
        </p>
      ) : fetchError ? (
        <p className="text-xs text-red-400" data-testid="budget-fetch-error">
          {fetchError}
        </p>
      ) : budget ? (
        <div className="space-y-2">
          {/* Stats row */}
          <div className="flex items-baseline justify-between" data-testid="budget-stats-row">
            <span className="text-xs text-zinc-400">Credits used</span>
            <span className="text-xs font-mono text-zinc-300">
              <span data-testid="budget-used-value">{budget.budget_used.toLocaleString()}</span>
              <span className="text-zinc-500 mx-1">/</span>
              <span data-testid="budget-limit-value">
                {budget.budget_limit != null
                  ? budget.budget_limit.toLocaleString()
                  : "Unlimited"}
              </span>
            </span>
          </div>
          {/* Progress bar (only when limit is set) */}
          {budget.budget_limit != null && (
            <div
              role="progressbar"
              aria-label="Budget usage"
              aria-valuenow={progressPct}
              aria-valuemin={0}
              aria-valuemax={100}
              className="h-1.5 w-full rounded-full bg-zinc-800 overflow-hidden"
            >
              <div
                data-testid="budget-progress-fill"
                className="h-full rounded-full bg-blue-500 transition-all duration-300"
                style={{ width: `${progressPct}%` }}
              />
            </div>
          )}
          {/* Remaining credits */}
          {budget.budget_remaining != null && (
            <p className="text-[11px] text-zinc-500" data-testid="budget-remaining">
              {budget.budget_remaining.toLocaleString()} credits remaining
            </p>
          )}
        </div>
      ) : null}
      {/* Input + Save */}
      <div className="space-y-1.5 pt-1">
        <label
          htmlFor={`budget-limit-input-${workspaceId}`}
          className="text-[11px] text-zinc-400 block"
        >
          Budget limit (credits)
        </label>
        <input
          id={`budget-limit-input-${workspaceId}`}
          type="number"
          min="0"
          step="1"
          value={limitInput}
          onChange={(e) => setLimitInput(e.target.value)}
          placeholder="e.g. 1000 — blank for unlimited"
          data-testid="budget-limit-input"
          className="w-full bg-zinc-800 border border-zinc-700 rounded-lg px-3 py-2 text-sm text-zinc-300 placeholder-zinc-500 focus:outline-none focus:border-blue-500 focus:ring-1 focus:ring-blue-500/30 transition-colors"
        />
        <p className="text-xs text-zinc-500">Leave blank for unlimited</p>
        {saveError && (
          <div
            role="alert"
            data-testid="budget-save-error"
            className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-red-400"
          >
            {saveError}
          </div>
        )}
        <button
          onClick={handleSave}
          disabled={saving}
          data-testid="budget-save-btn"
          className="px-4 py-1.5 bg-blue-600 hover:bg-blue-500 active:bg-blue-700 rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors"
        >
          {saving ? "Saving…" : "Save"}
        </button>
      </div>
    </div>
  );
}

View File

@ -141,19 +141,29 @@ export function ChannelsTab({ workspaceId }: Props) {
}
};
const [error, setError] = useState("");
const handleToggle = async (ch: Channel) => {
await api.patch(`/workspaces/${workspaceId}/channels/${ch.id}`, {
enabled: !ch.enabled,
});
load();
try {
await api.patch(`/workspaces/${workspaceId}/channels/${ch.id}`, {
enabled: !ch.enabled,
});
load();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to toggle channel");
}
};
const confirmDelete = async () => {
if (!pendingDelete) return;
const ch = pendingDelete;
setPendingDelete(null);
await api.del(`/workspaces/${workspaceId}/channels/${ch.id}`);
load();
try {
await api.del(`/workspaces/${workspaceId}/channels/${ch.id}`);
load();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to delete channel");
}
};
const handleTest = async (ch: Channel) => {
@ -188,6 +198,12 @@ export function ChannelsTab({ workspaceId }: Props) {
</button>
</div>
{error && (
<div className="px-3 py-1.5 bg-red-900/30 border border-red-800 rounded text-xs text-red-400">
{error}
</div>
)}
{/* Create form */}
{showForm && (
<div className="space-y-2 p-3 bg-zinc-800/40 rounded border border-zinc-700/50">

View File

@ -267,6 +267,48 @@ export function ConfigTab({ workspaceId }: Props) {
<TagList label="Required Env Vars" values={config.runtime_config?.required_env || []} onChange={(v) => updateNested("runtime_config" as keyof ConfigData, "required_env", v)} placeholder="e.g. CLAUDE_CODE_OAUTH_TOKEN" />
</Section>
{/* Claude Settings — shown for claude-code runtime or claude/anthropic model names */}
{(config.runtime === "claude-code" ||
(config.runtime_config?.model || config.model || "").toLowerCase().includes("claude") ||
(config.runtime_config?.model || config.model || "").toLowerCase().includes("anthropic")) && (
<Section title="Claude Settings" defaultOpen={false}>
<div>
<label className="text-[10px] text-zinc-500 block mb-1">
Effort
<span className="ml-1 text-zinc-600">(output_config.effort Opus 4.7+)</span>
</label>
<select
value={config.effort || ""}
onChange={(e) => update("effort", e.target.value)}
className="w-full bg-zinc-800 border border-zinc-700 rounded px-2 py-1 text-xs text-zinc-200 focus:outline-none focus:border-blue-500"
data-testid="effort-select"
>
<option value=""> unset (model default) </option>
<option value="low">low</option>
<option value="medium">medium</option>
<option value="high">high</option>
<option value="xhigh">xhigh (extended thinking)</option>
</select>
</div>
<div>
<label className="text-[10px] text-zinc-500 block mb-1">
Task Budget (tokens)
<span className="ml-1 text-zinc-600">(output_config.task_budget.total 0 = unset)</span>
</label>
<input
type="number"
min={0}
step={1000}
value={config.task_budget ?? 0}
onChange={(e) => update("task_budget", parseInt(e.target.value, 10) || 0)}
placeholder="0"
className="w-full bg-zinc-800 border border-zinc-700 rounded px-2 py-1 text-xs text-zinc-200 focus:outline-none focus:border-blue-500 font-mono"
data-testid="task-budget-input"
/>
</div>
</Section>
)}
<Section title="Skills & Tools" defaultOpen={false}>
<TagList label="Skills" values={config.skills || []} onChange={(v) => update("skills", v)} placeholder="e.g. code-review" />
<TagList label="Tools" values={config.tools || []} onChange={(v) => update("tools", v)} placeholder="e.g. web_search, filesystem" />

View File

@ -4,6 +4,8 @@ import { useState, useEffect, useCallback } from "react";
import { api } from "@/lib/api";
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
import { StatusDot } from "../StatusDot";
import { BudgetSection } from "./BudgetSection";
import { WorkspaceUsage } from "../WorkspaceUsage";
interface Props {
workspaceId: string;
@ -59,7 +61,11 @@ export function DetailsTab({ workspaceId, data }: Props) {
setSaving(true);
setSaveError(null);
try {
await api.patch(`/workspaces/${workspaceId}`, { name, role: role || null, tier });
await api.patch(`/workspaces/${workspaceId}`, {
name,
role: role || null,
tier,
});
updateNodeData(workspaceId, { name, role: role || "", tier });
setEditing(false);
} catch (e) {
@ -145,7 +151,13 @@ export function DetailsTab({ workspaceId, data }: Props) {
{saving ? "Saving..." : "Save"}
</button>
<button
onClick={() => { setEditing(false); setSaveError(null); setName(data.name); setRole(data.role || ""); setTier(data.tier); }}
onClick={() => {
setEditing(false);
setSaveError(null);
setName(data.name);
setRole(data.role || "");
setTier(data.tier);
}}
className="px-3 py-1 bg-zinc-700 hover:bg-zinc-600 text-xs rounded text-zinc-300"
>
Cancel
@ -190,6 +202,12 @@ export function DetailsTab({ workspaceId, data }: Props) {
)}
</Section>
{/* Budget — dedicated section with live usage stats (#541) */}
<BudgetSection workspaceId={workspaceId} />
{/* Token usage + spend — wired to GET /workspaces/:id/metrics (#592) */}
<WorkspaceUsage workspaceId={workspaceId} />
{/* Agent Card / Skills */}
{skills.length > 0 && (
<Section title="Skills">

View File

@ -219,7 +219,7 @@ export function MemoryTab({ workspaceId }: Props) {
Refresh
</button>
<button
onClick={() => setShowAdd(!showAdd)}
onClick={() => { setShowAdd(!showAdd); if (!showAdd) setShowAdvanced(true); }}
className="px-2 py-1 bg-blue-600 hover:bg-blue-500 text-[10px] rounded text-white"
>
+ Add

View File

@ -126,15 +126,23 @@ export function ScheduleTab({ workspaceId }: Props) {
if (!pendingDelete) return;
const { id } = pendingDelete;
setPendingDelete(null);
await api.del(`/workspaces/${workspaceId}/schedules/${id}`);
fetchSchedules();
try {
await api.del(`/workspaces/${workspaceId}/schedules/${id}`);
fetchSchedules();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to delete schedule");
}
};
const handleToggle = async (sched: Schedule) => {
await api.patch(`/workspaces/${workspaceId}/schedules/${sched.id}`, {
enabled: !sched.enabled,
});
fetchSchedules();
try {
await api.patch(`/workspaces/${workspaceId}/schedules/${sched.id}`, {
enabled: !sched.enabled,
});
fetchSchedules();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to toggle schedule");
}
};
const handleEdit = (sched: Schedule) => {

View File

@ -68,11 +68,14 @@ export function TracesTab({ workspaceId }: Props) {
{traces.length === 0 && !error ? (
<div className="text-center py-8">
<div className="text-2xl opacity-20 mb-2">📊</div>
<div className="text-2xl opacity-20 mb-2" aria-hidden="true">--</div>
<p className="text-xs text-zinc-600">No traces yet</p>
<p className="text-[10px] text-zinc-700 mt-1">
Set LANGFUSE_HOST, LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY to enable tracing
</p>
<details className="mt-2 text-[10px] text-zinc-700">
<summary className="cursor-pointer text-zinc-500 hover:text-zinc-400">How to enable tracing</summary>
<p className="mt-1">
Set <code className="font-mono text-zinc-400">LANGFUSE_HOST</code>, <code className="font-mono text-zinc-400">LANGFUSE_PUBLIC_KEY</code>, <code className="font-mono text-zinc-400">LANGFUSE_SECRET_KEY</code> as workspace secrets to enable tracing.
</p>
</details>
</div>
) : (
<div className="space-y-1">

View File

@ -16,6 +16,11 @@ export interface ConfigData {
// Deprecated
auth_token_file?: string;
};
// Claude API primitives (Opus 4.7+) — issue #608
// effort maps to output_config.effort in Messages API: 'low' | 'medium' | 'high' | 'xhigh'
effort?: string;
// task_budget maps to output_config.task_budget.total (requires beta header task-budgets-2026-03-13)
task_budget?: number;
prompt_files: string[];
shared_context: string[];
skills: string[];
@ -32,6 +37,8 @@ export const DEFAULT_CONFIG: ConfigData = {
tier: 1,
model: "",
runtime: "",
effort: "",
task_budget: 0,
prompt_files: [],
shared_context: [],
skills: [],

View File

@ -116,6 +116,9 @@ export function toYaml(config: ConfigData): string {
}
}
if (config.model) { lines.push(""); simple("model", config.model); }
// Claude API primitives (issue #608)
if (config.effort) { lines.push(""); simple("effort", config.effort); }
if (config.task_budget && config.task_budget > 0) { simple("task_budget", config.task_budget); }
if (config.prompt_files?.length) { lines.push(""); list("prompt_files", config.prompt_files); }
if (config.shared_context?.length) { lines.push(""); list("shared_context", config.shared_context); }
lines.push(""); list("skills", config.skills);

View File

@ -1,3 +1,5 @@
"use client";
interface RevealToggleProps {
revealed: boolean;
onToggle: () => void;

View File

@ -17,6 +17,8 @@ export const RUNTIME_REQUIRED_KEYS: Record<string, string[]> = {
deepagents: ["OPENAI_API_KEY"],
crewai: ["OPENAI_API_KEY"],
autogen: ["OPENAI_API_KEY"],
hermes: ["OPENROUTER_API_KEY"],
"gemini-cli": ["GOOGLE_API_KEY"],
};
/** Human-readable labels for common secret keys */
@ -26,6 +28,8 @@ export const KEY_LABELS: Record<string, string> = {
GOOGLE_API_KEY: "Google AI API Key",
SERP_API_KEY: "SERP API Key",
OPENROUTER_API_KEY: "OpenRouter API Key",
HERMES_API_KEY: "Nous Research API Key",
DEEPSEEK_API_KEY: "DeepSeek API Key",
};
/* ---------- Types ---------- */

View File

@ -719,6 +719,33 @@ describe("misc state setters", () => {
});
});
// ---------- hydrationError (#554) ----------
describe("hydrationError", () => {
it("initial value is null", () => {
expect(useCanvasStore.getState().hydrationError).toBeNull();
});
it("setHydrationError stores an error message", () => {
useCanvasStore.getState().setHydrationError("Network timeout");
expect(useCanvasStore.getState().hydrationError).toBe("Network timeout");
});
it("setHydrationError(null) clears the error", () => {
useCanvasStore.getState().setHydrationError("Some error");
useCanvasStore.getState().setHydrationError(null);
expect(useCanvasStore.getState().hydrationError).toBeNull();
});
it("setHydrationError does not affect other state", () => {
useCanvasStore.getState().hydrate([makeWS({ id: "ws-x", name: "X" })]);
useCanvasStore.getState().setHydrationError("oops");
// Nodes should still be intact
expect(useCanvasStore.getState().nodes).toHaveLength(1);
expect(useCanvasStore.getState().nodes[0].id).toBe("ws-x");
});
});
// ---------- ACTIVITY_LOGGED event ----------
describe("ACTIVITY_LOGGED event", () => {

View File

@ -142,6 +142,8 @@ export function buildNodesAndEdges(
currentTask: ws.current_task || "",
runtime: ws.runtime || "",
needsRestart: false,
budgetLimit: ws.budget_limit ?? null,
budgetUsed: ws.budget_used ?? null,
},
// Hide child nodes from canvas — they render inside the parent WorkspaceNode
hidden: !!ws.parent_id,

View File

@ -29,6 +29,10 @@ export interface WorkspaceNodeData extends Record<string, unknown> {
currentTask: string;
runtime: string;
needsRestart: boolean;
/** USD spend ceiling set by the user; null = unlimited. Added by issue #541. */
budgetLimit: number | null;
/** Cumulative USD spend. Present when the platform tracks spend (issue #541). */
budgetUsed?: number | null;
}
export type PanelTab = "details" | "skills" | "chat" | "terminal" | "config" | "schedule" | "channels" | "files" | "memory" | "traces" | "events" | "activity";
@ -73,6 +77,9 @@ interface CanvasState {
/** WebSocket connection status — drives the live indicator in the Toolbar. */
wsStatus: "connected" | "connecting" | "disconnected";
setWsStatus: (status: "connected" | "connecting" | "disconnected") => void;
/** Hydration error message — set when initial canvas load fails. Null when no error. */
hydrationError: string | null;
setHydrationError: (error: string | null) => void;
}
export const useCanvasStore = create<CanvasState>((set, get) => ({
@ -84,6 +91,8 @@ export const useCanvasStore = create<CanvasState>((set, get) => ({
contextMenu: null,
wsStatus: "connecting",
setWsStatus: (status) => set({ wsStatus: status }),
hydrationError: null,
setHydrationError: (error) => set({ hydrationError: error }),
viewport: { x: 0, y: 0, zoom: 1 },

View File

@ -118,6 +118,10 @@ export interface WorkspaceData {
x: number;
y: number;
collapsed: boolean;
/** USD spend ceiling set by the user; null = unlimited. Added by issue #541. */
budget_limit: number | null;
/** Cumulative USD spend for this workspace. Present when the platform tracks spend. */
budget_used?: number | null;
}
let socket: ReconnectingSocket | null = null;

View File

@ -0,0 +1,108 @@
---
title: "Deploy AI Agents on Fly.io — or Any Cloud — with One Config Change"
date: 2026-04-17
slug: deploy-anywhere
description: "Molecule AI supports fly.io agent deployment and control-plane provisioning. Switch backends with one env var — no agent code changes required."
tags: [platform, fly.io, deployment, infrastructure]
---
# Deploy AI Agents on Fly.io — or Any Cloud — with One Config Change
Your infrastructure choice just got decoupled from your agent platform choice. Molecule AI now ships three production-ready workspace backends — `docker`, `flyio`, and `controlplane` — and switching between them takes a single environment variable. Your agent code, model choices, and workspace topology stay exactly the same.
This post covers what shipped in [PR #501](https://github.com/Molecule-AI/molecule-core/pull/501) (Fly Machines provisioner) and [PR #503](https://github.com/Molecule-AI/molecule-core/pull/503) (control plane provisioner), and which backend fits your situation.
## Before: One Deployment Model for Every Use Case
Until this week, Molecule AI workspaces ran on one backend: Docker. That was the right default for self-hosters — no external dependencies, full control, works anywhere a Docker daemon runs. But it left two groups making a compromise they shouldn't have to:
- **Indie developers and small teams** wanted Fly.io's economics: pay-per-use compute, fast cold starts, scale to zero when nobody's working.
- **SaaS builders** needed structural credential isolation. A Fly API token sitting in the tenant layer is one misconfiguration away from a security incident — not a policy problem, an architecture problem.
Both groups were choosing between "use the platform" and "get the deployment model I need." That trade-off is gone.
## Run AI Agents on Fly: The Indie Dev Path
You're already on Fly. You have an account, a Fly app, and you're comfortable with Machines. You want Molecule AI workspaces to provision as Fly Machines — no separate Docker host, no idle infrastructure, just workspaces that appear when needed and disappear when they don't.
Set three environment variables on your tenant platform instance:
```bash
CONTAINER_BACKEND=flyio
FLY_API_TOKEN=<your-fly-deploy-token>
FLY_WORKSPACE_APP=<your-fly-app-name>
# Optional — defaults to ord
FLY_REGION=ord
```
When a workspace is created, the Fly provisioner:
1. Spins up a Fly Machine inside your `FLY_WORKSPACE_APP`
2. Injects workspace secrets and the platform registration URL as machine env vars
3. Selects the right GHCR image for the runtime (`hermes` → `ghcr.io/molecule-ai/workspace-hermes:latest`, and so on)
4. Applies tier-based resource limits — T2 at 512 MB / 1 vCPU, T3 at 2 GB / 2 vCPU, T4 at 4 GB / 4 vCPU
5. Issues a boot-time auth token so the workspace agent can register with the platform immediately
Your workspaces run as first-class Fly Machines. When they're idle, Fly handles the scale-down. Your bill reflects actual usage, not reserved capacity.
## Multi-Tenant Agent Provisioning Without Credential Sprawl
You're building a SaaS product on top of Molecule AI. Each customer gets a Molecule workspace. The problem: if every tenant platform instance carries a `FLY_API_TOKEN`, you've distributed cloud credentials across your tenants — structurally. Policy controls help, but they don't remove the credential from the attack surface.
`CONTAINER_BACKEND=controlplane` removes it entirely.
```
Canvas → Tenant Platform → Control Plane API → Fly Machines API
```
The tenant platform never holds a Fly token. It calls the Molecule control plane at `https://api.moleculesai.app` (overridable via `CP_PROVISION_URL` for staging environments), which holds Fly credentials and orchestrates workspace provisioning centrally.
For standard SaaS deployments, you don't configure this manually — the platform auto-detects the right backend:
- `MOLECULE_ORG_ID` set → SaaS tenant → **control plane provisioner activates automatically**
- `MOLECULE_ORG_ID` empty → self-hosted → **Docker provisioner, no change needed**
The right backend is the default for your context. For most SaaS builders: set `MOLECULE_ORG_ID` at tenant launch, and credential isolation is structural from day one.
## Self-Hosted vs Cloud AI Agents: Backend Comparison
| Backend | `CONTAINER_BACKEND` | Best for | Who holds cloud credentials |
|---|---|---|---|
| **Docker** | *(empty / default)* | Self-hosted, local dev | No external credentials needed |
| **Fly Machines** | `flyio` | Indie devs / small teams on Fly | `FLY_API_TOKEN` lives on the tenant |
| **Control Plane** | `controlplane` | SaaS builders, multi-tenant products | Fly token held by control plane only — never on tenant |
**Fly backend env vars** (for `CONTAINER_BACKEND=flyio`):
| Variable | Required | Default | What it does |
|---|---|---|---|
| `CONTAINER_BACKEND` | Yes | — | Activates the Fly provisioner |
| `FLY_API_TOKEN` | Yes | — | Fly deploy token |
| `FLY_WORKSPACE_APP` | Yes | — | Fly app that hosts workspace machines |
| `FLY_REGION` | No | `ord` | Region for new machines |
## Agent Orchestration in the Cloud: What Doesn't Change
Switching backends changes where workspaces run, not how they work. From any agent runtime's perspective — Hermes, Letta, or whatever you're orchestrating — the workspace is the workspace. Unchanged across all three backends:
- Agent registration and boot sequence
- Model routing and provider dispatch
- Workspace secrets injection
- The full platform API surface
No changes to agent code, tool definitions, or orchestration logic. Swap `CONTAINER_BACKEND`, redeploy, done.
## Multi-Agent Cloud Deployment: Choose Your Path
- **Self-hosting?** Leave `CONTAINER_BACKEND` unset. Docker is the default — nothing to configure.
- **On Fly, small team?** Set `CONTAINER_BACKEND=flyio` with `FLY_API_TOKEN` and `FLY_WORKSPACE_APP`. Workspaces become Fly Machines in your own Fly account.
- **Building a SaaS product on Molecule AI?** Set `MOLECULE_ORG_ID` at tenant launch. The control plane provisioner activates automatically. No Fly credentials on the tenant, ever.
**Pick your backend. Deploy your agents.**
→ [Quickstart: choose your deployment backend](/docs/quickstart)
---
*[PR #501](https://github.com/Molecule-AI/molecule-core/pull/501) (Fly Machines provisioner) and [PR #503](https://github.com/Molecule-AI/molecule-core/pull/503) (control plane provisioner) are both merged to `main`. Molecule AI is open source — contributions welcome.*

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,112 @@
# Competitor Tracker
> **Auto-maintained by PMM cron** — diffs `docs/ecosystem-watch.md` on schedule
> to detect version bumps, threat escalations, and notable changes.
>
> Source of truth for competitor state: `docs/ecosystem-watch.md#competitor-snapshot`
> Full narrative analysis: `docs/ecosystem-watch.md#entries`
>
> **Last updated:** 2026-04-17 (bootstrap — subsequent updates by PMM cron)
---
## High-Threat Competitors
Platforms that directly substitute for or significantly erode Molecule AI's market position.
| Competitor | Version | Stars | Threat Signal | Updated |
|---|---|---|---|---|
| [OpenAI Agents SDK](https://github.com/openai/openai-agents-python) | v0.14.1 | 14k | v0.14.1 SandboxAgent beta — persistent isolated workspaces, snapshot/resume, sandbox memory; directly competes with our workspace lifecycle | 2026-04-17 |
| [CrewAI](https://github.com/crewAIInc/crewAI) | v1.14.1 | 48k | 1.4B agentic automations, 60% Fortune 500 adoption, $18M Insight-led round; CrewAI Enterprise SaaS targeting our enterprise segment | 2026-04-17 |
| [Google ADK](https://github.com/google/adk-python) | v1.30.0 | 19k | v1.30.0 adds Auth Provider registry; full Google agent stack (ADK + Gemini CLI + adk-web DevUI + Scion harness) = largest platform risk | 2026-04-17 |
| [Microsoft Agent Framework](https://github.com/microsoft/agent-framework) | python-1.0.1 | 9.5k | v1.0 GA (official AutoGen successor); SOC 2/HIPAA compliance; .NET + Python; Process Framework GA in Q2 2026 | 2026-04-17 |
---
## Medium-Threat Competitors
Significant overlap in adjacent space; active watch required.
| Competitor | Version | Stars | Notes | Updated |
|---|---|---|---|---|
| [Paperclip](https://github.com/paperclipai/paperclip) | v2026.416.0 | 54.8k | Downgraded HIGH→MEDIUM (deep-dive #571): no A2A, no visual canvas on roadmap; single-process task DAG only; brand/framing threat ("zero-human companies"), not a technical substitute. Only gap vs Molecule AI: per-workspace budget limits (#541). | 2026-04-17 |
| [Dify](https://github.com/langgenius/dify) | v1.13.3 | 60k | v1.14.0 RC adds Human Input node; $30M Pre-A ($180M val); no-code positioning targets business users, not our developer audience | 2026-04-17 |
| [LangGraph](https://github.com/langchain-ai/langgraph) | v1.1.6 | 29k | CLI v0.4.22 Apr 16; LangGraph Cloud hosted execution competes with our scheduler | 2026-04-17 |
| [VoltAgent](https://github.com/VoltAgent/voltagent) | server-elysia@2.0.7 | 8.2k | VoltOps Console = closest Canvas analogue in TypeScript ecosystem | 2026-04-17 |
| [n8n](https://github.com/n8n-io/n8n) | v2.17.2 | 50k | n8n 2.0 enterprise AI Agent nodes + RBAC + 400+ channel integrations | 2026-04-17 |
| [Claude Code Routines](https://code.claude.com/docs/en/routines) | cloud-feature | — | Apr 14 2026 launch: Anthropic-hosted cron + GitHub-event-triggered Claude Code sessions | 2026-04-17 |
| [Scion](https://github.com/GoogleCloudPlatform/scion) | active | early | GCP experimental container-per-agent harness (Apr 8 2026); escalation risk to HIGH if productized | 2026-04-17 |
| [Multica](https://github.com/multica-ai/multica) | active | 12.8k | Positioned as Claude Managed Agents alternative; local daemon + central backend with skill compounding | 2026-04-17 |
| [Cline](https://github.com/cline/cline) | active | 44k | Primary user-overlap with our Claude Code workspace; developers who outgrow Cline convert to Molecule AI | 2026-04-17 |
| [ClawRun](https://github.com/clawrun-sh/clawrun) | active | 84 | Closest architectural match tracked (sandbox/heartbeat/snapshot-resume/channels/cost-tracking); early stage but actively shipped | 2026-04-17 |
| [Gemini CLI](https://github.com/google-gemini/gemini-cli) | v0.38.1 | 101k | Runtime candidate for our workspace adapter; elevated to MEDIUM as part of Google's full agent stack | 2026-04-17 |
---
## Low-Threat Competitors
Tools, infra layers, single-agent products, or projects we use — not direct substitutes.
| Competitor | Version | Stars | Role | Updated |
|---|---|---|---|---|
| [Hermes Agent](https://github.com/NousResearch/hermes-agent) | v0.10.0 | 61k | v0.10.0 (Apr 16) Tool Gateway launch; personal AI single-user shape | 2026-04-17 |
| [gstack](https://github.com/garrytan/gstack) | active | 70k | Sequential single-session Claude Code persona-switching; no multi-agent infra | 2026-04-17 |
| [claude-mem](https://github.com/thedotmack/claude-mem) | active | 56k | Memory addon; 56k ⭐ signals demand gap we need to close in agent_memories | 2026-04-17 |
| [Flowise](https://github.com/FlowiseAI/Flowise) | flowise@3.1.2 | 30k | Acquired by Workday (Aug 2025); v3.1.2 security hardening; narrowed to HR/finance enterprise | 2026-04-17 |
| [OpenHands](https://github.com/All-Hands-AI/OpenHands) | v1.6.0 | 47k | SWE-Bench top; v1.6.0 (Mar 30); single-agent software engineer only | 2026-04-17 |
| [Temporal](https://github.com/temporalio/temporal) | v1.30.4 | 13k | Durable execution infra we integrate; $5B valuation, not a competitor | 2026-04-17 |
| [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) | active | 35.5k | Browser MCP we adopt (issue #540); 23-tool surface | 2026-04-17 |
| [AgentScope](https://github.com/modelscope/agentscope) | v1.0.18 | 23.8k | Alibaba/ModelScope framework; MCP integration; no deployment layer | 2026-04-17 |
| [Composio](https://github.com/composio-dev/composio) | active | 18k | Tool integration library; potential skill-pack dependency | 2026-04-17 |
| [Archon](https://github.com/coleam00/Archon) | v0.3.6 | 18.1k | YAML-DAG coding workflow; reference design for workspace delivery pipelines | 2026-04-17 |
| [Skills CLI](https://github.com/vercel-labs/skills) | active | 14.2k | Vercel agentskills.io CLI; aligning plugins/ = free distribution channel | 2026-04-17 |
| [Holaboss](https://github.com/holaboss-ai/holaboss-ai) | active | 1.7k | Desktop AI employee; terminology collisions (workspace/SKILL.md) | 2026-04-17 |
| [Tencent AI-Infra-Guard](https://github.com/Tencent/AI-Infra-Guard) | v4.1.3 | 3.5k | Security scanner; use as MCP + plugin registry compliance checklist | 2026-04-17 |
| [Plannotator](https://github.com/backnotprop/plannotator) | v0.17.10 | 4.3k | HITL plan annotation UX; reference for improving approvals API schema | 2026-04-17 |
| [open-multi-agent](https://github.com/JackChen-me/open-multi-agent) | v1.1.0 | 5.7k | TypeScript goal-to-DAG library; ephemeral, no identity | 2026-04-17 |
| [Open Agents (Vercel)](https://github.com/vercel-labs/open-agents) | active | 2.2k | Reference app; snapshot-based VM resumption pattern worth borrowing | 2026-04-17 |
| [GenericAgent](https://github.com/lsdefine/GenericAgent) | v1.0 | 2.1k | Self-evolving skill tree; four-tier memory taxonomy worth borrowing | 2026-04-17 |
| [OpenSRE](https://github.com/Tracer-Cloud/opensre) | active | 900 | AI SRE toolkit; potential DevOps workspace skill-pack source | 2026-04-17 |
| [AMD GAIA](https://github.com/amd/gaia) | v0.17.2 | 1.2k | Hardware-locked (AMD Ryzen AI 300+); not general-purpose | 2026-04-17 |
---
## Watchlist — Escalation Signals
The following events would require immediate threat-level re-assessment:
| Competitor | Watch Signal | Current Level | Escalates To |
|---|---|---|---|
| Paperclip | Ships persistent agent memory | MEDIUM | HIGH — 54.8k ⭐ head-start |
| Paperclip | Ships visual org-chart canvas | MEDIUM | HIGH — direct Canvas competitor |
| Scion | Google productizes as managed GCP service | MEDIUM | HIGH |
| VoltAgent | VoltOps Console adds visual org-chart topology | MEDIUM | HIGH |
| Google ADK | ADK + Vertex AI becomes hosted managed platform | HIGH | CRITICAL |
| OpenAI Agents SDK | Inter-sandbox A2A across process boundaries | HIGH | CRITICAL |
| ClawRun | Adds A2A or multi-agent coordination | MEDIUM | HIGH |
| gstack | Adds multi-session/parallel execution | LOW | HIGH — 70k ⭐ head-start |
| Claude Code Routines | Adds A2A between routine sessions | MEDIUM | HIGH — Anthropic distribution |
---
## Recently Changed (last 30 days)
> PMM cron updates this section automatically when `notable_changes` or `version` fields change.
| Date | Competitor | Change |
|---|---|---|
| 2026-04-17 | **Paperclip** | Threat downgraded HIGH→MEDIUM (deep-dive #571): no A2A, no canvas, brand threat only |
| 2026-04-17 | **Paperclip** | v2026.416.0 — execution policies + chat threads for agent transcripts |
| 2026-04-17 | **Hermes Agent** | v0.10.0 — Tool Gateway (web search, image gen, TTS, browser automation) |
| 2026-04-16 | **LangGraph CLI** | v0.4.22 — deploy source tracking |
| 2026-04-15 | **OpenAI Agents SDK** | v0.14.1 — tracing patch on top of Sandbox Agents beta |
| 2026-04-15 | **Gemini CLI** | v0.38.1 — stability patch |
| 2026-04-14 | **Flowise** | v3.1.2 — security hardening (CORS, credential leaks) |
| 2026-04-14 | **Claude Code Routines** | Launched — Anthropic-hosted cron-triggered Claude Code sessions |
| 2026-04-13 | **Google ADK** | v1.30.0 — Auth Provider + Parameter Manager + Gemma 4 support |
| 2026-04-11 | **VoltAgent** | server-elysia@2.0.7 — A2A agent card URL fix |
| 2026-04-10 | **LangGraph** | v1.1.6 — declarative guardrail nodes (LangGraph 2.0 GA) |
| 2026-04-10 | **Temporal** | v1.30.4 — CVE-2026-5724 security patch |
| 2026-04-10 | **Microsoft Agent Framework** | python-1.0.1 — FileCheckpointStorage security hardening |
| 2026-04-08 | **Scion** | Launched — GCP container-per-agent experimental harness |
| 2026-04-08 | **CrewAI** | v1.14.1 — async checkpoint TUI browser |

View File

@ -0,0 +1,92 @@
# Provisioning Workspaces on Fly Machines (CONTAINER_BACKEND=flyio)
Molecule AI can provision agent workspaces as [Fly Machines](https://fly.io/docs/machines/) instead of local Docker containers. Set `CONTAINER_BACKEND=flyio` on your platform and every `POST /workspaces` call creates a Fly Machine in your app — with tier-based resource limits, env-var injection, and A2A registration handled automatically.
> **Scope note (PR #501):** Workspace images must already be published to GHCR before provisioning. The `delete` and `restart` platform endpoints are not yet fully wired to the Fly provisioner — use `flyctl machine stop/destroy` for teardown until a follow-up PR lands.
## What you'll need
- A Molecule AI platform instance
- A [Fly.io](https://fly.io) account with a Fly app created for workspace machines
- `flyctl` installed locally
- `curl` + `jq`
## Setup
```bash
# 1. Set CONTAINER_BACKEND and Fly credentials on your platform process
# (add to your platform's .env or deployment config)
export CONTAINER_BACKEND=flyio
export FLY_API_TOKEN=<your-fly-deploy-token> # flyctl tokens create deploy
export FLY_WORKSPACE_APP=my-molecule-workspaces # fly app created for this purpose
export FLY_REGION=ord # optional, default: ord
# 2. Restart the platform so it picks up CONTAINER_BACKEND=flyio
# (varies by your deployment — docker restart, systemd reload, etc.)
# 3. Verify the platform is using the Fly provisioner
curl -s http://localhost:8080/healthz | jq .
# 4. Create a workspace — the platform provisions it as a Fly Machine
WS=$(curl -s -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "fly-worker",
"role": "Fly-provisioned inference worker",
"runtime": "hermes",
"tier": 2
}' | jq -r '.id')
echo "Workspace ID: $WS"
# 5. Watch the Fly Machine appear (~15–30s)
flyctl machines list --app $FLY_WORKSPACE_APP
# 6. Poll until the workspace is ready
until curl -s http://localhost:8080/workspaces/$WS | jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 7. Smoke test — send an A2A task
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"What region are you running in?"}]}}}' \
| jq '.result.parts[0].text'
# 8. Inspect the Fly Machine details
flyctl machines show --app $FLY_WORKSPACE_APP
# 9. Teardown (see scope note — use flyctl directly for now)
flyctl machines destroy --app $FLY_WORKSPACE_APP --force
```
## Expected output
Step 5 (`flyctl machines list`) shows the new machine with a `started` state within ~30 seconds. The platform injects your workspace secrets, `PLATFORM_URL`, and workspace ID as environment variables on the machine, then issues an auth token so the agent registers on boot.
Step 7 returns the agent's reply — proof that A2A JSON-RPC is routing through the Fly Machine correctly. The `FLY_REGION` env var is visible inside the container, so asking the agent "What region are you running in?" should return `ord` (or whichever region you set).
## Resource tiers
The Fly provisioner applies tier-based limits automatically — no manual machine sizing needed:
| Tier | RAM | CPUs | Use case |
|------|-----|------|----------|
| T2 | 512 MB | 1 | Light workers, eval agents |
| T3 | 2 GB | 2 | General-purpose orchestrators |
| T4 | 4 GB | 4 | Heavy inference, long-context tasks |
Set `"tier": 2`, `3`, or `4` in your `POST /workspaces` body. Runtime images are resolved from GHCR automatically (`hermes` → `ghcr.io/molecule-ai/workspace-hermes:latest`).
## Why Fly Machines
Fly Machines start in milliseconds and run in 35+ regions. Provisioning agent workspaces on Fly means your inference workers can live close to your users with no infrastructure code changes — just set `FLY_REGION` per workspace. Because the Fly provisioner implements the same `Provisioner` interface as the Docker backend, the rest of the platform is unchanged: same REST API, same A2A protocol, same workspace management UI.
## Related
- PR #501: [feat(platform): Fly Machines provisioner](https://github.com/Molecule-AI/molecule-core/pull/501)
- PR #481: [feat(ci): deploy to Fly after image push](https://github.com/Molecule-AI/molecule-core/pull/481)
- [Fly Machines API docs](https://fly.io/docs/machines/api/)
- [Platform API reference](../api-reference.md)
- Issue [#525](https://github.com/Molecule-AI/molecule-core/issues/525)

View File

@ -0,0 +1,74 @@
# Running a Google ADK Workspace on Molecule AI
Google's Agent Development Kit (ADK) is now a first-class runtime on Molecule AI. This tutorial walks you from zero to a running ADK agent workspace — one that persists per-conversation session state and sits alongside your Claude Code and Gemini CLI workers in the same A2A network.
## What you'll need
- A Molecule AI account with at least one provisioned tenant
- A `GOOGLE_API_KEY` from [aistudio.google.com](https://aistudio.google.com) (or Vertex AI credentials — see below)
- `curl` + `jq`
## Setup
```bash
# 1. Store your Google API key as a global secret
curl -s -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GOOGLE_API_KEY","value":"YOUR-AI-STUDIO-KEY"}' | jq .
# 2. Create a google-adk workspace
WS=$(curl -s -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "adk-agent",
"role": "Google ADK inference worker",
"runtime": "google-adk",
"model": "google:gemini-2.0-flash"
}' | jq -r '.id')
echo "Workspace: $WS"
# 3. Wait for ready (~30s)
until curl -s http://localhost:8080/workspaces/$WS | jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 4. Send your first task
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Summarise the ADK architecture in 3 bullet points."}]}}}' \
| jq '.result.parts[0].text'
# 5. Multi-turn — session state is preserved across calls
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Now give me a one-line TL;DR of what you just said."}]}}}' \
| jq '.result.parts[0].text'
# 6. Vertex AI alternative — set these instead of GOOGLE_API_KEY
# curl -X PUT .../secrets -d '{"key":"GOOGLE_GENAI_USE_VERTEXAI","value":"1"}'
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_PROJECT","value":"my-project"}'
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_LOCATION","value":"us-central1"}'
```
## Expected output
After step 4, ADK streams the Gemini response through its event bus, filters for `is_final_response()` events, and returns the agent's reply as a standard A2A text part. Step 5 should reference the prior answer — the adapter ties each A2A `context_id` to an `InMemorySessionService` session, so conversation state is isolated per task context and survives across calls within the same session.
## How it works
The `google-adk` adapter wraps Google ADK's runner/session model behind the same `AgentExecutor` interface used by every other Molecule AI runtime. On each turn, `GoogleADKA2AExecutor` calls `runner.run_async()` with the incoming message wrapped in a `google.genai.types.Content` object, then drains the event stream until it collects a final-response event. The `google:` model prefix is stripped before being passed to ADK — so `google:gemini-2.0-flash` in your workspace config becomes `gemini-2.0-flash` in the ADK `LlmAgent`. Error class names are sanitized before leaving the executor; raw Google SDK stack traces never reach the A2A caller.
## Mixed-runtime teams
ADK workspaces participate in the same A2A network as Claude Code, Gemini CLI, Hermes, and LangGraph workers. An orchestrator can delegate long-context summarisation to a `google-adk` worker (Gemini 1.5 Pro's 1M token window) while routing tool-use tasks to a `claude-code` worker — with no provider-specific code in the orchestrator itself. Add an ADK peer with `POST /workspaces`, set `GOOGLE_API_KEY`, and it's available for `delegate_task` immediately.
## Related
- PR #550: [feat(adapters): add google-adk runtime adapter](https://github.com/Molecule-AI/molecule-core/pull/550)
- [Google ADK (adk-python)](https://github.com/google/adk-python)
- [Gemini CLI runtime tutorial](./gemini-cli-runtime.md)
- [Platform API reference](../api-reference.md)

View File

@ -0,0 +1,184 @@
# Hermes Multi-Provider Dispatch: Native Anthropic, Gemini, and Multi-Turn History
Hermes is Molecule AI's inference router. Out of the box it proxies every model through an OpenAI-compatible shim. That works for plain text, but the shim does format translation on every round-trip — and it gets the Gemini message format wrong (Gemini expects `role: "model"` and a `parts: [{text}]` wrapper; the shim passes `role: "assistant"` and a flat string). It also flattens multi-turn conversations into a single user blob, losing role attribution across turns.
Phases 2a–2c wire three native dispatch paths keyed on `auth_scheme`. This tutorial shows you how to unlock them.
> **Phase 2d scope note:** Tool calling, vision content blocks, system instructions, and streaming on the native paths are scoped for Phase 2d and are **not yet shipped**. This tutorial covers what is merged today: correct native dispatch + multi-turn history continuity.
## What you'll need
- A Molecule AI account with API access
- `ANTHROPIC_API_KEY` **or** `GEMINI_API_KEY` (or both)
- `curl` + `jq`
## The dispatch table
After Phases 2a / 2b / 2c, Hermes picks an inference path based on which provider is configured:
| `auth_scheme` | Dispatch path | Provider | API |
|---|---|---|---|
| `openai` | `_do_openai_compat` | 13 providers (OpenRouter, Groq, Mistral…) | OpenAI-compat shim |
| `anthropic` | `_do_anthropic_native` | Anthropic | Native Messages API |
| `gemini` | `_do_gemini_native` | Google | Native `generateContent` |
| unknown | `_do_openai_compat` + warning | any | OpenAI-compat shim (forward-compat) |
**Rule of thumb:** set `ANTHROPIC_API_KEY` to get native Anthropic dispatch. Set `GEMINI_API_KEY` to get native Gemini dispatch. Set `NOUS_API_KEY` / `HERMES_API_KEY` / `OPENROUTER_API_KEY` to stay on the compat shim. Molecule AI reads these in priority order: `HERMES_API_KEY` → `OPENROUTER_API_KEY` → `ANTHROPIC_API_KEY` → `GEMINI_API_KEY`. The **first key found wins**, so don't set `HERMES_API_KEY` if you want native dispatch.
---
## Setup
```bash
# 0. Export your platform URL and a workspace to use as orchestrator
export MOLECULE_API=http://localhost:8080
export ORCH_ID=<your-orchestrator-workspace-id>
# 1. Store your Anthropic key as a global secret
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":"sk-ant-YOUR-KEY"}' | jq .
# 2. Create a Hermes workspace — Anthropic native dispatch
ANTHROPIC_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-anthropic",
"role": "Inference worker — native Anthropic path",
"runtime": "hermes",
"model": "anthropic:claude-sonnet-4-5"
}' | jq -r '.id')
echo "Anthropic workspace: $ANTHROPIC_WS"
# 3. Wait for it to be ready (~20–30s)
until curl -s $MOLECULE_API/workspaces/$ANTHROPIC_WS | jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 4. Store your Gemini key as a global secret
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GEMINI_API_KEY","value":"YOUR-GEMINI-KEY"}' | jq .
# 5. Create a Hermes workspace — Gemini native dispatch
GEMINI_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-gemini",
"role": "Inference worker — native Gemini path",
"runtime": "hermes",
"model": "gemini:gemini-2.0-flash"
}' | jq -r '.id')
echo "Gemini workspace: $GEMINI_WS"
# 6. Pin the Gemini workspace to Gemini-only keys (no ANTHROPIC_API_KEY override)
curl -s -X PUT $MOLECULE_API/workspaces/$GEMINI_WS/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":""}' | jq .
# 7. Confirm dispatch — send a single-turn probe to the Anthropic workspace
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"Which API are you using to generate this response?"}]}}
}' | jq '.result.parts[0].text'
# 8. Same probe to the Gemini workspace
curl -s -X POST $MOLECULE_API/workspaces/$GEMINI_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"Which API are you using to generate this response?"}]}}
}' | jq '.result.parts[0].text'
# 9. Multi-turn history — Phase 2c keeps turns as turns (not flattened)
# Send turn 1
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"My name is Alice. Remember that."}]}}
}' | jq '.result.parts[0].text'
# 10. Send turn 2 — history is automatically threaded by Hermes Phase 2c
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"What is my name?"}]}}
}' | jq '.result.parts[0].text'
# Expected: "Alice" — not "I don't know", which the old flattened path could produce
```
## Expected output
**Step 7 (Anthropic workspace):** The agent confirms it is calling the Anthropic Messages API natively. Hermes executed `_do_anthropic_native` — no OpenAI-compat translation layer.
**Step 8 (Gemini workspace):** The agent confirms Google `generateContent`. Hermes called `_do_gemini_native`, which passes `role: "model"` (not `"assistant"`) and the `parts: [{text: ...}]` wrapper the native SDK requires. The compat-shim translation that produced incorrect message format is bypassed.
**Step 10 (multi-turn, Phase 2c):** Returns `"Alice"`. Before Phase 2c, history was flattened into a single user blob — the model could recover the gist but lost clean role attribution. Phase 2c passes turns as turns: OpenAI uses `{role, content}`, Anthropic uses the same wire shape for text-only, Gemini uses `{role: "model", parts: [{text}]}`.
## How dispatch works under the hood
`HermesA2AExecutor._do_inference(user_message, history)` reads `self.provider_cfg.auth_scheme`:
```python
if self.provider_cfg.auth_scheme == "anthropic":
return await self._do_anthropic_native(user_message, history)
elif self.provider_cfg.auth_scheme == "gemini":
return await self._do_gemini_native(user_message, history)
else: # "openai" + unknown (forward-compat fallback)
return await self._do_openai_compat(user_message, history)
```
Fail-loud semantics: if the `anthropic` package isn't installed, `_do_anthropic_native` raises a clear `RuntimeError` before any inference attempt. Same for `google-genai`. Silent fallback to the compat shim would mask format errors — Molecule AI chooses loud failure.
## Building a multi-provider team
The real win surfaces in a mixed-provider agent team. Your orchestrator can fan tasks to an Anthropic worker and a Gemini worker simultaneously, each receiving properly formatted messages through their native API paths:
```bash
# Fan out from the orchestrator — both fire in parallel
curl -s -X POST $MOLECULE_API/workspaces/$ORCH_ID/a2a \
-H "Content-Type: application/json" \
-d "{
\"jsonrpc\":\"2.0\",\"id\":\"fan-1\",\"method\":\"message/send\",
\"params\":{\"message\":{\"role\":\"user\",\"parts\":[{\"kind\":\"text\",
\"text\":\"delegate_task_async $ANTHROPIC_WS 'Draft release notes for v2.1' AND delegate_task_async $GEMINI_WS 'Summarise the last 30 days of support tickets'\"}]}}
}" | jq .
```
Both workers use their native inference paths. No LiteLLM proxy layer. No format translation on every request. The orchestrator gets results back through the same A2A protocol regardless of which underlying model powered each task.
## Capability comparison: Hermes native vs the compat shim
What is shipping today (Phases 2a + 2b + 2c — all merged to main):
| Capability | OpenAI-compat shim | Anthropic native | Gemini native |
|---|---|---|---|
| Plain text (single-turn) | ✅ | ✅ | ✅ |
| Multi-turn history | ⚠️ flattened into one user blob | ✅ role-attributed turns | ✅ `role: "model"` + `parts` wrapper |
| Correct Gemini message format | ❌ wrong role + missing parts wrapper | — | ✅ |
| No compat-shim translation overhead | ❌ every request translated | ✅ | ✅ |
What is on the roadmap for Phase 2d (not yet shipped):
| Capability | Anthropic native | Gemini native |
|---|---|---|
| `tool_use` / `tool_result` blocks | 📋 Phase 2d | 📋 Phase 2d |
| Vision content blocks | 📋 Phase 2d | 📋 Phase 2d |
| System instructions (`system=`) | 📋 Phase 2d | 📋 Phase 2d (`system_instruction=`) |
| Extended thinking | 📋 Phase 2d | — |
| Streaming | 📋 Phase 2d | 📋 Phase 2d |
**Why Molecule AI vs Letta / AG2 / n8n:** Those frameworks handle multi-LLM at the application layer — you write different agent classes per provider. Molecule AI handles it at the infrastructure layer. Your workspace configs change; your orchestration code doesn't. Swap a Gemini worker for an Anthropic worker by changing one secret. No code redeploy.
## Related
- PR #240: [Phase 2a — native Anthropic dispatch](https://github.com/Molecule-AI/molecule-core/pull/240)
- PR #255: [Phase 2b — native Gemini dispatch](https://github.com/Molecule-AI/molecule-core/pull/255)
- PR #267: [Phase 2c — multi-turn history on all paths](https://github.com/Molecule-AI/molecule-core/pull/267)
- [Hermes adapter design](../adapters/hermes-adapter-design.md)
- [Platform API reference](../api-reference.md)
- Issue [#513](https://github.com/Molecule-AI/molecule-core/issues/513)

View File

@ -0,0 +1,282 @@
// Package artifacts provides a minimal Go client for the Cloudflare Artifacts
// REST API (private beta Apr 2026, public beta May 2026).
//
// API reference: https://developers.cloudflare.com/artifacts/api/rest-api/
// Blog post: https://blog.cloudflare.com/artifacts-git-for-agents-beta/
//
// Base URL: https://artifacts.cloudflare.net/v1/api/namespaces/{namespace}
// Auth: Authorization: Bearer <CLOUDFLARE_API_TOKEN>
//
// This client covers the subset of operations needed for the Molecule AI
// workspace-snapshot demo:
// - CreateRepo — provision a bare Git repo for a workspace
// - GetRepo — fetch repo metadata (remote URL, created_at, …)
// - ForkRepo — create an isolated copy (e.g. workspace branching)
// - ImportRepo — bootstrap from an external GitHub/GitLab URL
// - DeleteRepo — clean-up
// - CreateToken — mint a short-lived Git credential for clone/push
// - RevokeToken — invalidate an issued token
package artifacts
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"time"
)
const (
	// defaultBaseURL is the production Cloudflare Artifacts API root.
	// Tests substitute an httptest.Server URL via NewWithBaseURL.
	defaultBaseURL = "https://artifacts.cloudflare.net/v1/api"
	// defaultTimeout bounds every HTTP round-trip made by Client.
	defaultTimeout = 30 * time.Second
)
// Client is a thin HTTP wrapper around the Cloudflare Artifacts REST API.
// Instantiate with New(); override BaseURL in tests via NewWithBaseURL().
// Each Client is scoped to one namespace — the namespace is baked into
// baseURL at construction time and cannot be changed afterwards.
type Client struct {
	baseURL    string // e.g. https://artifacts.cloudflare.net/v1/api/namespaces/my-ns
	apiToken   string // Cloudflare API token — never logged
	httpClient *http.Client // constructed with defaultTimeout; see NewWithBaseURL
}
// New returns a Client scoped to the given namespace.
// apiToken is a Cloudflare API token with Artifacts write permissions.
// namespace identifies the CF Artifacts namespace (maps to CLOUDFLARE_ARTIFACTS_NAMESPACE).
func New(apiToken, namespace string) *Client {
	// Production entry point: identical to NewWithBaseURL with the
	// default Cloudflare endpoint.
	return NewWithBaseURL(apiToken, namespace, defaultBaseURL)
}
// NewWithBaseURL is the same as New but lets callers override the base URL —
// primarily used in unit tests to point at an httptest.Server.
// The namespace is path-escaped and appended to baseURL once, here, so every
// later request only concatenates a relative path.
func NewWithBaseURL(apiToken, namespace, baseURL string) *Client {
	// Escape the namespace so it is safe to embed as a URL path segment.
	escaped := url.PathEscape(namespace)
	client := &Client{
		apiToken:   apiToken,
		baseURL:    fmt.Sprintf("%s/namespaces/%s", baseURL, escaped),
		httpClient: &http.Client{Timeout: defaultTimeout},
	}
	return client
}
// ---- Domain types --------------------------------------------------------
// Repo represents a single Cloudflare Artifacts repository.
type Repo struct {
	// Name is the user-supplied identifier within the namespace.
	Name string `json:"name"`
	// ID is the opaque CF-assigned identifier.
	ID string `json:"id,omitempty"`
	// RemoteURL is the authenticated Git remote in the form
	// https://x:<TOKEN>@<hash>.artifacts.cloudflare.net/git/repo-<id>.git
	RemoteURL string `json:"remote_url,omitempty"`
	// ReadOnly marks repos that accept only fetch/clone operations.
	ReadOnly bool `json:"read_only,omitempty"`
	// Description is an optional human-readable label.
	Description string `json:"description,omitempty"`
	// CreatedAt and UpdatedAt are server-assigned timestamps; zero when
	// the server omits them.
	CreatedAt time.Time `json:"created_at,omitempty"`
	UpdatedAt time.Time `json:"updated_at,omitempty"`
}
// ForkResult is the response body from POST /repos/:name/fork.
type ForkResult struct {
	// Repo describes the newly created fork.
	Repo Repo `json:"repo"`
	// ObjectCount reports the Git object count for the fork — presumably
	// the number copied from the source repo; confirm against the API docs.
	ObjectCount int `json:"object_count,omitempty"`
}
// RepoToken is a short-lived credential for Git operations against a single repo.
// The plaintext Token value is returned only once — callers must store it.
type RepoToken struct {
	// ID identifies the token, e.g. for later revocation via RevokeToken.
	ID string `json:"id"`
	// Token is the plaintext Git credential — only present at creation.
	Token string `json:"token"`
	// Scope restricts the token to read or write Git operations.
	Scope string `json:"scope"` // "read" | "write"
	// ExpiresAt is when the token stops being accepted by the service.
	ExpiresAt time.Time `json:"expires_at"`
}
// ---- Request payloads ----------------------------------------------------
// CreateRepoRequest is the body for POST /repos.
type CreateRepoRequest struct {
	// Name is the identifier of the new repo within the namespace (required).
	Name string `json:"name"`
	// Description is an optional human-readable label.
	Description string `json:"description,omitempty"`
	// DefaultBranch optionally overrides the server-side default branch name.
	DefaultBranch string `json:"default_branch,omitempty"`
	// ReadOnly, if true, creates a repo that accepts only fetch/clone.
	ReadOnly bool `json:"read_only,omitempty"`
}
// ForkRepoRequest is the body for POST /repos/:name/fork.
type ForkRepoRequest struct {
	// Name is the identifier for the new fork within the namespace.
	Name string `json:"name"`
	// Description is an optional human-readable label for the fork.
	Description string `json:"description,omitempty"`
	// ReadOnly, if true, makes the fork fetch/clone-only.
	ReadOnly bool `json:"read_only,omitempty"`
	// DefaultBranchOnly, if true, limits the fork to the default branch —
	// presumably skipping all other refs; confirm against the API docs.
	DefaultBranchOnly bool `json:"default_branch_only,omitempty"`
}
// ImportRepoRequest is the body for POST /repos/:name/import.
type ImportRepoRequest struct {
	// URL is the HTTPS URL of the source Git repository.
	URL string `json:"url"`
	// Branch optionally restricts the import to a single branch.
	Branch string `json:"branch,omitempty"`
	// Depth optionally limits history depth (shallow import) —
	// NOTE(review): exact semantics not visible here; verify against API docs.
	Depth int `json:"depth,omitempty"`
	// ReadOnly, if true, makes the imported repo fetch/clone-only.
	ReadOnly bool `json:"read_only,omitempty"`
}
// CreateTokenRequest is the body for POST /tokens.
type CreateTokenRequest struct {
	// Repo is the name of the repository to scope the token to.
	Repo string `json:"repo"`
	Scope string `json:"scope,omitempty"` // "read" | "write"; default "write"
	// TTL is the lifetime in seconds. Default 86400 (24h).
	TTL int `json:"ttl,omitempty"`
}
// ---- API error -----------------------------------------------------------
// APIError represents a non-2xx response from the Cloudflare v4 envelope.
// StatusCode is the HTTP status; Code/Message come from the first entry of
// the envelope's "errors" array when one is present.
type APIError struct {
	// StatusCode is the HTTP status of the failed response.
	StatusCode int
	Code       int    `json:"code"`
	Message    string `json:"message"`
}
// Error implements the error interface, rendering the HTTP status alongside
// the Cloudflare error code and message.
func (e *APIError) Error() string {
	msg := fmt.Sprintf("cloudflare artifacts: HTTP %d — code %d: %s", e.StatusCode, e.Code, e.Message)
	return msg
}
// ---- HTTP helpers --------------------------------------------------------
// do executes an HTTP request, checks the Cloudflare v4 envelope, and
// JSON-decodes the "result" field into out (pass nil to discard).
//
// Behavior:
//   - body (when non-nil) is JSON-encoded and sent with
//     Content-Type: application/json.
//   - Every request carries "Authorization: Bearer <apiToken>".
//   - The response is decoded as a CF v4 envelope {result, success, errors}.
//   - success == false or status >= 300 returns an *APIError carrying the
//     first envelope error's code/message ("unknown error" when absent).
//
// NOTE(review): a 2xx response whose body is empty or not valid JSON is also
// reported as an *APIError, because the envelope decode fails first. That
// appears intentional for this API, which always returns an envelope (even
// DELETE responds 202 with one, per the tests) — confirm if reused elsewhere.
func (c *Client) do(ctx context.Context, method, path string, body, out interface{}) error {
	// Encode the request body, if any.
	var bodyReader io.Reader
	if body != nil {
		b, err := json.Marshal(body)
		if err != nil {
			return fmt.Errorf("artifacts: marshal request: %w", err)
		}
		bodyReader = bytes.NewReader(b)
	}
	req, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, bodyReader)
	if err != nil {
		return fmt.Errorf("artifacts: build request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer "+c.apiToken)
	if body != nil {
		req.Header.Set("Content-Type", "application/json")
	}
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return fmt.Errorf("artifacts: request %s %s: %w", method, path, err)
	}
	defer resp.Body.Close()
	// Decode the Cloudflare v4 envelope. Cap at 1 MiB to prevent a
	// malicious or runaway upstream response from exhausting memory.
	var envelope struct {
		Result  json.RawMessage `json:"result"`
		Success bool            `json:"success"`
		Errors  []struct {
			Code    int    `json:"code"`
			Message string `json:"message"`
		} `json:"errors"`
	}
	if err := json.NewDecoder(io.LimitReader(resp.Body, 1<<20)).Decode(&envelope); err != nil {
		// Non-JSON body (network error page, etc.) — the decode error itself
		// is intentionally not surfaced; only the status code is reported.
		return &APIError{StatusCode: resp.StatusCode, Message: fmt.Sprintf("non-JSON body (status %d)", resp.StatusCode)}
	}
	// Treat either signal of failure as an error: the envelope's own success
	// flag, or an HTTP status outside the 2xx range.
	if !envelope.Success || resp.StatusCode >= 300 {
		apiErr := &APIError{StatusCode: resp.StatusCode}
		if len(envelope.Errors) > 0 {
			apiErr.Code = envelope.Errors[0].Code
			apiErr.Message = envelope.Errors[0].Message
		} else {
			apiErr.Message = "unknown error"
		}
		return apiErr
	}
	// Only decode "result" when the caller asked for it and CF sent one.
	if out != nil && len(envelope.Result) > 0 {
		if err := json.Unmarshal(envelope.Result, out); err != nil {
			return fmt.Errorf("artifacts: decode result: %w", err)
		}
	}
	return nil
}
// ---- Repo operations -----------------------------------------------------
// CreateRepo provisions a new bare Git repo in the namespace.
// Corresponds to POST /repos.
func (c *Client) CreateRepo(ctx context.Context, req CreateRepoRequest) (*Repo, error) {
	repo := new(Repo)
	err := c.do(ctx, http.MethodPost, "/repos", req, repo)
	if err != nil {
		return nil, err
	}
	return repo, nil
}
// GetRepo fetches metadata for an existing repo.
// Corresponds to GET /repos/:name.
func (c *Client) GetRepo(ctx context.Context, name string) (*Repo, error) {
	repo := new(Repo)
	endpoint := "/repos/" + url.PathEscape(name)
	err := c.do(ctx, http.MethodGet, endpoint, nil, repo)
	if err != nil {
		return nil, err
	}
	return repo, nil
}
// ForkRepo creates an isolated copy of an existing repo.
// Corresponds to POST /repos/:name/fork.
func (c *Client) ForkRepo(ctx context.Context, sourceName string, req ForkRepoRequest) (*ForkResult, error) {
	endpoint := "/repos/" + url.PathEscape(sourceName) + "/fork"
	result := new(ForkResult)
	if err := c.do(ctx, http.MethodPost, endpoint, req, result); err != nil {
		return nil, err
	}
	return result, nil
}
// ImportRepo bootstraps a repo from an external HTTPS Git URL.
// Corresponds to POST /repos/:name/import.
func (c *Client) ImportRepo(ctx context.Context, name string, req ImportRepoRequest) (*Repo, error) {
	endpoint := "/repos/" + url.PathEscape(name) + "/import"
	repo := new(Repo)
	if err := c.do(ctx, http.MethodPost, endpoint, req, repo); err != nil {
		return nil, err
	}
	return repo, nil
}
// DeleteRepo deletes a repo (returns 202 Accepted).
// Corresponds to DELETE /repos/:name.
func (c *Client) DeleteRepo(ctx context.Context, name string) error {
	endpoint := "/repos/" + url.PathEscape(name)
	return c.do(ctx, http.MethodDelete, endpoint, nil, nil)
}
// ---- Token operations ----------------------------------------------------
// CreateToken mints a short-lived Git credential scoped to a single repo.
// The plaintext token is in the returned RepoToken.Token field — it will not
// be available again after this call returns.
// Corresponds to POST /tokens.
func (c *Client) CreateToken(ctx context.Context, req CreateTokenRequest) (*RepoToken, error) {
	tok := new(RepoToken)
	err := c.do(ctx, http.MethodPost, "/tokens", req, tok)
	if err != nil {
		return nil, err
	}
	return tok, nil
}
// RevokeToken invalidates an issued token by its ID.
// Corresponds to DELETE /tokens/:id.
func (c *Client) RevokeToken(ctx context.Context, tokenID string) error {
	endpoint := "/tokens/" + url.PathEscape(tokenID)
	return c.do(ctx, http.MethodDelete, endpoint, nil, nil)
}

View File

@ -0,0 +1,370 @@
package artifacts_test
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/artifacts"
)
// cfEnvelope wraps a result value in the Cloudflare v4 response envelope.
func cfEnvelope(t *testing.T, result interface{}) []byte {
	t.Helper()
	raw, err := json.Marshal(result)
	if err != nil {
		t.Fatalf("cfEnvelope: marshal result: %v", err)
	}
	envelope := map[string]interface{}{
		"success": true,
		"result":  json.RawMessage(raw),
		"errors":  []interface{}{},
	}
	encoded, err := json.Marshal(envelope)
	if err != nil {
		t.Fatalf("cfEnvelope: marshal envelope: %v", err)
	}
	return encoded
}
// cfError returns a Cloudflare v4 error envelope plus the HTTP status the
// test server should respond with.
func cfError(t *testing.T, statusCode, code int, message string) ([]byte, int) {
	t.Helper()
	payload, _ := json.Marshal(map[string]interface{}{
		"success": false,
		"result":  nil,
		"errors": []map[string]interface{}{
			{"code": code, "message": message},
		},
	})
	return payload, statusCode
}
// newTestClient spins up an httptest server around mux and returns an
// artifacts client pointed at it, using fixed credentials ("test-token",
// namespace "test-ns"). The server is torn down automatically via t.Cleanup.
func newTestClient(t *testing.T, mux *http.ServeMux) *artifacts.Client {
	t.Helper()
	srv := httptest.NewServer(mux)
	t.Cleanup(srv.Close)
	return artifacts.NewWithBaseURL("test-token", "test-ns", srv.URL)
}
// ---- CreateRepo ----------------------------------------------------------
// TestCreateRepo_Success verifies the happy path of CreateRepo: the bearer
// token is sent, the request body carries the repo name, and the envelope
// result is decoded into the returned Repo.
func TestCreateRepo_Success(t *testing.T) {
	mux := http.NewServeMux()
	// Note: the route includes the namespace prefix — NewWithBaseURL
	// evidently bakes "/namespaces/test-ns" into the base URL.
	mux.HandleFunc("/namespaces/test-ns/repos", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
			return
		}
		// Verify auth header
		if r.Header.Get("Authorization") != "Bearer test-token" {
			http.Error(w, "unauthorized", http.StatusUnauthorized)
			return
		}
		// Decode request body
		var req map[string]interface{}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			http.Error(w, "bad request", http.StatusBadRequest)
			return
		}
		if req["name"] != "my-workspace-repo" {
			http.Error(w, "unexpected name", http.StatusBadRequest)
			return
		}
		repo := artifacts.Repo{
			Name:      "my-workspace-repo",
			ID:        "repo-abc123",
			RemoteURL: "https://x:tok@hash.artifacts.cloudflare.net/git/repo-abc123.git",
			CreatedAt: time.Now(),
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfEnvelope(t, repo))
	})
	client := newTestClient(t, mux)
	repo, err := client.CreateRepo(context.Background(), artifacts.CreateRepoRequest{
		Name:        "my-workspace-repo",
		Description: "Molecule AI workspace snapshot",
	})
	if err != nil {
		t.Fatalf("CreateRepo: unexpected error: %v", err)
	}
	if repo.Name != "my-workspace-repo" {
		t.Errorf("repo.Name = %q, want %q", repo.Name, "my-workspace-repo")
	}
	if repo.ID != "repo-abc123" {
		t.Errorf("repo.ID = %q, want %q", repo.ID, "repo-abc123")
	}
	if repo.RemoteURL == "" {
		t.Error("repo.RemoteURL is empty, want non-empty")
	}
}
// TestCreateRepo_APIError verifies that a CF error envelope (409 / code 1009)
// surfaces as a typed *artifacts.APIError with status and message preserved.
func TestCreateRepo_APIError(t *testing.T) {
	router := http.NewServeMux()
	router.HandleFunc("/namespaces/test-ns/repos", func(w http.ResponseWriter, r *http.Request) {
		payload, code := cfError(t, http.StatusConflict, 1009, "repo already exists")
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(code)
		w.Write(payload)
	})
	client := newTestClient(t, router)
	_, err := client.CreateRepo(context.Background(), artifacts.CreateRepoRequest{Name: "dup"})
	if err == nil {
		t.Fatal("expected error, got nil")
	}
	apiErr, isAPIErr := err.(*artifacts.APIError)
	if !isAPIErr {
		t.Fatalf("expected *APIError, got %T: %v", err, err)
	}
	if apiErr.StatusCode != http.StatusConflict {
		t.Errorf("StatusCode = %d, want %d", apiErr.StatusCode, http.StatusConflict)
	}
	if apiErr.Message != "repo already exists" {
		t.Errorf("Message = %q, want %q", apiErr.Message, "repo already exists")
	}
}
// ---- GetRepo -------------------------------------------------------------
func TestGetRepo_Success(t *testing.T) {
mux := http.NewServeMux()
mux.HandleFunc("/namespaces/test-ns/repos/my-repo", func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
return
}
repo := artifacts.Repo{
Name: "my-repo",
ID: "repo-xyz",
RemoteURL: "https://x:tok@hash.artifacts.cloudflare.net/git/repo-xyz.git",
}
w.Header().Set("Content-Type", "application/json")
w.Write(cfEnvelope(t, repo))
})
client := newTestClient(t, mux)
repo, err := client.GetRepo(context.Background(), "my-repo")
if err != nil {
t.Fatalf("GetRepo: unexpected error: %v", err)
}
if repo.Name != "my-repo" {
t.Errorf("repo.Name = %q, want %q", repo.Name, "my-repo")
}
}
// TestGetRepo_NotFound verifies that a 404 envelope maps to *APIError with
// the status code intact.
func TestGetRepo_NotFound(t *testing.T) {
	router := http.NewServeMux()
	router.HandleFunc("/namespaces/test-ns/repos/missing", func(w http.ResponseWriter, r *http.Request) {
		payload, code := cfError(t, http.StatusNotFound, 1004, "repo not found")
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(code)
		w.Write(payload)
	})
	client := newTestClient(t, router)
	_, err := client.GetRepo(context.Background(), "missing")
	if err == nil {
		t.Fatal("expected error, got nil")
	}
	apiErr, isAPIErr := err.(*artifacts.APIError)
	if !isAPIErr {
		t.Fatalf("expected *APIError, got %T", err)
	}
	if apiErr.StatusCode != http.StatusNotFound {
		t.Errorf("StatusCode = %d, want %d", apiErr.StatusCode, http.StatusNotFound)
	}
}
// ---- ForkRepo ------------------------------------------------------------
// TestForkRepo_Success verifies that ForkRepo POSTs to /repos/:name/fork with
// the fork name in the body and decodes both the forked Repo and ObjectCount.
func TestForkRepo_Success(t *testing.T) {
	mux := http.NewServeMux()
	mux.HandleFunc("/namespaces/test-ns/repos/source-repo/fork", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
			return
		}
		// Decode error deliberately ignored — a bad body fails the name check below.
		var req map[string]interface{}
		json.NewDecoder(r.Body).Decode(&req)
		if req["name"] != "forked-repo" {
			http.Error(w, "unexpected fork name", http.StatusBadRequest)
			return
		}
		result := artifacts.ForkResult{
			Repo: artifacts.Repo{
				Name:      "forked-repo",
				ID:        "repo-fork-1",
				RemoteURL: "https://x:tok@hash.artifacts.cloudflare.net/git/repo-fork-1.git",
			},
			ObjectCount: 42,
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfEnvelope(t, result))
	})
	client := newTestClient(t, mux)
	result, err := client.ForkRepo(context.Background(), "source-repo", artifacts.ForkRepoRequest{
		Name: "forked-repo",
	})
	if err != nil {
		t.Fatalf("ForkRepo: unexpected error: %v", err)
	}
	if result.Repo.Name != "forked-repo" {
		t.Errorf("Repo.Name = %q, want %q", result.Repo.Name, "forked-repo")
	}
	if result.ObjectCount != 42 {
		t.Errorf("ObjectCount = %d, want 42", result.ObjectCount)
	}
}
// ---- ImportRepo ----------------------------------------------------------
// TestImportRepo_Success verifies that ImportRepo POSTs to
// /repos/:name/import with a non-empty "url" and decodes the resulting Repo.
func TestImportRepo_Success(t *testing.T) {
	mux := http.NewServeMux()
	mux.HandleFunc("/namespaces/test-ns/repos/imported/import", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
			return
		}
		// Decode error deliberately ignored — a bad body fails the url check below.
		var req map[string]interface{}
		json.NewDecoder(r.Body).Decode(&req)
		if req["url"] == "" {
			http.Error(w, "url required", http.StatusBadRequest)
			return
		}
		repo := artifacts.Repo{
			Name:      "imported",
			ID:        "repo-imp-1",
			RemoteURL: "https://x:tok@hash.artifacts.cloudflare.net/git/repo-imp-1.git",
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfEnvelope(t, repo))
	})
	client := newTestClient(t, mux)
	repo, err := client.ImportRepo(context.Background(), "imported", artifacts.ImportRepoRequest{
		URL:    "https://github.com/Molecule-AI/molecule-core.git",
		Branch: "main",
		Depth:  1,
	})
	if err != nil {
		t.Fatalf("ImportRepo: unexpected error: %v", err)
	}
	if repo.Name != "imported" {
		t.Errorf("repo.Name = %q, want %q", repo.Name, "imported")
	}
}
// ---- DeleteRepo ----------------------------------------------------------
// TestDeleteRepo_Success verifies that DeleteRepo succeeds on a 202 Accepted
// response carrying a success envelope.
func TestDeleteRepo_Success(t *testing.T) {
	router := http.NewServeMux()
	router.HandleFunc("/namespaces/test-ns/repos/to-delete", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodDelete {
			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusAccepted)
		w.Write(cfEnvelope(t, map[string]string{"id": "repo-del-1"}))
	})
	client := newTestClient(t, router)
	if err := client.DeleteRepo(context.Background(), "to-delete"); err != nil {
		t.Fatalf("DeleteRepo: unexpected error: %v", err)
	}
}
// ---- CreateToken ---------------------------------------------------------
// TestCreateToken_Success verifies that CreateToken POSTs the repo name to
// /tokens and decodes the one-time plaintext credential from the envelope.
func TestCreateToken_Success(t *testing.T) {
	// Truncate to seconds so the round-trip through JSON compares cleanly.
	expiry := time.Now().Add(24 * time.Hour).UTC().Truncate(time.Second)
	mux := http.NewServeMux()
	mux.HandleFunc("/namespaces/test-ns/tokens", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
			return
		}
		// Decode error deliberately ignored — a bad body fails the repo check below.
		var req map[string]interface{}
		json.NewDecoder(r.Body).Decode(&req)
		if req["repo"] != "my-repo" {
			http.Error(w, "unexpected repo", http.StatusBadRequest)
			return
		}
		tok := artifacts.RepoToken{
			ID:        "tok-123",
			Token:     "plaintext-secret-abc",
			Scope:     "write",
			ExpiresAt: expiry,
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfEnvelope(t, tok))
	})
	client := newTestClient(t, mux)
	tok, err := client.CreateToken(context.Background(), artifacts.CreateTokenRequest{
		Repo:  "my-repo",
		Scope: "write",
		TTL:   86400,
	})
	if err != nil {
		t.Fatalf("CreateToken: unexpected error: %v", err)
	}
	if tok.ID != "tok-123" {
		t.Errorf("ID = %q, want %q", tok.ID, "tok-123")
	}
	if tok.Token != "plaintext-secret-abc" {
		t.Errorf("Token = %q, want %q", tok.Token, "plaintext-secret-abc")
	}
	if tok.Scope != "write" {
		t.Errorf("Scope = %q, want %q", tok.Scope, "write")
	}
}
// ---- RevokeToken ---------------------------------------------------------
// TestRevokeToken_Success verifies that RevokeToken issues a DELETE to
// /tokens/:id and treats a success envelope as nil error.
func TestRevokeToken_Success(t *testing.T) {
	router := http.NewServeMux()
	router.HandleFunc("/namespaces/test-ns/tokens/tok-456", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodDelete {
			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfEnvelope(t, map[string]string{"id": "tok-456"}))
	})
	client := newTestClient(t, router)
	if err := client.RevokeToken(context.Background(), "tok-456"); err != nil {
		t.Fatalf("RevokeToken: unexpected error: %v", err)
	}
}
// ---- Context cancellation ------------------------------------------------
func TestCreateRepo_ContextCancelled(t *testing.T) {
// Server that never responds (simulates a hung connection)
mux := http.NewServeMux()
mux.HandleFunc("/namespaces/test-ns/repos", func(w http.ResponseWriter, r *http.Request) {
// Block until the client gives up
<-r.Context().Done()
})
client := newTestClient(t, mux)
ctx, cancel := context.WithCancel(context.Background())
cancel() // cancel immediately
_, err := client.CreateRepo(ctx, artifacts.CreateRepoRequest{Name: "x"})
if err == nil {
t.Fatal("expected error from cancelled context, got nil")
}
}

View File

@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"log"
"sync"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
@ -14,8 +15,17 @@ import (
const broadcastChannel = "events:broadcast"
// sseSubscription is a single in-process SSE subscriber.
// deliverToSSE writes to ch; StreamEvents reads from it.
type sseSubscription struct {
workspaceID string
ch chan models.WSMessage
}
type Broadcaster struct {
hub *ws.Hub
hub *ws.Hub
ssesMu sync.RWMutex
sses []*sseSubscription
}
func NewBroadcaster(hub *ws.Hub) *Broadcaster {
@ -59,6 +69,9 @@ func (b *Broadcaster) RecordAndBroadcast(ctx context.Context, eventType string,
// Broadcast to local WebSocket clients
b.hub.Broadcast(msg)
// Fan out to in-process SSE subscribers (e.g. GET /events/stream).
b.deliverToSSE(msg)
return nil
}
@ -79,6 +92,52 @@ func (b *Broadcaster) BroadcastOnly(workspaceID string, eventType string, payloa
}
b.hub.Broadcast(msg)
// Fan out to in-process SSE subscribers.
b.deliverToSSE(msg)
}
// SubscribeSSE registers a per-workspace in-process channel for SSE streaming.
// The returned channel has a 64-message buffer; deliverToSSE drops events
// when it is full. The caller MUST invoke the returned cancel func on
// disconnect so the subscription is removed and the channel is not leaked.
func (b *Broadcaster) SubscribeSSE(workspaceID string) (<-chan models.WSMessage, func()) {
	newSub := &sseSubscription{
		workspaceID: workspaceID,
		ch:          make(chan models.WSMessage, 64),
	}

	b.ssesMu.Lock()
	b.sses = append(b.sses, newSub)
	b.ssesMu.Unlock()

	remove := func() {
		b.ssesMu.Lock()
		defer b.ssesMu.Unlock()
		for i := range b.sses {
			if b.sses[i] == newSub {
				b.sses = append(b.sses[:i], b.sses[i+1:]...)
				return
			}
		}
	}
	return newSub.ch, remove
}
// deliverToSSE fans msg out to every in-process SSE subscriber watching the
// same workspace. Non-blocking: a full subscriber buffer drops the event with
// a log line (the WebSocket path still delivers it). Holding only the read
// lock is safe because the send itself never blocks.
func (b *Broadcaster) deliverToSSE(msg models.WSMessage) {
	b.ssesMu.RLock()
	defer b.ssesMu.RUnlock()
	for _, sub := range b.sses {
		if sub.workspaceID == msg.WorkspaceID {
			select {
			case sub.ch <- msg:
			default:
				log.Printf("SSE: subscriber buffer full for workspace %s, dropping event %s", msg.WorkspaceID, msg.Event)
			}
		}
	}
}
// Subscribe listens to Redis pub/sub and relays events to the WebSocket hub.

View File

@ -203,6 +203,33 @@ func (h *WorkspaceHandler) ProxyA2A(c *gin.Context) {
c.Data(status, "application/json", respBody)
}
// checkWorkspaceBudget returns a proxyA2AError with 402 when the workspace
// has a budget_limit set and monthly_spend has reached or exceeded it.
// DB errors are logged and treated as fail-open — a budget check failure
// must not block legitimate A2A traffic.
func (h *WorkspaceHandler) checkWorkspaceBudget(ctx context.Context, workspaceID string) *proxyA2AError {
	var (
		limit sql.NullInt64
		spend int64
	)
	row := db.DB.QueryRowContext(ctx,
		`SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`,
		workspaceID,
	)
	if err := row.Scan(&limit, &spend); err != nil {
		if err != sql.ErrNoRows {
			log.Printf("ProxyA2A: budget check failed for %s: %v", workspaceID, err)
		}
		return nil // fail-open
	}
	// No limit configured, or still under it — allow the request through.
	if !limit.Valid || spend < limit.Int64 {
		return nil
	}
	log.Printf("ProxyA2A: budget exceeded for %s (spend=%d limit=%d)", workspaceID, spend, limit.Int64)
	return &proxyA2AError{
		Status:   http.StatusPaymentRequired,
		Response: gin.H{"error": "workspace budget limit exceeded"},
	}
}
func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID string, body []byte, callerID string, logActivity bool) (int, []byte, *proxyA2AError) {
// Access control: workspace-to-workspace requests must pass CanCommunicate check.
// Canvas requests (callerID == "") and system callers (webhook:*, system:*, test:*)
@ -217,6 +244,14 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
}
}
// Budget enforcement: reject A2A calls when the workspace has exceeded its
// monthly spend ceiling. Checked after access control so unauthorized calls
// are rejected first (403 > 402 in the denial hierarchy). Fail-open on DB
// errors so a budget check failure never blocks legitimate traffic.
if proxyErr := h.checkWorkspaceBudget(ctx, workspaceID); proxyErr != nil {
return 0, nil, proxyErr
}
agentURL, proxyErr := h.resolveAgentURL(ctx, workspaceID)
if proxyErr != nil {
return 0, nil, proxyErr
@ -251,6 +286,12 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
if logActivity {
h.logA2ASuccess(ctx, workspaceID, callerID, body, respBody, a2aMethod, resp.StatusCode, durationMs)
}
// Track LLM token usage for cost transparency (#593).
// Fires in a detached goroutine so token accounting never adds latency
// to the critical A2A path.
go extractAndUpsertTokenUsage(context.WithoutCancel(ctx), workspaceID, respBody)
return resp.StatusCode, respBody, nil
}
@ -577,3 +618,65 @@ func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) e
// token" branch so the handler-level guard can detect it without string
// matching (the wsauth errors are typed for the invalid case).
var errInvalidCallerToken = errors.New("missing caller auth token")
// extractAndUpsertTokenUsage parses LLM usage from a raw A2A response body
// and persists it via upsertTokenUsage. Safe to call in a goroutine — logs
// errors but never panics. ctx must already be detached from the request.
func extractAndUpsertTokenUsage(ctx context.Context, workspaceID string, respBody []byte) {
	inTok, outTok := parseUsageFromA2AResponse(respBody)
	// Skip the write unless at least one count is positive.
	if inTok <= 0 && outTok <= 0 {
		return
	}
	upsertTokenUsage(ctx, workspaceID, inTok, outTok)
}
// parseUsageFromA2AResponse extracts input_tokens / output_tokens from an A2A
// JSON-RPC response. Inspects two locations in order of preference:
//  1. result.usage — the JSON-RPC 2.0 result envelope from workspace agents.
//  2. usage — top-level, for non-JSON-RPC or direct Anthropic-shaped payloads.
//
// Returns (0, 0) when no recognisable usage data is found.
func parseUsageFromA2AResponse(body []byte) (inputTokens, outputTokens int64) {
	if len(body) == 0 {
		return 0, 0
	}
	var top map[string]json.RawMessage
	if json.Unmarshal(body, &top) != nil {
		return 0, 0
	}
	// Preferred: usage nested under the JSON-RPC "result" envelope.
	if rawResult, found := top["result"]; found {
		var result map[string]json.RawMessage
		if json.Unmarshal(rawResult, &result) == nil {
			if in, out, ok := readUsageMap(result); ok {
				return in, out
			}
		}
	}
	// Fallback: top-level usage (direct Anthropic or non-JSON-RPC response).
	if in, out, ok := readUsageMap(top); ok {
		return in, out
	}
	return 0, 0
}
// readUsageMap extracts input_tokens / output_tokens from the "usage" key of m.
// Returns (0, 0, false) when the key is absent or contains no non-zero values.
func readUsageMap(m map[string]json.RawMessage) (inputTokens, outputTokens int64, ok bool) {
rawUsage, has := m["usage"]
if !has {
return 0, 0, false
}
var usage struct {
InputTokens int64 `json:"input_tokens"`
OutputTokens int64 `json:"output_tokens"`
}
if err := json.Unmarshal(rawUsage, &usage); err != nil {
return 0, 0, false
}
if usage.InputTokens == 0 && usage.OutputTokens == 0 {
return 0, 0, false
}
return usage.InputTokens, usage.OutputTokens, true
}

View File

@ -0,0 +1,455 @@
package handlers
// ArtifactsHandler exposes the Cloudflare Artifacts demo integration.
//
// Routes (all behind WorkspaceAuth middleware):
//
// POST /workspaces/:id/artifacts — attach a CF Artifacts repo to this workspace
// GET /workspaces/:id/artifacts — get the linked repo info
// POST /workspaces/:id/artifacts/fork — fork this workspace's repo
// POST /workspaces/:id/artifacts/token — mint a short-lived git credential
//
// Configuration (env vars, loaded once at platform startup):
//
// CF_ARTIFACTS_API_TOKEN — Cloudflare API token with Artifacts write permissions
// CF_ARTIFACTS_NAMESPACE — Cloudflare Artifacts namespace name
//
// When either env var is absent the handler returns 503 with a clear message so
// callers know the feature is not yet configured (private beta onboarding).
//
// See: https://developers.cloudflare.com/artifacts/
import (
"database/sql"
"log"
"net/http"
"os"
"regexp"
"strings"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/artifacts"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/gin-gonic/gin"
)
// repoNameRE validates CF Artifacts repo names: start with alphanumeric,
// then up to 62 alphanumeric/hyphen/underscore chars (63 total max).
// Compiled once at package init; Regexp.MatchString is safe for concurrent use.
var repoNameRE = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}$`)
// cfErrMessage returns a safe error message for CF API errors.
// CF 4xx errors surface their original message; CF 5xx errors and non-CF
// errors collapse to a generic "upstream service error" so internal CF
// details are never leaked to clients.
func cfErrMessage(err error) string {
	if apiErr, isAPIErr := err.(*artifacts.APIError); isAPIErr && apiErr.StatusCode < 500 {
		return apiErr.Message
	}
	return "upstream service error"
}
// ArtifactsHandler holds a pre-built CF Artifacts client.
// The client is nil when CF_ARTIFACTS_API_TOKEN / CF_ARTIFACTS_NAMESPACE are
// unset — in that state every endpoint replies 503 (see configured()).
type ArtifactsHandler struct {
	// client talks to the Cloudflare Artifacts API; nil when unconfigured.
	client *artifacts.Client
	// namespace is the CF Artifacts namespace, persisted alongside repo links.
	namespace string
}
// NewArtifactsHandler reads CF_ARTIFACTS_API_TOKEN and CF_ARTIFACTS_NAMESPACE
// from the environment and builds the client. If either is absent the handler
// still registers — every method simply returns 503.
func NewArtifactsHandler() *ArtifactsHandler {
	apiToken := os.Getenv("CF_ARTIFACTS_API_TOKEN")
	namespace := os.Getenv("CF_ARTIFACTS_NAMESPACE")
	if apiToken == "" || namespace == "" {
		log.Printf("artifacts: CF_ARTIFACTS_API_TOKEN or CF_ARTIFACTS_NAMESPACE not set — demo endpoints will return 503")
		return &ArtifactsHandler{}
	}
	handler := &ArtifactsHandler{
		client:    artifacts.New(apiToken, namespace),
		namespace: namespace,
	}
	return handler
}
// newArtifactsHandlerWithClient is the injectable constructor used in tests.
func newArtifactsHandlerWithClient(client *artifacts.Client, namespace string) *ArtifactsHandler {
	h := &ArtifactsHandler{}
	h.client = client
	h.namespace = namespace
	return h
}
// configured returns false (and writes a 503) when the CF client is missing.
// Call at the top of every endpoint; bail out when it returns false.
func (h *ArtifactsHandler) configured(c *gin.Context) bool {
	if h.client != nil {
		return true
	}
	c.JSON(http.StatusServiceUnavailable, gin.H{
		"error": "Cloudflare Artifacts not configured — set CF_ARTIFACTS_API_TOKEN and CF_ARTIFACTS_NAMESPACE",
	})
	return false
}
// ---- POST /workspaces/:id/artifacts ------------------------------------
// createArtifactsRepoRequest is the body for attaching/creating a CF Artifacts repo.
// When ImportURL is set, the repo is bootstrapped via ImportRepo; otherwise an
// empty repo is created via CreateRepo.
type createArtifactsRepoRequest struct {
	// Name is the desired CF repo name. Defaults to "molecule-ws-<workspace_id>" when empty.
	// Explicit names must match repoNameRE.
	Name string `json:"name"`
	// Description is an optional label stored in CF and in the local DB.
	Description string `json:"description"`
	// ImportURL, when non-empty, bootstraps the repo from an existing Git URL
	// (e.g. "https://github.com/org/repo.git") instead of creating an empty repo.
	// Must be https:// — other schemes are rejected.
	ImportURL string `json:"import_url"`
	// ImportBranch restricts the import to a single branch (only used with ImportURL).
	ImportBranch string `json:"import_branch"`
	// ImportDepth sets a shallow-clone depth for the import (0 = full history).
	ImportDepth int `json:"import_depth"`
	// ReadOnly marks the new repo as fetch/clone-only.
	ReadOnly bool `json:"read_only"`
}
// workspaceArtifactRow is the DB row shape returned by queries against
// workspace_artifacts. RemoteURL is stored credential-stripped (see Create).
type workspaceArtifactRow struct {
	ID          string    `json:"id"`
	WorkspaceID string    `json:"workspace_id"`
	// CFRepoName is the repo's name inside the CF namespace.
	CFRepoName  string    `json:"cf_repo_name"`
	CFNamespace string    `json:"cf_namespace"`
	// RemoteURL is the Git remote with any embedded credential removed.
	RemoteURL   string    `json:"remote_url,omitempty"`
	Description string    `json:"description,omitempty"`
	CreatedAt   time.Time `json:"created_at"`
	UpdatedAt   time.Time `json:"updated_at"`
}
// Create handles POST /workspaces/:id/artifacts.
// Creates or imports a Cloudflare Artifacts repo and links it to the workspace.
//
// Responses:
//   - 201: repo created/imported and linked; body is the workspaceArtifactRow
//   - 400: invalid JSON, bad repo name, or non-HTTPS import URL
//   - 409: workspace already has a linked repo
//   - 503: CF env vars not configured
//
// Fix: the duplicate-link EXISTS check previously discarded its Scan error
// silently; it is now logged. The check stays fail-open — a broken check must
// not block creation, and CF rejects duplicate repo names upstream anyway.
func (h *ArtifactsHandler) Create(c *gin.Context) {
	if !h.configured(c) {
		return
	}
	workspaceID := c.Param("id")
	ctx := c.Request.Context()
	// Reject if already linked. Scan errors are logged and treated as
	// fail-open (exists stays false), per the note above.
	var exists bool
	if err := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspace_artifacts WHERE workspace_id = $1)`,
		workspaceID,
	).Scan(&exists); err != nil {
		log.Printf("artifacts: duplicate-link check failed for workspace %s: %v", workspaceID, err)
	}
	if exists {
		c.JSON(http.StatusConflict, gin.H{"error": "workspace already has a linked Artifacts repo — delete it first"})
		return
	}
	var req createArtifactsRepoRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	// Default repo name: "molecule-ws-<workspace_id>" (truncated at 63 chars,
	// the CF repo-name length limit enforced by repoNameRE).
	repoName := req.Name
	if repoName == "" {
		repoName = "molecule-ws-" + workspaceID
		if len(repoName) > 63 {
			repoName = repoName[:63]
		}
	}
	// Validate explicit repo names; auto-generated names are always safe.
	if req.Name != "" && !repoNameRE.MatchString(req.Name) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "repo name must match ^[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}$"})
		return
	}
	var (
		repo *artifacts.Repo
		err  error
	)
	if req.ImportURL != "" {
		// Require HTTPS for import URLs to prevent SSRF via non-HTTPS schemes.
		if !strings.HasPrefix(req.ImportURL, "https://") {
			c.JSON(http.StatusBadRequest, gin.H{"error": "import_url must use https://"})
			return
		}
		repo, err = h.client.ImportRepo(ctx, repoName, artifacts.ImportRepoRequest{
			URL:      req.ImportURL,
			Branch:   req.ImportBranch,
			Depth:    req.ImportDepth,
			ReadOnly: req.ReadOnly,
		})
	} else {
		repo, err = h.client.CreateRepo(ctx, artifacts.CreateRepoRequest{
			Name:        repoName,
			Description: req.Description,
			ReadOnly:    req.ReadOnly,
		})
	}
	if err != nil {
		log.Printf("artifacts: CreateRepo/ImportRepo failed for workspace %s: %v", workspaceID, err)
		c.JSON(cfErrToHTTP(err), gin.H{"error": cfErrMessage(err)})
		return
	}
	// Strip the embedded credential from the URL before persisting.
	remoteURL := stripCredentials(repo.RemoteURL)
	var row workspaceArtifactRow
	err = db.DB.QueryRowContext(ctx, `
		INSERT INTO workspace_artifacts
			(workspace_id, cf_repo_name, cf_namespace, remote_url, description)
		VALUES ($1, $2, $3, $4, $5)
		RETURNING id, workspace_id, cf_repo_name, cf_namespace, remote_url, description, created_at, updated_at
	`, workspaceID, repo.Name, h.namespace, remoteURL, req.Description).Scan(
		&row.ID, &row.WorkspaceID, &row.CFRepoName, &row.CFNamespace,
		&row.RemoteURL, &row.Description, &row.CreatedAt, &row.UpdatedAt,
	)
	if err != nil {
		log.Printf("artifacts: DB insert failed for workspace %s: %v", workspaceID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to persist artifact link"})
		return
	}
	c.JSON(http.StatusCreated, row)
}
// ---- GET /workspaces/:id/artifacts -------------------------------------
// Get handles GET /workspaces/:id/artifacts.
// Returns the linked Cloudflare Artifacts repo info from the local DB,
// augmented with live metadata from the CF API. When CF is unreachable the
// cached DB row is returned with cf_status "unavailable" instead of failing.
func (h *ArtifactsHandler) Get(c *gin.Context) {
	if !h.configured(c) {
		return
	}
	workspaceID := c.Param("id")
	ctx := c.Request.Context()

	var row workspaceArtifactRow
	scanErr := db.DB.QueryRowContext(ctx, `
		SELECT id, workspace_id, cf_repo_name, cf_namespace, remote_url, description, created_at, updated_at
		FROM workspace_artifacts
		WHERE workspace_id = $1
	`, workspaceID).Scan(
		&row.ID, &row.WorkspaceID, &row.CFRepoName, &row.CFNamespace,
		&row.RemoteURL, &row.Description, &row.CreatedAt, &row.UpdatedAt,
	)
	switch {
	case scanErr == sql.ErrNoRows:
		c.JSON(http.StatusNotFound, gin.H{"error": "no Artifacts repo linked to this workspace"})
		return
	case scanErr != nil:
		log.Printf("artifacts: DB query failed for workspace %s: %v", workspaceID, scanErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}
	// Augment with live info from CF API (remote URL may have changed, etc.).
	cfRepo, cfErr := h.client.GetRepo(ctx, row.CFRepoName)
	if cfErr != nil {
		// CF API unavailable — return cached DB row with a warning.
		log.Printf("artifacts: GetRepo from CF failed for %s: %v", row.CFRepoName, cfErr)
		c.JSON(http.StatusOK, gin.H{
			"artifact":  row,
			"cf_status": "unavailable",
			"cf_error":  cfErrMessage(cfErr),
		})
		return
	}
	c.JSON(http.StatusOK, gin.H{
		"artifact":  row,
		"cf_repo":   cfRepo,
		"cf_status": "ok",
	})
}
// ---- POST /workspaces/:id/artifacts/fork -------------------------------
// forkArtifactsRepoRequest is the JSON body for POST
// /workspaces/:id/artifacts/fork.
type forkArtifactsRepoRequest struct {
	// Name is the desired name of the forked repo. Required
	// (binding:"required"); additionally validated against repoNameRE by
	// the Fork handler.
	Name string `json:"name" binding:"required"`
	// Description is an optional label for the fork.
	Description string `json:"description"`
	// ReadOnly marks the fork as fetch/clone-only.
	ReadOnly bool `json:"read_only"`
	// DefaultBranchOnly limits the fork to the default branch.
	DefaultBranchOnly bool `json:"default_branch_only"`
}
// Fork handles POST /workspaces/:id/artifacts/fork.
// Creates an isolated copy of the workspace's primary Artifacts repo in CF.
// The fork is not recorded in the local DB — it is owned by the caller.
func (h *ArtifactsHandler) Fork(c *gin.Context) {
	if !h.configured(c) {
		return
	}
	wsID := c.Param("id")
	reqCtx := c.Request.Context()

	// Resolve the source repo linked to this workspace.
	var srcRepo string
	lookupErr := db.DB.QueryRowContext(reqCtx,
		`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id = $1`,
		wsID,
	).Scan(&srcRepo)
	if lookupErr == sql.ErrNoRows {
		c.JSON(http.StatusNotFound, gin.H{"error": "no Artifacts repo linked to this workspace"})
		return
	}
	if lookupErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}

	var body forkArtifactsRepoRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}
	// Name is required by binding, so the empty-string guard is defensive;
	// the regex keeps CF repo names URL- and filesystem-safe.
	if body.Name != "" && !repoNameRE.MatchString(body.Name) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "repo name must match ^[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}$"})
		return
	}

	forked, forkErr := h.client.ForkRepo(reqCtx, srcRepo, artifacts.ForkRepoRequest{
		Name:              body.Name,
		Description:       body.Description,
		ReadOnly:          body.ReadOnly,
		DefaultBranchOnly: body.DefaultBranchOnly,
	})
	if forkErr != nil {
		log.Printf("artifacts: ForkRepo failed for workspace %s: %v", wsID, forkErr)
		c.JSON(cfErrToHTTP(forkErr), gin.H{"error": cfErrMessage(forkErr)})
		return
	}
	c.JSON(http.StatusCreated, gin.H{
		"fork":         forked.Repo,
		"object_count": forked.ObjectCount,
		"remote_url":   stripCredentials(forked.Repo.RemoteURL),
	})
}
// ---- POST /workspaces/:id/artifacts/token ------------------------------
// artifactsTokenRequest is the JSON body for minting a git credential via
// POST /workspaces/:id/artifacts/token.
type artifactsTokenRequest struct {
	// Scope is "read" or "write". Defaults to "write" when empty.
	Scope string `json:"scope"`
	// TTL is the credential lifetime in seconds. Defaults to 3600 (1h);
	// the Token handler caps it at 7 days.
	TTL int `json:"ttl"`
}
// Token handles POST /workspaces/:id/artifacts/token.
// Mints a short-lived Git credential for the workspace's linked repo and
// returns it exactly once — the plaintext token is never stored server-side.
func (h *ArtifactsHandler) Token(c *gin.Context) {
	if !h.configured(c) {
		return
	}
	wsID := c.Param("id")
	reqCtx := c.Request.Context()

	// Resolve which CF repo this workspace is linked to.
	var repoName string
	lookupErr := db.DB.QueryRowContext(reqCtx,
		`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id = $1`,
		wsID,
	).Scan(&repoName)
	if lookupErr == sql.ErrNoRows {
		c.JSON(http.StatusNotFound, gin.H{"error": "no Artifacts repo linked to this workspace"})
		return
	}
	if lookupErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}

	var body artifactsTokenRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	// Default scope to "write"; anything other than read/write is rejected.
	scope := body.Scope
	if scope == "" {
		scope = "write"
	}
	if scope != "read" && scope != "write" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "scope must be \"read\" or \"write\""})
		return
	}

	// Default TTL to 1h; silently cap at 7 days rather than erroring.
	const maxTTL = 86400 * 7 // 7 days
	ttl := body.TTL
	if ttl <= 0 {
		ttl = 3600
	} else if ttl > maxTTL {
		ttl = maxTTL
	}

	cred, mintErr := h.client.CreateToken(reqCtx, artifacts.CreateTokenRequest{
		Repo:  repoName,
		Scope: scope,
		TTL:   ttl,
	})
	if mintErr != nil {
		log.Printf("artifacts: CreateToken failed for workspace %s: %v", wsID, mintErr)
		c.JSON(cfErrToHTTP(mintErr), gin.H{"error": cfErrMessage(mintErr)})
		return
	}

	// Hand back a ready-to-use authenticated remote so callers can run
	// `git clone <clone_url>` directly.
	c.JSON(http.StatusCreated, gin.H{
		"token_id":   cred.ID,
		"token":      cred.Token,
		"scope":      cred.Scope,
		"expires_at": cred.ExpiresAt,
		"clone_url":  buildCloneURL(repoName, cred.Token, h.namespace),
		"message":    "Save this token — it cannot be retrieved again.",
	})
}
// ---- helpers -------------------------------------------------------------
// cfErrToHTTP maps a Cloudflare API error onto the HTTP status we return:
// client errors (4xx) pass through unchanged; everything else — including
// CF 5xx and non-API errors — is reported as 502 Bad Gateway, since the
// failure is upstream of us.
func cfErrToHTTP(err error) int {
	if apiErr, ok := err.(*artifacts.APIError); ok {
		if code := apiErr.StatusCode; code >= 400 && code < 500 {
			return code
		}
	}
	return http.StatusBadGateway
}
// stripCredentials removes the "x:<token>@" userinfo from an authenticated
// git remote URL so we never persist credentials in the database.
// e.g. "https://x:tok@hash.artifacts.cloudflare.net/…" → "https://hash.artifacts.cloudflare.net/…"
//
// Only an "@" in the authority component (before the first "/") is treated
// as a credential separator; an "@" appearing later, in the path (e.g.
// ".../repo@v1.git"), is left intact. The previous implementation stripped
// at the first "@" anywhere in the string, which corrupted such URLs.
// Scheme-less inputs keep the historical behavior of defaulting to https.
func stripCredentials(remoteURL string) string {
	// Split off the scheme; default to https for scheme-less input.
	scheme := "https://"
	rest := remoteURL
	switch {
	case strings.HasPrefix(remoteURL, "https://"):
		rest = remoteURL[len("https://"):]
	case strings.HasPrefix(remoteURL, "http://"):
		scheme = "http://"
		rest = remoteURL[len("http://"):]
	}
	// Userinfo can only appear in the authority — the segment before the
	// first "/" — so restrict the "@" search to it.
	authority := rest
	if slash := strings.Index(rest, "/"); slash != -1 {
		authority = rest[:slash]
	}
	if at := strings.Index(authority, "@"); at != -1 {
		return scheme + rest[at+1:]
	}
	return remoteURL
}
// buildCloneURL constructs an authenticated clone URL from the CF token:
// https://x:<token>@artifacts.cloudflare.net/git/<name>.git
//
// This is a fallback pattern used when we only have the repo name rather
// than the full hashed host: in production the remote URL comes from
// CreateRepo/GetRepo, but older DB rows may predate that field. The third
// parameter (namespace) is currently unused.
func buildCloneURL(repoName, token, _ string) string {
	const host = "artifacts.cloudflare.net"
	return "https://x:" + token + "@" + host + "/git/" + repoName + ".git"
}

View File

@ -0,0 +1,995 @@
package handlers
import (
"bytes"
"database/sql"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/artifacts"
"github.com/gin-gonic/gin"
)
// cfSuccessResponse wraps result in the Cloudflare v4 success envelope
// ({"success":true,"result":…,"errors":[]}) and returns the JSON bytes,
// failing the test on any marshal error.
func cfSuccessResponse(t *testing.T, result interface{}) []byte {
	t.Helper()
	raw, marshalErr := json.Marshal(result)
	if marshalErr != nil {
		t.Fatalf("cfSuccessResponse: marshal result: %v", marshalErr)
	}
	envelope, envErr := json.Marshal(map[string]interface{}{
		"success": true,
		"result":  json.RawMessage(raw),
		"errors":  []interface{}{},
	})
	if envErr != nil {
		t.Fatalf("cfSuccessResponse: marshal envelope: %v", envErr)
	}
	return envelope
}
// cfErrorResponse builds a Cloudflare v4 error envelope carrying a single
// {code, message} entry and returns its JSON bytes together with the HTTP
// status the mock server should respond with.
func cfErrorResponse(t *testing.T, statusCode, code int, message string) ([]byte, int) {
	t.Helper()
	payload, _ := json.Marshal(map[string]interface{}{
		"success": false,
		"result":  nil,
		"errors": []map[string]interface{}{
			{"code": code, "message": message},
		},
	})
	return payload, statusCode
}
// newArtifactsMockCFServer starts an httptest server that serves handler at
// /namespaces/test-ns<suffix> and returns an artifacts client pointed at it.
// The server is shut down automatically via t.Cleanup.
func newArtifactsMockCFServer(t *testing.T, suffix string, handler http.HandlerFunc) *artifacts.Client {
	t.Helper()
	routes := http.NewServeMux()
	routes.HandleFunc("/namespaces/test-ns"+suffix, handler)
	server := httptest.NewServer(routes)
	t.Cleanup(server.Close)
	return artifacts.NewWithBaseURL("cf-test-token", "test-ns", server.URL)
}
// ============================= Create =====================================
// TestArtifactsCreate_Success verifies the happy path: no existing link →
// CF API returns a repo → DB INSERT succeeds → 201 response.
func TestArtifactsCreate_Success(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// Mock CF API: POST /repos returns a freshly created repo whose
	// remote_url embeds a credential (the handler must strip it).
	cfClient := newArtifactsMockCFServer(t, "/repos", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "wrong method", http.StatusMethodNotAllowed)
			return
		}
		repo := artifacts.Repo{
			Name:      "molecule-ws-ws-abc",
			ID:        "repo-001",
			RemoteURL: "https://x:tok123@hash.artifacts.cloudflare.net/git/repo-001.git",
			CreatedAt: time.Now(),
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfSuccessResponse(t, repo))
	})
	// Existence probe — no existing link
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-abc").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	// DB INSERT — RETURNING row. Note the credential-free remote_url arg:
	// the handler must never persist the embedded token.
	now := time.Now()
	mock.ExpectQuery(`INSERT INTO workspace_artifacts`).
		WithArgs("ws-abc", "molecule-ws-ws-abc", "test-ns",
			"https://hash.artifacts.cloudflare.net/git/repo-001.git", "").
		WillReturnRows(sqlmock.NewRows(
			[]string{"id", "workspace_id", "cf_repo_name", "cf_namespace", "remote_url", "description", "created_at", "updated_at"}).
			AddRow("art-1", "ws-abc", "molecule-ws-ws-abc", "test-ns",
				"https://hash.artifacts.cloudflare.net/git/repo-001.git", "", now, now))
	h := newArtifactsHandlerWithClient(cfClient, "test-ns")
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-abc"}}
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-abc/artifacts",
		bytes.NewBufferString(`{}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Create(c)
	if w.Code != http.StatusCreated {
		t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	json.Unmarshal(w.Body.Bytes(), &resp)
	if resp["cf_repo_name"] != "molecule-ws-ws-abc" {
		t.Errorf("cf_repo_name = %v, want molecule-ws-ws-abc", resp["cf_repo_name"])
	}
	// Both sqlmock expectations (probe + insert) must have been consumed.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsCreate_AlreadyLinked verifies that a 409 is returned when the
// workspace already has a linked Artifacts repo.
func TestArtifactsCreate_AlreadyLinked(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// Existence probe returns true — handler must bail before calling CF.
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-dup").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	// CF base URL is deliberately unreachable: the CF API must not be hit.
	h := newArtifactsHandlerWithClient(
		artifacts.NewWithBaseURL("tok", "ns", "http://unused"),
		"ns",
	)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-dup"}}
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-dup/artifacts",
		bytes.NewBufferString(`{"name":"dup-repo"}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Create(c)
	if w.Code != http.StatusConflict {
		t.Errorf("expected 409, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsCreate_CFAPIError verifies that a CF API error (e.g. 409 conflict)
// is forwarded with the appropriate HTTP status (via cfErrToHTTP).
func TestArtifactsCreate_CFAPIError(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// CF mock always answers with a 409 error envelope.
	cfClient := newArtifactsMockCFServer(t, "/repos", func(w http.ResponseWriter, r *http.Request) {
		body, status := cfErrorResponse(t, http.StatusConflict, 1009, "repo name already taken")
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		w.Write(body)
	})
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-cfconflict").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	h := newArtifactsHandlerWithClient(cfClient, "test-ns")
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-cfconflict"}}
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-cfconflict/artifacts",
		bytes.NewBufferString(`{"name":"taken-name"}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Create(c)
	if w.Code != http.StatusConflict {
		t.Errorf("expected 409 from CF error, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsCreate_WithImportURL verifies that when import_url is set the
// handler hits the /import endpoint instead of plain /repos.
func TestArtifactsCreate_WithImportURL(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// Hand-rolled mux (instead of newArtifactsMockCFServer) because the
	// import flow hits /repos/<name>/import rather than a single suffix.
	mux := http.NewServeMux()
	mux.HandleFunc("/namespaces/test-ns/repos/imported-repo/import", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "wrong method", http.StatusMethodNotAllowed)
			return
		}
		// The import request body must carry the source URL through.
		var body map[string]interface{}
		json.NewDecoder(r.Body).Decode(&body)
		if body["url"] != "https://github.com/Molecule-AI/molecule-core.git" {
			http.Error(w, "unexpected url", http.StatusBadRequest)
			return
		}
		repo := artifacts.Repo{
			Name:      "imported-repo",
			RemoteURL: "https://x:tok@hash.artifacts.cloudflare.net/git/imported.git",
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfSuccessResponse(t, repo))
	})
	srv := httptest.NewServer(mux)
	t.Cleanup(srv.Close)
	cfClient := artifacts.NewWithBaseURL("tok", "test-ns", srv.URL)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-import").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	now := time.Now()
	// INSERT persists the credential-stripped remote URL and description.
	mock.ExpectQuery(`INSERT INTO workspace_artifacts`).
		WithArgs("ws-import", "imported-repo", "test-ns",
			"https://hash.artifacts.cloudflare.net/git/imported.git", "Imported from GitHub").
		WillReturnRows(sqlmock.NewRows(
			[]string{"id", "workspace_id", "cf_repo_name", "cf_namespace", "remote_url", "description", "created_at", "updated_at"}).
			AddRow("art-imp", "ws-import", "imported-repo", "test-ns",
				"https://hash.artifacts.cloudflare.net/git/imported.git", "Imported from GitHub", now, now))
	h := newArtifactsHandlerWithClient(cfClient, "test-ns")
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-import"}}
	body := `{"name":"imported-repo","description":"Imported from GitHub","import_url":"https://github.com/Molecule-AI/molecule-core.git","import_branch":"main","import_depth":1}`
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-import/artifacts",
		bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Create(c)
	if w.Code != http.StatusCreated {
		t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsCreate_NotConfigured verifies that missing env vars → 503.
func TestArtifactsCreate_NotConfigured(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	// No CF client → nil; configured() should short-circuit with 503
	// before any DB or CF call is attempted.
	h := newArtifactsHandlerWithClient(nil, "")
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-uncfg"}}
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-uncfg/artifacts",
		bytes.NewBufferString(`{}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Create(c)
	if w.Code != http.StatusServiceUnavailable {
		t.Errorf("expected 503, got %d: %s", w.Code, w.Body.String())
	}
}
// ============================= Get =======================================
// TestArtifactsGet_Success verifies the happy path: DB row found + CF API ok.
func TestArtifactsGet_Success(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// CF mock: GET /repos/my-ws-repo returns live repo metadata.
	cfClient := newArtifactsMockCFServer(t, "/repos/my-ws-repo", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodGet {
			http.Error(w, "wrong method", http.StatusMethodNotAllowed)
			return
		}
		repo := artifacts.Repo{
			Name:      "my-ws-repo",
			RemoteURL: "https://x:tok@hash.artifacts.cloudflare.net/git/my-ws-repo.git",
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfSuccessResponse(t, repo))
	})
	now := time.Now()
	mock.ExpectQuery(`SELECT id, workspace_id, cf_repo_name`).
		WithArgs("ws-get").
		WillReturnRows(sqlmock.NewRows(
			[]string{"id", "workspace_id", "cf_repo_name", "cf_namespace", "remote_url", "description", "created_at", "updated_at"}).
			AddRow("art-get", "ws-get", "my-ws-repo", "test-ns",
				"https://hash.artifacts.cloudflare.net/git/my-ws-repo.git", "", now, now))
	h := newArtifactsHandlerWithClient(cfClient, "test-ns")
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-get"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-get/artifacts", nil)
	h.Get(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	json.Unmarshal(w.Body.Bytes(), &resp)
	if resp["cf_status"] != "ok" {
		t.Errorf("cf_status = %v, want ok", resp["cf_status"])
	}
	// The cached DB row is returned under the "artifact" key.
	art, ok := resp["artifact"].(map[string]interface{})
	if !ok {
		t.Fatalf("artifact is not an object")
	}
	if art["cf_repo_name"] != "my-ws-repo" {
		t.Errorf("artifact.cf_repo_name = %v, want my-ws-repo", art["cf_repo_name"])
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsGet_NotFound verifies that 404 is returned when no row exists.
func TestArtifactsGet_NotFound(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// sql.ErrNoRows from the lookup must be mapped to 404, not 500.
	mock.ExpectQuery(`SELECT id, workspace_id, cf_repo_name`).
		WithArgs("ws-noart").
		WillReturnError(sql.ErrNoRows)
	h := newArtifactsHandlerWithClient(
		artifacts.NewWithBaseURL("tok", "ns", "http://unused"),
		"ns",
	)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-noart"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-noart/artifacts", nil)
	h.Get(c)
	if w.Code != http.StatusNotFound {
		t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsGet_CFUnavailable verifies that when CF API fails the handler
// still returns 200 with the cached DB row and cf_status="unavailable".
func TestArtifactsGet_CFUnavailable(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// CF API server that always returns 500
	cfClient := newArtifactsMockCFServer(t, "/repos/cached-repo", func(w http.ResponseWriter, r *http.Request) {
		body, status := cfErrorResponse(t, http.StatusInternalServerError, 0, "service unavailable")
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		w.Write(body)
	})
	now := time.Now()
	mock.ExpectQuery(`SELECT id, workspace_id, cf_repo_name`).
		WithArgs("ws-cfdown").
		WillReturnRows(sqlmock.NewRows(
			[]string{"id", "workspace_id", "cf_repo_name", "cf_namespace", "remote_url", "description", "created_at", "updated_at"}).
			AddRow("art-cfdown", "ws-cfdown", "cached-repo", "test-ns",
				"https://hash.artifacts.cloudflare.net/git/cached-repo.git", "", now, now))
	h := newArtifactsHandlerWithClient(cfClient, "test-ns")
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-cfdown"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-cfdown/artifacts", nil)
	h.Get(c)
	// Degraded mode is still a 200: clients get the cached row plus a
	// cf_status flag rather than a hard failure.
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200 (degraded), got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	json.Unmarshal(w.Body.Bytes(), &resp)
	if resp["cf_status"] != "unavailable" {
		t.Errorf("cf_status = %v, want unavailable", resp["cf_status"])
	}
	if resp["artifact"] == nil {
		t.Error("artifact should still be present from DB cache")
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// ============================= Fork ======================================
// TestArtifactsFork_Success verifies the fork happy path.
func TestArtifactsFork_Success(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// CF mock: POST /repos/source-repo/fork returns the new fork.
	cfClient := newArtifactsMockCFServer(t, "/repos/source-repo/fork", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "wrong method", http.StatusMethodNotAllowed)
			return
		}
		result := artifacts.ForkResult{
			Repo: artifacts.Repo{
				Name:      "forked-ws",
				ID:        "fork-1",
				RemoteURL: "https://x:tok@hash.artifacts.cloudflare.net/git/fork-1.git",
			},
			ObjectCount: 88,
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfSuccessResponse(t, result))
	})
	// Lookup of the workspace's source repo name.
	mock.ExpectQuery(`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id`).
		WithArgs("ws-fork-src").
		WillReturnRows(sqlmock.NewRows([]string{"cf_repo_name"}).AddRow("source-repo"))
	h := newArtifactsHandlerWithClient(cfClient, "test-ns")
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-fork-src"}}
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-fork-src/artifacts/fork",
		bytes.NewBufferString(`{"name":"forked-ws"}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Fork(c)
	if w.Code != http.StatusCreated {
		t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	json.Unmarshal(w.Body.Bytes(), &resp)
	// JSON numbers decode into interface{} as float64.
	if resp["object_count"] != float64(88) {
		t.Errorf("object_count = %v, want 88", resp["object_count"])
	}
	fork, ok := resp["fork"].(map[string]interface{})
	if !ok {
		t.Fatalf("fork is not an object")
	}
	if fork["name"] != "forked-ws" {
		t.Errorf("fork.name = %v, want forked-ws", fork["name"])
	}
	// Embedded credentials must be stripped from clone_url
	if remote := resp["remote_url"].(string); len(remote) > 0 {
		if containsCredentials(remote) {
			t.Errorf("remote_url should not contain credentials: %s", remote)
		}
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsFork_NoLinkedRepo verifies 404 when workspace has no linked repo.
func TestArtifactsFork_NoLinkedRepo(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id`).
		WithArgs("ws-norepo").
		WillReturnError(sql.ErrNoRows)
	// Unreachable CF base URL: the fork call must never be reached.
	h := newArtifactsHandlerWithClient(
		artifacts.NewWithBaseURL("tok", "ns", "http://unused"),
		"ns",
	)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-norepo"}}
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-norepo/artifacts/fork",
		bytes.NewBufferString(`{"name":"fork-name"}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Fork(c)
	if w.Code != http.StatusNotFound {
		t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsFork_MissingName verifies 400 when the fork name is missing.
// Note: the handler resolves the source repo from the DB before binding the
// body, so the lookup expectation is still consumed even on a 400.
func TestArtifactsFork_MissingName(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id`).
		WithArgs("ws-fork-badname").
		WillReturnRows(sqlmock.NewRows([]string{"cf_repo_name"}).AddRow("src-repo"))
	h := newArtifactsHandlerWithClient(
		artifacts.NewWithBaseURL("tok", "test-ns", "http://unused"),
		"test-ns",
	)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-fork-badname"}}
	// name is required (binding:"required") but absent → 400
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-fork-badname/artifacts/fork",
		bytes.NewBufferString(`{}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Fork(c)
	if w.Code != http.StatusBadRequest {
		t.Errorf("expected 400 for missing name, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// ============================= Token =====================================
// TestArtifactsToken_Success verifies the happy path: linked repo → CF returns token.
func TestArtifactsToken_Success(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	expiry := time.Now().Add(3600 * time.Second).UTC()
	// CF mock: POST /tokens must receive the repo resolved from the DB.
	cfClient := newArtifactsMockCFServer(t, "/tokens", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "wrong method", http.StatusMethodNotAllowed)
			return
		}
		var req map[string]interface{}
		json.NewDecoder(r.Body).Decode(&req)
		if req["repo"] != "my-linked-repo" {
			http.Error(w, "unexpected repo", http.StatusBadRequest)
			return
		}
		tok := artifacts.RepoToken{
			ID:        "token-abc",
			Token:     "plaintext-git-token",
			Scope:     "write",
			ExpiresAt: expiry,
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfSuccessResponse(t, tok))
	})
	mock.ExpectQuery(`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id`).
		WithArgs("ws-token").
		WillReturnRows(sqlmock.NewRows([]string{"cf_repo_name"}).AddRow("my-linked-repo"))
	h := newArtifactsHandlerWithClient(cfClient, "test-ns")
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-token"}}
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-token/artifacts/token",
		bytes.NewBufferString(`{"scope":"write","ttl":3600}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Token(c)
	if w.Code != http.StatusCreated {
		t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	json.Unmarshal(w.Body.Bytes(), &resp)
	if resp["token_id"] != "token-abc" {
		t.Errorf("token_id = %v, want token-abc", resp["token_id"])
	}
	// The plaintext token is returned once in the response body.
	if resp["token"] != "plaintext-git-token" {
		t.Errorf("token = %v, want plaintext-git-token", resp["token"])
	}
	if resp["clone_url"] == nil || resp["clone_url"] == "" {
		t.Error("clone_url should be non-empty")
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsToken_DefaultsApplied verifies that empty scope/ttl are defaulted
// to "write" / 3600. The CF mock itself asserts on the forwarded request body.
func TestArtifactsToken_DefaultsApplied(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	expiry := time.Now().Add(3600 * time.Second).UTC()
	cfClient := newArtifactsMockCFServer(t, "/tokens", func(w http.ResponseWriter, r *http.Request) {
		var req map[string]interface{}
		json.NewDecoder(r.Body).Decode(&req)
		// scope should be "write" (default)
		if req["scope"] != "write" {
			http.Error(w, "expected default scope write", http.StatusBadRequest)
			return
		}
		// ttl should be 3600 (default), serialized as float64 from JSON
		if req["ttl"] != float64(3600) {
			http.Error(w, "expected default ttl 3600", http.StatusBadRequest)
			return
		}
		tok := artifacts.RepoToken{ID: "t1", Token: "tok-def", Scope: "write", ExpiresAt: expiry}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfSuccessResponse(t, tok))
	})
	mock.ExpectQuery(`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id`).
		WithArgs("ws-defaults").
		WillReturnRows(sqlmock.NewRows([]string{"cf_repo_name"}).AddRow("my-repo"))
	h := newArtifactsHandlerWithClient(cfClient, "test-ns")
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-defaults"}}
	// Empty body — all defaults
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-defaults/artifacts/token",
		bytes.NewBufferString(`{}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Token(c)
	// A 400 here means the mock's in-handler assertions on scope/ttl failed.
	if w.Code != http.StatusCreated {
		t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsToken_InvalidScope verifies that an invalid scope returns 400.
// The DB lookup happens before scope validation, so its expectation is
// consumed even though the request ultimately fails.
func TestArtifactsToken_InvalidScope(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id`).
		WithArgs("ws-badscope").
		WillReturnRows(sqlmock.NewRows([]string{"cf_repo_name"}).AddRow("some-repo"))
	h := newArtifactsHandlerWithClient(
		artifacts.NewWithBaseURL("tok", "ns", "http://unused"),
		"ns",
	)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-badscope"}}
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-badscope/artifacts/token",
		bytes.NewBufferString(`{"scope":"admin"}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Token(c)
	if w.Code != http.StatusBadRequest {
		t.Errorf("expected 400 for invalid scope, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsToken_TTLCapped verifies that excessive TTL is silently capped
// to 7 days (604800 seconds) rather than returning an error.
func TestArtifactsToken_TTLCapped(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	const maxTTL = 86400 * 7
	expiry := time.Now().Add(maxTTL * time.Second).UTC()
	// The CF mock asserts the TTL it receives is exactly the cap.
	cfClient := newArtifactsMockCFServer(t, "/tokens", func(w http.ResponseWriter, r *http.Request) {
		var req map[string]interface{}
		json.NewDecoder(r.Body).Decode(&req)
		if int(req["ttl"].(float64)) != maxTTL {
			http.Error(w, "expected capped ttl", http.StatusBadRequest)
			return
		}
		tok := artifacts.RepoToken{ID: "t-cap", Token: "capped-tok", Scope: "write", ExpiresAt: expiry}
		w.Header().Set("Content-Type", "application/json")
		w.Write(cfSuccessResponse(t, tok))
	})
	mock.ExpectQuery(`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id`).
		WithArgs("ws-ttlcap").
		WillReturnRows(sqlmock.NewRows([]string{"cf_repo_name"}).AddRow("capped-repo"))
	h := newArtifactsHandlerWithClient(cfClient, "test-ns")
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-ttlcap"}}
	// Request a TTL far beyond the cap.
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-ttlcap/artifacts/token",
		bytes.NewBufferString(`{"scope":"write","ttl":99999999}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Token(c)
	if w.Code != http.StatusCreated {
		t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsToken_NoLinkedRepo verifies 404 when no repo is linked.
func TestArtifactsToken_NoLinkedRepo(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id`).
		WithArgs("ws-tokennolink").
		WillReturnError(sql.ErrNoRows)
	// Unreachable CF base URL: CreateToken must never be reached.
	h := newArtifactsHandlerWithClient(
		artifacts.NewWithBaseURL("tok", "ns", "http://unused"),
		"ns",
	)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-tokennolink"}}
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-tokennolink/artifacts/token",
		bytes.NewBufferString(`{}`))
	c.Request.Header.Set("Content-Type", "application/json")
	h.Token(c)
	if w.Code != http.StatusNotFound {
		t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// ============================= helper unit tests =========================
// TestStripCredentials checks that credential userinfo is removed from git
// remote URLs while credential-free URLs and empty input pass through
// untouched.
func TestStripCredentials(t *testing.T) {
	expectations := map[string]string{
		"https://x:tok123@hash.artifacts.cloudflare.net/git/repo.git": "https://hash.artifacts.cloudflare.net/git/repo.git",
		"https://hash.artifacts.cloudflare.net/git/repo.git":          "https://hash.artifacts.cloudflare.net/git/repo.git",
		"http://user:pass@example.com/repo.git":                       "http://example.com/repo.git",
		"":                                                            "",
	}
	for input, want := range expectations {
		if got := stripCredentials(input); got != want {
			t.Errorf("stripCredentials(%q) = %q, want %q", input, got, want)
		}
	}
}
// TestCfErrToHTTP verifies the CF-error-to-HTTP-status mapping: 4xx pass
// through, everything else becomes 502.
func TestCfErrToHTTP(t *testing.T) {
	expectations := map[int]int{
		http.StatusConflict:            http.StatusConflict,
		http.StatusNotFound:            http.StatusNotFound,
		http.StatusBadRequest:          http.StatusBadRequest,
		http.StatusInternalServerError: http.StatusBadGateway,
		http.StatusBadGateway:          http.StatusBadGateway,
	}
	for upstream, want := range expectations {
		apiErr := &artifacts.APIError{StatusCode: upstream}
		if got := cfErrToHTTP(apiErr); got != want {
			t.Errorf("cfErrToHTTP(%v) = %d, want %d", apiErr, got, want)
		}
	}
}
// ============================= Security fix tests ============================
// TestCfErrMessage_5xxReturnsGeneric verifies that CF 5xx errors return a
// generic message instead of leaking CF internals.
func TestCfErrMessage_5xxReturnsGeneric(t *testing.T) {
	apiErr := &artifacts.APIError{StatusCode: http.StatusInternalServerError, Message: "internal CF detail"}
	if got, want := cfErrMessage(apiErr), "upstream service error"; got != want {
		t.Errorf("cfErrMessage(500) = %q, want %q", got, want)
	}
}
// TestCfErrMessage_502ReturnsGeneric verifies that CF 502 (bad gateway) is also masked.
func TestCfErrMessage_502ReturnsGeneric(t *testing.T) {
	apiErr := &artifacts.APIError{StatusCode: http.StatusBadGateway, Message: "gateway detail"}
	if got, want := cfErrMessage(apiErr), "upstream service error"; got != want {
		t.Errorf("cfErrMessage(502) = %q, want %q", got, want)
	}
}
// TestCfErrMessage_4xxPassesThrough verifies that CF 4xx messages are surfaced.
func TestCfErrMessage_4xxPassesThrough(t *testing.T) {
	const msg = "repo name already taken"
	apiErr := &artifacts.APIError{StatusCode: http.StatusConflict, Message: msg}
	if got := cfErrMessage(apiErr); got != msg {
		t.Errorf("cfErrMessage(409) = %q, want %q", got, msg)
	}
}
// TestCfErrMessage_NonAPIErrorReturnsGeneric verifies that non-CF errors return generic message.
func TestCfErrMessage_NonAPIErrorReturnsGeneric(t *testing.T) {
	plainErr := fmt.Errorf("some network error")
	if got, want := cfErrMessage(plainErr), "upstream service error"; got != want {
		t.Errorf("cfErrMessage(non-API) = %q, want %q", got, want)
	}
}
// TestArtifactsCreate_ImportURLNonHTTPS verifies that a non-HTTPS import_url
// is rejected with 400.
func TestArtifactsCreate_ImportURLNonHTTPS(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	h := newArtifactsHandlerWithClient(
		artifacts.NewWithBaseURL("tok", "test-ns", "http://unused"),
		"test-ns",
	)
	cases := []string{
		"http://github.com/org/repo.git",
		"git://github.com/org/repo.git",
		"ssh://git@github.com/org/repo.git",
		"file:///etc/passwd",
	}
	for _, url := range cases {
		t.Run(url, func(t *testing.T) {
			// Register exactly one EXISTS probe per handler invocation.
			// (A previous extra registration before the loop was never
			// consumed — sqlmock expectations are ordered, so 5 were queued
			// for 4 calls.)
			mock.ExpectQuery(`SELECT EXISTS`).
				WithArgs("ws-badurl").
				WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
			w := httptest.NewRecorder()
			c, _ := gin.CreateTestContext(w)
			c.Params = gin.Params{{Key: "id", Value: "ws-badurl"}}
			body, _ := json.Marshal(map[string]interface{}{
				"name":       "my-repo",
				"import_url": url,
			})
			c.Request = httptest.NewRequest("POST", "/workspaces/ws-badurl/artifacts",
				bytes.NewBuffer(body))
			c.Request.Header.Set("Content-Type", "application/json")
			h.Create(c)
			if w.Code != http.StatusBadRequest {
				t.Errorf("import_url=%q: expected 400, got %d: %s", url, w.Code, w.Body.String())
			}
		})
	}
	// Consistency with sibling tests: fail if any registered expectation
	// went unused.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock: %v", err)
	}
}
// TestArtifactsCreate_InvalidRepoName verifies that invalid repo names return 400.
func TestArtifactsCreate_InvalidRepoName(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	h := newArtifactsHandlerWithClient(
		artifacts.NewWithBaseURL("tok", "test-ns", "http://unused"),
		"test-ns",
	)
	invalidNames := []string{
		"-starts-with-dash",
		"_starts-with-underscore",
		"has spaces",
		"has/slash",
		"has.dot",
		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // 64 chars
	}
	for _, name := range invalidNames {
		t.Run(name, func(t *testing.T) {
			// Each handler invocation performs one EXISTS probe.
			mock.ExpectQuery(`SELECT EXISTS`).
				WithArgs("ws-badname").
				WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
			payload, _ := json.Marshal(map[string]interface{}{"name": name})
			rec := httptest.NewRecorder()
			ctx, _ := gin.CreateTestContext(rec)
			ctx.Params = gin.Params{{Key: "id", Value: "ws-badname"}}
			req := httptest.NewRequest("POST", "/workspaces/ws-badname/artifacts",
				bytes.NewBuffer(payload))
			req.Header.Set("Content-Type", "application/json")
			ctx.Request = req
			h.Create(ctx)
			if rec.Code != http.StatusBadRequest {
				t.Errorf("name=%q: expected 400, got %d: %s", name, rec.Code, rec.Body.String())
			}
		})
	}
}
// TestArtifactsFork_InvalidRepoName verifies that invalid fork names return 400.
func TestArtifactsFork_InvalidRepoName(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	h := newArtifactsHandlerWithClient(
		artifacts.NewWithBaseURL("tok", "test-ns", "http://unused"),
		"test-ns",
	)
	invalidNames := []string{
		"-bad-start",
		"has spaces",
		"../traversal",
	}
	for _, name := range invalidNames {
		t.Run(name, func(t *testing.T) {
			// Fork looks up the source repo name before validating.
			mock.ExpectQuery(`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id`).
				WithArgs("ws-forknm").
				WillReturnRows(sqlmock.NewRows([]string{"cf_repo_name"}).AddRow("src"))
			payload, _ := json.Marshal(map[string]interface{}{"name": name})
			rec := httptest.NewRecorder()
			ctx, _ := gin.CreateTestContext(rec)
			ctx.Params = gin.Params{{Key: "id", Value: "ws-forknm"}}
			req := httptest.NewRequest("POST", "/workspaces/ws-forknm/artifacts/fork",
				bytes.NewBuffer(payload))
			req.Header.Set("Content-Type", "application/json")
			ctx.Request = req
			h.Fork(ctx)
			if rec.Code != http.StatusBadRequest {
				t.Errorf("fork name=%q: expected 400, got %d: %s", name, rec.Code, rec.Body.String())
			}
		})
	}
}
// containsCredentials is a test helper that checks whether a URL has embedded
// user:password@ credentials (should never appear in a stored remote URL).
//
// Heuristic: a credentialed URL has the form scheme://user:pass@host/...,
// i.e. a ':' not immediately preceded by '/' (which excludes the "://" of
// the scheme) with an '@' appearing before the next '/'. Note this does not
// detect user-only credentials ("https://user@host/") — those contain no
// extra ':'.
func containsCredentials(u string) bool {
	// Same bound as before: a qualifying ':' must sit at least 4 bytes
	// before the end of the string.
	for i := 1; i < len(u)-3; i++ {
		if u[i] != ':' || u[i-1] == '/' {
			continue
		}
		// Found ":" that is not part of "://" — could be a user:pass pair.
		// Scan forward: '@' before the next '/' means embedded credentials.
		for k := i + 1; k < len(u); k++ {
			if u[k] == '@' {
				return true
			}
			if u[k] == '/' {
				break
			}
		}
	}
	return false
}

View File

@ -0,0 +1,171 @@
package handlers
import (
"database/sql"
"log"
"net/http"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/gin-gonic/gin"
)
// BudgetHandler exposes per-workspace budget read/write endpoints.
// Routes (all behind WorkspaceAuth middleware):
//
// GET /workspaces/:id/budget — current budget_limit, monthly_spend, budget_remaining
// PATCH /workspaces/:id/budget — set or clear budget_limit
//
// The handler is stateless; all budget state lives in the workspaces table,
// reached through the package-level db.DB handle.
type BudgetHandler struct{}

// NewBudgetHandler returns a ready-to-use BudgetHandler (no fields to set).
func NewBudgetHandler() *BudgetHandler { return &BudgetHandler{} }
// budgetResponse is the canonical JSON shape for both GET and PATCH responses.
type budgetResponse struct {
	// BudgetLimit is the monthly spend ceiling in USD cents (null = no limit).
	// budget_limit=500 means $5.00/month.
	BudgetLimit *int64 `json:"budget_limit"`
	// MonthlySpend is the agent's self-reported accumulated LLM API spend
	// for the current month (USD cents). Incremented via heartbeat.
	MonthlySpend int64 `json:"monthly_spend"`
	// BudgetRemaining is null when BudgetLimit is null, otherwise the raw
	// difference budget_limit - monthly_spend. It can be negative — we store
	// the actual value (no clamping to zero) so callers can see how far
	// over-budget a workspace is.
	BudgetRemaining *int64 `json:"budget_remaining"`
}
// GetBudget handles GET /workspaces/:id/budget.
// Returns the workspace's current budget ceiling, accumulated spend, and
// computed remaining headroom. Both budget_limit and budget_remaining are
// null when no limit has been configured for the workspace.
func (h *BudgetHandler) GetBudget(c *gin.Context) {
	id := c.Param("id")
	var (
		limit sql.NullInt64
		spend int64
	)
	row := db.DB.QueryRowContext(c.Request.Context(),
		`SELECT budget_limit, COALESCE(monthly_spend, 0)
		 FROM workspaces
		 WHERE id = $1 AND status != 'removed'`,
		id,
	)
	switch err := row.Scan(&limit, &spend); {
	case err == sql.ErrNoRows:
		// Unknown or removed workspace.
		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
		return
	case err != nil:
		log.Printf("GetBudget: query failed for %s: %v", id, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}
	resp := budgetResponse{MonthlySpend: spend}
	if limit.Valid {
		// Remaining headroom is the raw difference; may be negative.
		ceiling := limit.Int64
		headroom := ceiling - spend
		resp.BudgetLimit = &ceiling
		resp.BudgetRemaining = &headroom
	}
	c.JSON(http.StatusOK, resp)
}
// patchBudgetRequest is the expected JSON body for PATCH /workspaces/:id/budget.
// budget_limit=null removes the ceiling; a positive integer sets it (USD cents).
type patchBudgetRequest struct {
	// BudgetLimit is a pointer so JSON null decodes to nil. NOTE(review): an
	// absent field ALSO leaves the pointer nil — it does not cause a parse
	// error — which is why PatchBudget binds a raw map instead of this struct
	// to distinguish "absent" from "null". This type appears unused within
	// this file; confirm other callers before removing.
	BudgetLimit *int64 `json:"budget_limit"`
}
// PatchBudget handles PATCH /workspaces/:id/budget.
// Accepts {"budget_limit": <int64>} to set a new ceiling, or
// {"budget_limit": null} to remove an existing ceiling.
// Returns the updated budget state in the same shape as GetBudget.
//
// Errors: 400 for missing/negative/non-integer budget_limit or a malformed
// body; 404 for an unknown or removed workspace; 500 on UPDATE failure.
func (h *BudgetHandler) PatchBudget(c *gin.Context) {
	workspaceID := c.Param("id")
	ctx := c.Request.Context()
	// We need to distinguish between "field absent" and "field = null",
	// so we unmarshal into a raw map first.
	var raw map[string]interface{}
	if err := c.ShouldBindJSON(&raw); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	budgetLimitRaw, ok := raw["budget_limit"]
	if !ok {
		c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit field is required"})
		return
	}
	// Validate and convert the value. JSON numbers decode as float64.
	var budgetArg interface{} // nil → SQL NULL, int64 → new ceiling
	if budgetLimitRaw != nil {
		switch v := budgetLimitRaw.(type) {
		case float64:
			if v < 0 {
				c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
				return
			}
			cv := int64(v)
			// Reject fractional values (e.g. 1.5) instead of silently
			// truncating them to the next-lower cent; the round-trip
			// comparison also rejects NaN/Inf and values outside int64 range.
			if float64(cv) != v {
				c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be an integer (USD cents) or null"})
				return
			}
			budgetArg = cv
		case int64:
			// Defensive: encoding/json decodes interface{} numbers as
			// float64, so this branch is normally unreachable.
			if v < 0 {
				c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
				return
			}
			budgetArg = v
		default:
			c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be an integer (USD cents) or null"})
			return
		}
	}
	// budgetArg == nil means "clear the ceiling"
	// Existence check — return 404 for non-existent / removed workspaces.
	var exists bool
	if err := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1 AND status != 'removed')`,
		workspaceID,
	).Scan(&exists); err != nil || !exists {
		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
		return
	}
	if _, err := db.DB.ExecContext(ctx,
		`UPDATE workspaces SET budget_limit = $2, updated_at = now() WHERE id = $1`,
		workspaceID, budgetArg,
	); err != nil {
		log.Printf("PatchBudget: update failed for %s: %v", workspaceID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
		return
	}
	// Re-read the current state so the response reflects exactly what is in
	// the DB, including the monthly_spend the agent has already accumulated.
	var newLimit sql.NullInt64
	var monthlySpend int64
	if err := db.DB.QueryRowContext(ctx,
		`SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`,
		workspaceID,
	).Scan(&newLimit, &monthlySpend); err != nil {
		log.Printf("PatchBudget: re-read failed for %s: %v", workspaceID, err)
		// Still success — just omit the echo.
		c.JSON(http.StatusOK, gin.H{"status": "updated"})
		return
	}
	resp := budgetResponse{
		MonthlySpend: monthlySpend,
	}
	if newLimit.Valid {
		limit := newLimit.Int64
		resp.BudgetLimit = &limit
		remaining := limit - monthlySpend
		resp.BudgetRemaining = &remaining
	}
	c.JSON(http.StatusOK, resp)
}

View File

@ -0,0 +1,458 @@
package handlers
import (
"bytes"
"database/sql"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// ==================== GET /workspaces/:id/budget ====================
// TestBudgetGet_NotFound verifies that GET /budget returns 404 for an unknown
// workspace ID (ErrNoRows from the budget query).
func TestBudgetGet_NotFound(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
		WithArgs("ws-not-there").
		WillReturnError(sql.ErrNoRows)
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-not-there"}}
	ctx.Request = httptest.NewRequest("GET", "/workspaces/ws-not-there/budget", nil)
	NewBudgetHandler().GetBudget(ctx)
	if rec.Code != http.StatusNotFound {
		t.Errorf("expected 404, got %d: %s", rec.Code, rec.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestBudgetGet_DBError verifies that a non-ErrNoRows DB error returns 500.
func TestBudgetGet_DBError(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
		WithArgs("ws-db-err").
		WillReturnError(sql.ErrConnDone)
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-db-err"}}
	ctx.Request = httptest.NewRequest("GET", "/workspaces/ws-db-err/budget", nil)
	NewBudgetHandler().GetBudget(ctx)
	if rec.Code != http.StatusInternalServerError {
		t.Errorf("expected 500, got %d: %s", rec.Code, rec.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestBudgetGet_NoLimit verifies that budget_limit and budget_remaining are
// null when the workspace has no budget ceiling configured.
func TestBudgetGet_NoLimit(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
		WithArgs("ws-free").
		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
			AddRow(nil, int64(42)))
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-free"}}
	ctx.Request = httptest.NewRequest("GET", "/workspaces/ws-free/budget", nil)
	NewBudgetHandler().GetBudget(ctx)
	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	var got map[string]interface{}
	if err := json.Unmarshal(rec.Body.Bytes(), &got); err != nil {
		t.Fatalf("parse response: %v", err)
	}
	// With no ceiling configured, both limit and remaining are JSON null.
	if got["budget_limit"] != nil {
		t.Errorf("expected budget_limit=null, got %v", got["budget_limit"])
	}
	if got["budget_remaining"] != nil {
		t.Errorf("expected budget_remaining=null, got %v", got["budget_remaining"])
	}
	if got["monthly_spend"] != float64(42) {
		t.Errorf("expected monthly_spend=42, got %v", got["monthly_spend"])
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestBudgetGet_WithLimit verifies that budget_limit, monthly_spend, and
// budget_remaining are all returned correctly when a ceiling is set.
func TestBudgetGet_WithLimit(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
		WithArgs("ws-capped").
		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
			AddRow(int64(500), int64(123)))
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-capped"}}
	ctx.Request = httptest.NewRequest("GET", "/workspaces/ws-capped/budget", nil)
	NewBudgetHandler().GetBudget(ctx)
	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	var got map[string]interface{}
	if err := json.Unmarshal(rec.Body.Bytes(), &got); err != nil {
		t.Fatalf("parse response: %v", err)
	}
	if got["budget_limit"] != float64(500) {
		t.Errorf("expected budget_limit=500, got %v", got["budget_limit"])
	}
	if got["monthly_spend"] != float64(123) {
		t.Errorf("expected monthly_spend=123, got %v", got["monthly_spend"])
	}
	// budget_remaining = 500 - 123 = 377
	if got["budget_remaining"] != float64(377) {
		t.Errorf("expected budget_remaining=377, got %v", got["budget_remaining"])
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestBudgetGet_OverBudget verifies that budget_remaining can be negative
// when monthly_spend has already exceeded budget_limit.
func TestBudgetGet_OverBudget(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
		WithArgs("ws-over").
		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
			AddRow(int64(100), int64(150)))
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-over"}}
	ctx.Request = httptest.NewRequest("GET", "/workspaces/ws-over/budget", nil)
	NewBudgetHandler().GetBudget(ctx)
	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	var got map[string]interface{}
	if err := json.Unmarshal(rec.Body.Bytes(), &got); err != nil {
		t.Fatalf("parse response: %v", err)
	}
	// budget_remaining = 100 - 150 = -50 (negative, but we store actual value)
	if got["budget_remaining"] != float64(-50) {
		t.Errorf("expected budget_remaining=-50, got %v", got["budget_remaining"])
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// ==================== PATCH /workspaces/:id/budget ====================
// TestBudgetPatch_MissingField verifies that PATCH /budget with no budget_limit
// field in the body returns 400.
func TestBudgetPatch_MissingField(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-patch-missing"}}
	req := httptest.NewRequest("PATCH", "/workspaces/ws-patch-missing/budget",
		bytes.NewBufferString(`{"other_field":123}`))
	req.Header.Set("Content-Type", "application/json")
	ctx.Request = req
	NewBudgetHandler().PatchBudget(ctx)
	if rec.Code != http.StatusBadRequest {
		t.Errorf("expected 400, got %d: %s", rec.Code, rec.Body.String())
	}
}
// TestBudgetPatch_InvalidBody verifies that a malformed JSON body returns 400.
func TestBudgetPatch_InvalidBody(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-patch-bad"}}
	req := httptest.NewRequest("PATCH", "/workspaces/ws-patch-bad/budget",
		bytes.NewBufferString(`not json`))
	req.Header.Set("Content-Type", "application/json")
	ctx.Request = req
	NewBudgetHandler().PatchBudget(ctx)
	if rec.Code != http.StatusBadRequest {
		t.Errorf("expected 400, got %d: %s", rec.Code, rec.Body.String())
	}
}
// TestBudgetPatch_NegativeValue verifies that a negative budget_limit is rejected.
func TestBudgetPatch_NegativeValue(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-negative"}}
	req := httptest.NewRequest("PATCH", "/workspaces/ws-negative/budget",
		bytes.NewBufferString(`{"budget_limit":-1}`))
	req.Header.Set("Content-Type", "application/json")
	ctx.Request = req
	NewBudgetHandler().PatchBudget(ctx)
	if rec.Code != http.StatusBadRequest {
		t.Errorf("expected 400 for negative budget_limit, got %d: %s", rec.Code, rec.Body.String())
	}
}
// TestBudgetPatch_InvalidType verifies that a non-numeric budget_limit returns 400.
func TestBudgetPatch_InvalidType(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-badtype"}}
	req := httptest.NewRequest("PATCH", "/workspaces/ws-badtype/budget",
		bytes.NewBufferString(`{"budget_limit":"not-a-number"}`))
	req.Header.Set("Content-Type", "application/json")
	ctx.Request = req
	NewBudgetHandler().PatchBudget(ctx)
	if rec.Code != http.StatusBadRequest {
		t.Errorf("expected 400 for string budget_limit, got %d: %s", rec.Code, rec.Body.String())
	}
}
// TestBudgetPatch_WorkspaceNotFound verifies that PATCH /budget returns 404
// when the workspace doesn't exist.
func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// Existence probe reports no such workspace.
	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
		WithArgs("ws-no-exist").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-no-exist"}}
	req := httptest.NewRequest("PATCH", "/workspaces/ws-no-exist/budget",
		bytes.NewBufferString(`{"budget_limit":500}`))
	req.Header.Set("Content-Type", "application/json")
	ctx.Request = req
	NewBudgetHandler().PatchBudget(ctx)
	if rec.Code != http.StatusNotFound {
		t.Errorf("expected 404, got %d: %s", rec.Code, rec.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestBudgetPatch_SetLimit verifies that PATCH /budget with a positive value
// updates the DB and returns the new budget state.
func TestBudgetPatch_SetLimit(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// Expected DB traffic, in order: existence probe → UPDATE → re-read.
	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
		WithArgs("ws-set-limit").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
		WithArgs("ws-set-limit", int64(500)).
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
		WithArgs("ws-set-limit").
		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
			AddRow(int64(500), int64(200)))
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-set-limit"}}
	req := httptest.NewRequest("PATCH", "/workspaces/ws-set-limit/budget",
		bytes.NewBufferString(`{"budget_limit":500}`))
	req.Header.Set("Content-Type", "application/json")
	ctx.Request = req
	NewBudgetHandler().PatchBudget(ctx)
	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	var got map[string]interface{}
	if err := json.Unmarshal(rec.Body.Bytes(), &got); err != nil {
		t.Fatalf("parse response: %v", err)
	}
	if got["budget_limit"] != float64(500) {
		t.Errorf("expected budget_limit=500, got %v", got["budget_limit"])
	}
	if got["monthly_spend"] != float64(200) {
		t.Errorf("expected monthly_spend=200, got %v", got["monthly_spend"])
	}
	// budget_remaining = 500 - 200 = 300
	if got["budget_remaining"] != float64(300) {
		t.Errorf("expected budget_remaining=300, got %v", got["budget_remaining"])
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestBudgetPatch_ClearLimit verifies that PATCH /budget with budget_limit=null
// clears the ceiling, making budget_limit and budget_remaining null in the response.
func TestBudgetPatch_ClearLimit(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
		WithArgs("ws-clear-limit").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	// UPDATE receives SQL NULL for the cleared ceiling.
	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
		WithArgs("ws-clear-limit", nil).
		WillReturnResult(sqlmock.NewResult(0, 1))
	// Re-read — budget_limit is now NULL.
	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
		WithArgs("ws-clear-limit").
		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
			AddRow(nil, int64(50)))
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-clear-limit"}}
	req := httptest.NewRequest("PATCH", "/workspaces/ws-clear-limit/budget",
		bytes.NewBufferString(`{"budget_limit":null}`))
	req.Header.Set("Content-Type", "application/json")
	ctx.Request = req
	NewBudgetHandler().PatchBudget(ctx)
	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	var got map[string]interface{}
	if err := json.Unmarshal(rec.Body.Bytes(), &got); err != nil {
		t.Fatalf("parse response: %v", err)
	}
	if got["budget_limit"] != nil {
		t.Errorf("expected budget_limit=null after clear, got %v", got["budget_limit"])
	}
	if got["budget_remaining"] != nil {
		t.Errorf("expected budget_remaining=null after clear, got %v", got["budget_remaining"])
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestBudgetPatch_UpdateDBError verifies that a DB error during the UPDATE
// returns 500.
func TestBudgetPatch_UpdateDBError(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
		WithArgs("ws-patch-dberr").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
		WithArgs("ws-patch-dberr", int64(500)).
		WillReturnError(sql.ErrConnDone)
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-patch-dberr"}}
	req := httptest.NewRequest("PATCH", "/workspaces/ws-patch-dberr/budget",
		bytes.NewBufferString(`{"budget_limit":500}`))
	req.Header.Set("Content-Type", "application/json")
	ctx.Request = req
	NewBudgetHandler().PatchBudget(ctx)
	if rec.Code != http.StatusInternalServerError {
		t.Errorf("expected 500 on UPDATE error, got %d: %s", rec.Code, rec.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestBudgetPatch_ZeroLimit verifies that budget_limit=0 is accepted (it means
// every A2A call is blocked — useful to pause a workspace's LLM spend entirely).
func TestBudgetPatch_ZeroLimit(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
		WithArgs("ws-zero-limit").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
		WithArgs("ws-zero-limit", int64(0)).
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
		WithArgs("ws-zero-limit").
		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
			AddRow(int64(0), int64(0)))
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-zero-limit"}}
	req := httptest.NewRequest("PATCH", "/workspaces/ws-zero-limit/budget",
		bytes.NewBufferString(`{"budget_limit":0}`))
	req.Header.Set("Content-Type", "application/json")
	ctx.Request = req
	NewBudgetHandler().PatchBudget(ctx)
	if rec.Code != http.StatusOK {
		t.Errorf("expected 200 for zero budget_limit, got %d: %s", rec.Code, rec.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}

View File

@ -54,6 +54,13 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
return // response already written
}
// #548 — prevent self-delegation: a workspace delegating to itself
// acquires _run_lock twice on the same mutex, deadlocking permanently.
if sourceID == body.TargetID {
c.JSON(http.StatusBadRequest, gin.H{"error": "self-delegation not permitted"})
return
}
// #124 — idempotency. If the caller supplies an idempotency_key, return
// the existing delegation when (workspace_id, idempotency_key) already
// exists and is not in a failed terminal state.

View File

@ -88,6 +88,37 @@ func TestDelegate_InvalidUUIDTargetID(t *testing.T) {
}
}
// ---------- Delegate: self-delegation → 400 ----------
func TestDelegate_SelfDelegation_Rejected(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	dh := NewDelegationHandler(wh, broadcaster)
	// Use the same UUID for both source and target to trigger the self-delegation guard.
	selfID := "11111111-2222-3333-4444-555555555555"
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: selfID}}
	body := `{"target_id":"` + selfID + `","task":"do something"}`
	c.Request = httptest.NewRequest("POST", "/workspaces/"+selfID+"/delegate", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	dh.Delegate(c)
	if w.Code != http.StatusBadRequest {
		t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	// Fail fast on an unparseable body instead of ignoring the error —
	// otherwise the message check below reports a misleading nil mismatch.
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("parse response: %v", err)
	}
	if resp["error"] != "self-delegation not permitted" {
		t.Errorf("expected 'self-delegation not permitted', got %v", resp["error"])
	}
}
// ---------- Delegate: success → 202 with delegation_id ----------
func TestDelegate_Success(t *testing.T) {

View File

@ -0,0 +1,115 @@
// Package handlers — GitHub App installation-token refresh endpoint.
//
// GET /admin/github-installation-token returns a fresh GitHub App
// installation token on demand. Long-running workspace containers use
// this as a git credential helper and for explicit `gh auth` re-runs
// so they never operate with an expired GH_TOKEN.
//
// # Why this endpoint?
//
// The github-app-auth plugin (PR #506) injects GH_TOKEN + GITHUB_TOKEN
// into a workspace container's env at provision time. Those tokens are
// GitHub App installation tokens with a fixed ~60 min TTL. The plugin
// keeps a server-side in-process cache and proactively refreshes it
// 5 min before expiry, but the workspace env is set once at container
// start and never updated — so any workspace alive >60 min ends up with
// an expired token (issue #547).
//
// The fix is:
//
// 1. Platform side (this file): expose GET /admin/github-installation-token.
// The handler delegates to the registered TokenProvider (typically the
// github-app-auth plugin), whose cache is always fresh. Gated behind
// AdminAuth — any valid workspace bearer token can call it.
//
// 2. Workspace side: a shell credential helper
// (workspace-template/scripts/molecule-git-token-helper.sh) configured
// as the git credential helper. git calls it on every push/fetch;
// it hits this endpoint and emits the fresh token to stdout. A 30-min
// cron also runs `gh auth login --with-token` using the same helper.
//
// # Approach chosen
//
// Option B (pre-flight/on-demand): workspaces poll for a token when
// they need one (credential helper callback). This is preferable over a
// background goroutine pusher (Option A) because:
//
// - The plugin already maintains its own refresh cache — there is no
// token to refresh on the platform side.
// - Pushing a new token into running containers requires docker exec /
// env mutation, which the architecture explicitly rejects (see issue
// #547 "Alternatives considered").
// - On-demand is pull-based, stateless, and trivially testable.
package handlers
import (
"log"
"net/http"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/pkg/provisionhook"
"github.com/gin-gonic/gin"
)
// GitHubTokenHandler serves GET /admin/github-installation-token.
// It holds only the provision-hook registry used to locate a TokenProvider;
// a nil registry means no GitHub App plugin is loaded.
type GitHubTokenHandler struct {
	registry *provisionhook.Registry
}

// NewGitHubTokenHandler constructs the handler. registry may be nil when
// no GitHub App plugin is registered (dev / self-hosted deployments);
// GetInstallationToken then responds 404.
func NewGitHubTokenHandler(reg *provisionhook.Registry) *GitHubTokenHandler {
	return &GitHubTokenHandler{registry: reg}
}
// GetInstallationToken handles GET /admin/github-installation-token.
//
// Responses:
//
//	200 {"token": "ghs_...", "expires_at": "2026-04-17T22:50:00Z"}
//	404 {"error": "no GitHub App configured"} — GITHUB_APP_ID not set
//	404 {"error": "no token provider registered"} — plugin loaded but
//	    doesn't implement TokenProvider
//	500 {"error": "token refresh failed"} — provider returned error
//
// The 404 vs 403 distinction is intentional: a 404 means the feature is
// simply not configured, not that the caller is forbidden. This matches
// the pattern used by GET /admin/workspaces/:id/test-token.
//
// Callers must retry with exponential back-off on 500 — a transient
// upstream GitHub API error should not permanently block git operations.
func (h *GitHubTokenHandler) GetInstallationToken(c *gin.Context) {
	// No registry at all → GitHub App feature is not configured.
	if h.registry == nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "no GitHub App configured"})
		return
	}
	tp := h.registry.FirstTokenProvider()
	if tp == nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "no token provider registered"})
		return
	}
	tok, exp, err := tp.Token(c.Request.Context())
	switch {
	case err != nil:
		log.Printf("[github] token refresh failed: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "token refresh failed"})
		return
	case tok == "":
		// Defend against a provider that returns ("", t, nil).
		log.Printf("[github] token provider returned empty token")
		c.JSON(http.StatusInternalServerError, gin.H{"error": "token refresh failed: empty token"})
		return
	}
	// Never log the token itself.
	log.Printf("[github] served fresh installation token (expires %s, TTL %.0fs)",
		exp.Format(time.RFC3339),
		time.Until(exp).Seconds())
	c.JSON(http.StatusOK, gin.H{
		"token":      tok,
		"expires_at": exp.UTC().Format(time.RFC3339),
	})
}

View File

@ -0,0 +1,232 @@
package handlers
import (
"context"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/pkg/provisionhook"
"github.com/gin-gonic/gin"
)
// ─── mock helpers ────────────────────────────────────────────────────────────
// mockMutatorOnly implements EnvMutator but NOT TokenProvider — used to
// verify the handler skips plugins that cannot mint tokens.
type mockMutatorOnly struct{ name string }

func (mm *mockMutatorOnly) Name() string { return mm.name }

func (mm *mockMutatorOnly) MutateEnv(_ context.Context, _ string, _ map[string]string) error {
	return nil
}
// mockTokenMutator implements both EnvMutator and TokenProvider.
// A non-nil err simulates a provider failure; otherwise Token returns the
// configured token/expiresAt pair.
type mockTokenMutator struct {
	name      string
	token     string
	expiresAt time.Time
	err       error
}

func (mt *mockTokenMutator) Name() string { return mt.name }

func (mt *mockTokenMutator) MutateEnv(_ context.Context, _ string, _ map[string]string) error {
	return nil
}

func (mt *mockTokenMutator) Token(_ context.Context) (string, time.Time, error) {
	return mt.token, mt.expiresAt, mt.err
}
// ─── request helper ──────────────────────────────────────────────────────────
// newGitHubTokenRequest builds a recorder plus a gin test context primed
// with a GET against the installation-token endpoint.
func newGitHubTokenRequest() (*httptest.ResponseRecorder, *gin.Context) {
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Request = httptest.NewRequest(http.MethodGet, "/admin/github-installation-token", nil)
	return rec, ctx
}
// ─── tests ───────────────────────────────────────────────────────────────────
// TestGitHubToken_NilRegistry — no GitHub App plugin loaded at all.
// Expect 404 so operators can distinguish "not configured" from "forbidden".
func TestGitHubToken_NilRegistry(t *testing.T) {
	handler := NewGitHubTokenHandler(nil)
	rec, ctx := newGitHubTokenRequest()
	handler.GetInstallationToken(ctx)

	if rec.Code != http.StatusNotFound {
		t.Fatalf("expected 404 for nil registry, got %d: %s", rec.Code, rec.Body.String())
	}
	payload := map[string]string{}
	if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
		t.Fatalf("response is not valid JSON: %v", err)
	}
	if payload["error"] == "" {
		t.Error("expected non-empty error field in response")
	}
}
// TestGitHubToken_NoTokenProvider — plugin registered but doesn't implement
// TokenProvider (e.g. a non-GitHub mutator in the chain).
// Expect 404 — the GitHub App endpoint is not available.
func TestGitHubToken_NoTokenProvider(t *testing.T) {
	registry := provisionhook.NewRegistry()
	registry.Register(&mockMutatorOnly{name: "other-plugin"})

	handler := NewGitHubTokenHandler(registry)
	rec, ctx := newGitHubTokenRequest()
	handler.GetInstallationToken(ctx)

	if rec.Code != http.StatusNotFound {
		t.Fatalf("expected 404 when no TokenProvider, got %d: %s", rec.Code, rec.Body.String())
	}
}
// TestGitHubToken_ProviderError — provider returns an error (e.g. GitHub API
// unreachable). Expect 500 so the workspace credential helper retries.
func TestGitHubToken_ProviderError(t *testing.T) {
	registry := provisionhook.NewRegistry()
	registry.Register(&mockTokenMutator{
		name: "github-app-auth",
		err:  errors.New("github: 503 service unavailable"),
	})

	handler := NewGitHubTokenHandler(registry)
	rec, ctx := newGitHubTokenRequest()
	handler.GetInstallationToken(ctx)

	if rec.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500 on provider error, got %d: %s", rec.Code, rec.Body.String())
	}
	payload := map[string]string{}
	if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
		t.Fatalf("response is not valid JSON: %v", err)
	}
	if payload["error"] == "" {
		t.Error("expected non-empty error field in 500 response")
	}
}
// TestGitHubToken_EmptyToken — provider reports success but hands back an
// empty token. That should never happen in normal operation; it indicates a
// plugin programming error, so the handler must answer 500, not 200.
func TestGitHubToken_EmptyToken(t *testing.T) {
	expiry := time.Now().Add(55 * time.Minute)
	registry := provisionhook.NewRegistry()
	registry.Register(&mockTokenMutator{
		name:      "github-app-auth",
		token:     "", // empty — plugin bug
		expiresAt: expiry,
	})

	handler := NewGitHubTokenHandler(registry)
	rec, ctx := newGitHubTokenRequest()
	handler.GetInstallationToken(ctx)

	if rec.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500 for empty token, got %d: %s", rec.Code, rec.Body.String())
	}
}
// TestGitHubToken_HappyPath — provider returns a valid token.
// Assert: 200, token echoed back, expires_at parses as RFC3339 and lies in
// the future (the handler must never serve an already-expired token).
func TestGitHubToken_HappyPath(t *testing.T) {
	expiry := time.Now().UTC().Add(55 * time.Minute).Truncate(time.Second)
	registry := provisionhook.NewRegistry()
	registry.Register(&mockTokenMutator{
		name:      "github-app-auth",
		token:     "ghs_TestTokenABC123",
		expiresAt: expiry,
	})

	handler := NewGitHubTokenHandler(registry)
	rec, ctx := newGitHubTokenRequest()
	handler.GetInstallationToken(ctx)

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	var payload struct {
		Token     string `json:"token"`
		ExpiresAt string `json:"expires_at"`
	}
	if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
		t.Fatalf("response is not valid JSON: %v", err)
	}
	if payload.Token != "ghs_TestTokenABC123" {
		t.Errorf("expected token 'ghs_TestTokenABC123', got %q", payload.Token)
	}
	parsedExpiry, err := time.Parse(time.RFC3339, payload.ExpiresAt)
	if err != nil {
		t.Fatalf("expires_at is not valid RFC3339: %q — %v", payload.ExpiresAt, err)
	}
	if !parsedExpiry.After(time.Now()) {
		t.Errorf("expires_at %s is in the past — handler served an expired token", payload.ExpiresAt)
	}
}
// TestGitHubToken_FirstProviderWins — two token providers registered; the
// handler must serve from the one registered first (registration order).
func TestGitHubToken_FirstProviderWins(t *testing.T) {
	expiry := time.Now().UTC().Add(55 * time.Minute)
	registry := provisionhook.NewRegistry()
	registry.Register(&mockTokenMutator{
		name:      "first-provider",
		token:     "ghs_First",
		expiresAt: expiry,
	})
	registry.Register(&mockTokenMutator{
		name:      "second-provider",
		token:     "ghs_Second",
		expiresAt: expiry,
	})

	handler := NewGitHubTokenHandler(registry)
	rec, ctx := newGitHubTokenRequest()
	handler.GetInstallationToken(ctx)

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	payload := map[string]string{}
	_ = json.Unmarshal(rec.Body.Bytes(), &payload)
	if payload["token"] != "ghs_First" {
		t.Errorf("expected first provider's token 'ghs_First', got %q", payload["token"])
	}
}
// TestGitHubToken_NonProviderBeforeProvider — a plain EnvMutator registered
// ahead of a TokenProvider. The lookup must skip the non-provider and still
// find the provider further down the chain.
func TestGitHubToken_NonProviderBeforeProvider(t *testing.T) {
	expiry := time.Now().UTC().Add(55 * time.Minute)
	registry := provisionhook.NewRegistry()
	registry.Register(&mockMutatorOnly{name: "env-injector"})
	registry.Register(&mockTokenMutator{
		name:      "github-app-auth",
		token:     "ghs_FoundBehindOther",
		expiresAt: expiry,
	})

	handler := NewGitHubTokenHandler(registry)
	rec, ctx := newGitHubTokenRequest()
	handler.GetInstallationToken(ctx)

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	payload := map[string]string{}
	_ = json.Unmarshal(rec.Body.Bytes(), &payload)
	if payload["token"] != "ghs_FoundBehindOther" {
		t.Errorf("expected 'ghs_FoundBehindOther', got %q", payload["token"])
	}
}

View File

@ -28,9 +28,11 @@ func TestWorkspaceCreate_WithParentID(t *testing.T) {
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
parentID := "parent-ws-123"
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Child Agent", nil, 1, "langgraph", sqlmock.AnyArg(), &parentID, nil, "none").
WithArgs(sqlmock.AnyArg(), "Child Agent", nil, 1, "langgraph", sqlmock.AnyArg(), &parentID, nil, "none", (*int64)(nil)).
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
mock.ExpectExec("INSERT INTO canvas_layouts").
WithArgs(sqlmock.AnyArg(), float64(0), float64(0)).
WillReturnResult(sqlmock.NewResult(0, 1))
@ -61,9 +63,11 @@ func TestWorkspaceCreate_ExplicitClaudeCodeRuntime(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "CC Agent", nil, 2, "claude-code", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WithArgs(sqlmock.AnyArg(), "CC Agent", nil, 2, "claude-code", sqlmock.AnyArg(), (*string)(nil), nil, "none", (*int64)(nil)).
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
mock.ExpectExec("INSERT INTO canvas_layouts").
WithArgs(sqlmock.AnyArg(), float64(10), float64(20)).
WillReturnResult(sqlmock.NewResult(0, 1))
@ -190,12 +194,13 @@ func TestWorkspaceList_WithData(t *testing.T) {
"id", "name", "role", "tier", "status", "agent_card", "url",
"parent_id", "active_tasks", "last_error_rate", "last_sample_error",
"uptime_seconds", "current_task", "runtime", "workspace_dir", "x", "y", "collapsed",
"budget_limit", "monthly_spend",
}
rows := sqlmock.NewRows(columns).
AddRow("ws-1", "Agent One", "worker", 1, "online", []byte(`{"name":"agent1"}`), "http://localhost:8001",
nil, 3, 0.02, "", 7200, "processing", "langgraph", "", 10.0, 20.0, false).
nil, 3, 0.02, "", 7200, "processing", "langgraph", "", 10.0, 20.0, false, nil, int64(0)).
AddRow("ws-2", "Agent Two", "", 2, "degraded", []byte("null"), "",
nil, 0, 0.6, "timeout", 100, "", "claude-code", "", 50.0, 60.0, true)
nil, 0, 0.6, "timeout", 100, "", "claude-code", "", 50.0, 60.0, true, nil, int64(0))
mock.ExpectQuery("SELECT w.id, w.name").
WillReturnRows(rows)

View File

@ -248,11 +248,17 @@ func TestWorkspaceCreate(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", "/tmp/configs")
// Expect transaction begin for atomic workspace+secrets creation
mock.ExpectBegin()
// Expect workspace INSERT (uuid is dynamic, use AnyArg for id, runtime, awareness_namespace)
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Test Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WithArgs(sqlmock.AnyArg(), "Test Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none", (*int64)(nil)).
WillReturnResult(sqlmock.NewResult(0, 1))
// Expect transaction commit (no secrets in this payload)
mock.ExpectCommit()
// Expect canvas_layouts INSERT
mock.ExpectExec("INSERT INTO canvas_layouts").
WithArgs(sqlmock.AnyArg(), float64(100), float64(200)).
@ -334,12 +340,13 @@ func TestWorkspaceList(t *testing.T) {
"id", "name", "role", "tier", "status", "agent_card", "url",
"parent_id", "active_tasks", "last_error_rate", "last_sample_error",
"uptime_seconds", "current_task", "runtime", "workspace_dir", "x", "y", "collapsed",
"budget_limit", "monthly_spend",
}
rows := sqlmock.NewRows(columns).
AddRow("ws-1", "Agent One", "worker", 1, "online", []byte("null"), "http://localhost:8001",
nil, 0, 0.0, "", 100, "", "claude-code", "", 10.0, 20.0, false).
nil, 0, 0.0, "", 100, "", "claude-code", "", 10.0, 20.0, false, nil, int64(0)).
AddRow("ws-2", "Agent Two", "manager", 2, "provisioning", []byte("null"), "",
nil, 0, 0.0, "", 0, "", "langgraph", "", 50.0, 60.0, false)
nil, 0, 0.0, "", 0, "", "langgraph", "", 50.0, 60.0, false, nil, int64(0))
mock.ExpectQuery("SELECT w.id, w.name").
WillReturnRows(rows)
@ -1001,12 +1008,14 @@ func TestWorkspaceGet_CurrentTask(t *testing.T) {
"id", "name", "role", "tier", "status", "agent_card", "url",
"parent_id", "active_tasks", "last_error_rate", "last_sample_error",
"uptime_seconds", "current_task", "runtime", "workspace_dir", "x", "y", "collapsed",
"budget_limit", "monthly_spend",
}
mock.ExpectQuery("SELECT w.id, w.name").
WithArgs("ws-task").
WillReturnRows(sqlmock.NewRows(columns).AddRow(
"ws-task", "Task Worker", "worker", 1, "online", []byte("null"), "http://localhost:9000",
nil, 2, 0.0, "", 300, "Analyzing document", "langgraph", "", 10.0, 20.0, false,
nil, int64(0),
))
w := httptest.NewRecorder()

View File

@ -0,0 +1,254 @@
package handlers
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"log"
	"net/http"
	"time"

	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
	"github.com/gin-gonic/gin"
)
// resolveOrgID returns the effective org ID for a workspace: the parent_id
// when one exists, or the workspace's own ID when it is the org root.
// Returns an empty string (and nil error) if the workspace is not found.
func resolveOrgID(ctx context.Context, workspaceID string) (string, error) {
	var parentID sql.NullString
	err := db.DB.QueryRowContext(ctx,
		`SELECT parent_id FROM workspaces WHERE id = $1`,
		workspaceID,
	).Scan(&parentID)
	// errors.Is rather than == so the not-found branch still fires when a
	// driver or middleware wraps sql.ErrNoRows (Go 1.13+ error wrapping).
	if errors.Is(err, sql.ErrNoRows) {
		return "", nil
	}
	if err != nil {
		return "", err
	}
	if parentID.Valid && parentID.String != "" {
		// Child workspace — the parent is the org root.
		return parentID.String, nil
	}
	return workspaceID, nil
}
// checkOrgPluginAllowlist returns (true, reason) when the plugin is blocked
// by the org's allowlist, or (false, "") when the install is permitted.
//
// Decision table:
//   - no allowlist rows for this org     → allow-all (backward compat)
//   - plugin present on the allowlist    → allowed
//   - allowlist non-empty, plugin absent → blocked
//   - any DB error → fail-open with a log line, so a DB hiccup never
//     blocks installs
func checkOrgPluginAllowlist(ctx context.Context, workspaceID, pluginName string) (blocked bool, reason string) {
	orgID, err := resolveOrgID(ctx, workspaceID)
	switch {
	case err != nil:
		log.Printf("allowlist: resolveOrgID(%s) failed: %v — allowing install", workspaceID, err)
		return false, ""
	case orgID == "":
		// Workspace not found; later checks surface that error.
		return false, ""
	}

	var onList bool
	err = db.DB.QueryRowContext(ctx, `
		SELECT EXISTS(
			SELECT 1 FROM org_plugin_allowlist
			WHERE org_id = $1 AND plugin_name = $2
		)
	`, orgID, pluginName).Scan(&onList)
	if err != nil {
		log.Printf("allowlist: existence check failed (org=%s plugin=%s): %v — allowing install", orgID, pluginName, err)
		return false, ""
	}
	if onList {
		return false, "" // explicitly on the allowlist
	}

	// Not on the list — distinguish "no allowlist configured" (allow-all)
	// from "active allowlist that excludes this plugin" (blocked).
	var total int
	if err := db.DB.QueryRowContext(ctx,
		`SELECT COUNT(*) FROM org_plugin_allowlist WHERE org_id = $1`,
		orgID,
	).Scan(&total); err != nil {
		log.Printf("allowlist: count check failed (org=%s): %v — allowing install", orgID, err)
		return false, ""
	}
	if total == 0 {
		return false, ""
	}
	return true, fmt.Sprintf("plugin %q is not in the org allowlist", pluginName)
}
// OrgPluginAllowlistHandler manages the per-org plugin governance registry.
type OrgPluginAllowlistHandler struct{}

// NewOrgPluginAllowlistHandler constructs an OrgPluginAllowlistHandler.
// The handler is stateless; all data lives in the database.
func NewOrgPluginAllowlistHandler() *OrgPluginAllowlistHandler {
	return new(OrgPluginAllowlistHandler)
}
// allowlistEntry is the JSON shape for a single allowlist record.
type allowlistEntry struct {
	PluginName string    `json:"plugin_name"` // validated plugin identifier
	EnabledBy  string    `json:"enabled_by"`  // workspace ID of the admin who enabled it
	EnabledAt  time.Time `json:"enabled_at"`  // timestamp the row was created
}

// putAllowlistRequest is the request body for PUT /orgs/:id/plugins/allowlist.
// Plugins holds the complete desired allowlist; the handler replaces the
// current entries atomically. An empty slice clears the allowlist (allow-all).
type putAllowlistRequest struct {
	Plugins   []string `json:"plugins"`
	EnabledBy string   `json:"enabled_by"` // workspace ID of the admin performing the change
}
// GetAllowlist handles GET /orgs/:id/plugins/allowlist.
//
// Returns the current allowlist for the org workspace identified by :id.
// An empty array means no allowlist is configured (allow-all). Auth: AdminAuth.
func (h *OrgPluginAllowlistHandler) GetAllowlist(c *gin.Context) {
	orgID := c.Param("id")
	ctx := c.Request.Context()

	// The org workspace must exist before we report its allowlist.
	var orgExists bool
	if err := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		orgID,
	).Scan(&orgExists); err != nil {
		log.Printf("allowlist: org check failed for %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify org"})
		return
	}
	if !orgExists {
		c.JSON(http.StatusNotFound, gin.H{"error": "org not found"})
		return
	}

	rows, err := db.DB.QueryContext(ctx, `
		SELECT plugin_name, enabled_by, enabled_at
		FROM org_plugin_allowlist
		WHERE org_id = $1
		ORDER BY plugin_name
	`, orgID)
	if err != nil {
		log.Printf("allowlist: query failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch allowlist"})
		return
	}
	defer rows.Close()

	// Materialize as a non-nil slice so an empty allowlist serializes as
	// [] rather than null.
	list := make([]allowlistEntry, 0)
	for rows.Next() {
		var entry allowlistEntry
		if err := rows.Scan(&entry.PluginName, &entry.EnabledBy, &entry.EnabledAt); err != nil {
			log.Printf("allowlist: scan error for org %s: %v", orgID, err)
			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read allowlist"})
			return
		}
		list = append(list, entry)
	}
	if err := rows.Err(); err != nil {
		log.Printf("allowlist: rows error for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read allowlist"})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"org_id":    orgID,
		"plugins":   list,
		"allow_all": len(list) == 0,
	})
}
// PutAllowlist handles PUT /orgs/:id/plugins/allowlist.
//
// Replaces the org's allowlist atomically with the supplied plugin names.
// Sending an empty plugins array clears the allowlist (reverts to allow-all).
// Auth: AdminAuth.
func (h *OrgPluginAllowlistHandler) PutAllowlist(c *gin.Context) {
	orgID := c.Param("id")
	ctx := c.Request.Context()

	var payload putAllowlistRequest
	if err := c.ShouldBindJSON(&payload); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	if payload.EnabledBy == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "enabled_by is required"})
		return
	}
	// Reject unsafe plugin names up front — nothing touches the DB until
	// the whole request validates.
	for _, pluginName := range payload.Plugins {
		if err := validatePluginName(pluginName); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{
				"error":       "invalid plugin name",
				"plugin_name": pluginName,
				"detail":      err.Error(),
			})
			return
		}
	}

	// The org workspace must exist.
	var orgExists bool
	if err := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		orgID,
	).Scan(&orgExists); err != nil {
		log.Printf("allowlist: org check failed for %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify org"})
		return
	}
	if !orgExists {
		c.JSON(http.StatusNotFound, gin.H{"error": "org not found"})
		return
	}

	// Replace atomically: delete all current entries, then insert the new set.
	tx, err := db.DB.BeginTx(ctx, nil)
	if err != nil {
		log.Printf("allowlist: begin tx failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to start transaction"})
		return
	}
	defer tx.Rollback() //nolint:errcheck // superseded by Commit on success path

	if _, err := tx.ExecContext(ctx,
		`DELETE FROM org_plugin_allowlist WHERE org_id = $1`,
		orgID,
	); err != nil {
		log.Printf("allowlist: delete failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update allowlist"})
		return
	}
	for _, pluginName := range payload.Plugins {
		if _, err := tx.ExecContext(ctx, `
			INSERT INTO org_plugin_allowlist (org_id, plugin_name, enabled_by)
			VALUES ($1, $2, $3)
			ON CONFLICT (org_id, plugin_name) DO NOTHING
		`, orgID, pluginName, payload.EnabledBy); err != nil {
			log.Printf("allowlist: insert %q failed for org %s: %v", pluginName, orgID, err)
			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update allowlist"})
			return
		}
	}
	if err := tx.Commit(); err != nil {
		log.Printf("allowlist: commit failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to commit allowlist update"})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"org_id":    orgID,
		"plugins":   payload.Plugins,
		"allow_all": len(payload.Plugins) == 0,
	})
}

View File

@ -0,0 +1,555 @@
package handlers
import (
"bytes"
"context"
"database/sql"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// ─── helpers ───────────────────────────────────────────────────────────────
// newAllowlistGET builds a recorder and gin context for a GET against the
// org allowlist endpoint, with :id bound to orgID.
func newAllowlistGET(orgID string) (*httptest.ResponseRecorder, *gin.Context) {
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: orgID}}
	ctx.Request = httptest.NewRequest(http.MethodGet, "/orgs/"+orgID+"/plugins/allowlist", nil)
	return rec, ctx
}
// newAllowlistPUT builds a recorder and gin context for a PUT against the
// org allowlist endpoint, JSON-encoding body as the request payload.
func newAllowlistPUT(orgID string, body interface{}) (*httptest.ResponseRecorder, *gin.Context) {
	encoded, _ := json.Marshal(body)
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: orgID}}
	ctx.Request = httptest.NewRequest(http.MethodPut, "/orgs/"+orgID+"/plugins/allowlist",
		bytes.NewReader(encoded))
	ctx.Request.Header.Set("Content-Type", "application/json")
	return rec, ctx
}
// ─── GetAllowlist ──────────────────────────────────────────────────────────
// A GET for an unknown org must answer 404.
func TestGetAllowlist_OrgNotFound(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-missing").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))

	rec, ctx := newAllowlistGET("org-missing")
	NewOrgPluginAllowlistHandler().GetAllowlist(ctx)

	if rec.Code != http.StatusNotFound {
		t.Fatalf("expected 404, got %d: %s", rec.Code, rec.Body.String())
	}
}
// A DB failure during the org-existence check must surface as 500.
func TestGetAllowlist_DBErrorOnOrgCheck(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnError(sql.ErrConnDone)

	rec, ctx := newAllowlistGET("org-1")
	NewOrgPluginAllowlistHandler().GetAllowlist(ctx)

	if rec.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500, got %d: %s", rec.Code, rec.Body.String())
	}
}
// An org with no allowlist rows reports an empty list and allow_all=true.
func TestGetAllowlist_Empty(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	dbMock.ExpectQuery(`SELECT plugin_name, enabled_by, enabled_at`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"plugin_name", "enabled_by", "enabled_at"}))

	rec, ctx := newAllowlistGET("org-1")
	NewOrgPluginAllowlistHandler().GetAllowlist(ctx)

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	var out struct {
		OrgID    string           `json:"org_id"`
		Plugins  []allowlistEntry `json:"plugins"`
		AllowAll bool             `json:"allow_all"`
	}
	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
		t.Fatalf("bad JSON: %v", err)
	}
	if out.OrgID != "org-1" {
		t.Errorf("expected org_id=org-1, got %q", out.OrgID)
	}
	if len(out.Plugins) != 0 {
		t.Errorf("expected 0 plugins, got %d", len(out.Plugins))
	}
	if !out.AllowAll {
		t.Error("expected allow_all=true for empty list")
	}
}
// Rows come back sorted by plugin_name and allow_all flips to false.
func TestGetAllowlist_WithEntries(t *testing.T) {
	dbMock := setupTestDB(t)
	enabledAt := time.Date(2026, 4, 1, 0, 0, 0, 0, time.UTC)
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	dbMock.ExpectQuery(`SELECT plugin_name, enabled_by, enabled_at`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"plugin_name", "enabled_by", "enabled_at"}).
			AddRow("browser-automation", "admin-ws", enabledAt).
			AddRow("superpowers", "admin-ws", enabledAt))

	rec, ctx := newAllowlistGET("org-1")
	NewOrgPluginAllowlistHandler().GetAllowlist(ctx)

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	var out struct {
		OrgID    string           `json:"org_id"`
		Plugins  []allowlistEntry `json:"plugins"`
		AllowAll bool             `json:"allow_all"`
	}
	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
		t.Fatalf("bad JSON: %v", err)
	}
	if len(out.Plugins) != 2 {
		t.Fatalf("expected 2 plugins, got %d", len(out.Plugins))
	}
	if out.Plugins[0].PluginName != "browser-automation" {
		t.Errorf("expected first plugin=browser-automation, got %q", out.Plugins[0].PluginName)
	}
	if out.AllowAll {
		t.Error("expected allow_all=false when list is non-empty")
	}
}
// A DB failure on the allowlist query itself must surface as 500.
func TestGetAllowlist_DBErrorOnQuery(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	dbMock.ExpectQuery(`SELECT plugin_name, enabled_by, enabled_at`).
		WithArgs("org-1").
		WillReturnError(sql.ErrConnDone)

	rec, ctx := newAllowlistGET("org-1")
	NewOrgPluginAllowlistHandler().GetAllowlist(ctx)

	if rec.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500, got %d: %s", rec.Code, rec.Body.String())
	}
}
// ─── PutAllowlist ──────────────────────────────────────────────────────────
// Omitting enabled_by must be rejected with 400 before any DB work.
func TestPutAllowlist_MissingEnabledBy(t *testing.T) {
	setupTestDB(t)

	rec, ctx := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins": []string{"my-plugin"},
		// enabled_by intentionally omitted
	})
	NewOrgPluginAllowlistHandler().PutAllowlist(ctx)

	if rec.Code != http.StatusBadRequest {
		t.Fatalf("expected 400, got %d: %s", rec.Code, rec.Body.String())
	}
}
// A path-traversal-style plugin name must be rejected with 400.
func TestPutAllowlist_InvalidPluginName(t *testing.T) {
	setupTestDB(t)

	rec, ctx := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins":    []string{"../../evil"},
		"enabled_by": "admin-ws",
	})
	NewOrgPluginAllowlistHandler().PutAllowlist(ctx)

	if rec.Code != http.StatusBadRequest {
		t.Fatalf("expected 400 for invalid plugin name, got %d: %s", rec.Code, rec.Body.String())
	}
}
// A PUT against an unknown org must answer 404.
func TestPutAllowlist_OrgNotFound(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-missing").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))

	rec, ctx := newAllowlistPUT("org-missing", map[string]interface{}{
		"plugins":    []string{"my-plugin"},
		"enabled_by": "admin-ws",
	})
	NewOrgPluginAllowlistHandler().PutAllowlist(ctx)

	if rec.Code != http.StatusNotFound {
		t.Fatalf("expected 404, got %d: %s", rec.Code, rec.Body.String())
	}
}
// Happy path: delete-then-insert inside one transaction, echoing the new
// allowlist back with allow_all=false.
func TestPutAllowlist_AddPlugins(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	dbMock.ExpectBegin()
	dbMock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
		WithArgs("org-1").
		WillReturnResult(sqlmock.NewResult(0, 0))
	dbMock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
		WithArgs("org-1", "my-plugin", "admin-ws").
		WillReturnResult(sqlmock.NewResult(0, 1))
	dbMock.ExpectCommit()

	rec, ctx := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins":    []string{"my-plugin"},
		"enabled_by": "admin-ws",
	})
	NewOrgPluginAllowlistHandler().PutAllowlist(ctx)

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	var out struct {
		OrgID    string   `json:"org_id"`
		Plugins  []string `json:"plugins"`
		AllowAll bool     `json:"allow_all"`
	}
	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
		t.Fatalf("bad JSON: %v", err)
	}
	if len(out.Plugins) != 1 || out.Plugins[0] != "my-plugin" {
		t.Errorf("unexpected plugins: %v", out.Plugins)
	}
	if out.AllowAll {
		t.Error("expected allow_all=false for non-empty plugins list")
	}
}
// Sending an empty plugins array clears the allowlist: the DELETE runs, no
// INSERTs follow, and the response reports allow_all=true.
func TestPutAllowlist_ClearAllowlist(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	dbMock.ExpectBegin()
	dbMock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
		WithArgs("org-1").
		WillReturnResult(sqlmock.NewResult(0, 3))
	// No INSERT expected — empty plugins slice.
	dbMock.ExpectCommit()

	rec, ctx := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins":    []string{},
		"enabled_by": "admin-ws",
	})
	NewOrgPluginAllowlistHandler().PutAllowlist(ctx)

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	var out struct {
		AllowAll bool `json:"allow_all"`
	}
	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
		t.Fatalf("bad JSON: %v", err)
	}
	if !out.AllowAll {
		t.Error("expected allow_all=true after clearing all plugins")
	}
}
// Multiple plugin names produce one INSERT per name, in request order.
func TestPutAllowlist_MultiplePlugins(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	dbMock.ExpectBegin()
	dbMock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
		WithArgs("org-1").
		WillReturnResult(sqlmock.NewResult(0, 0))
	dbMock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
		WithArgs("org-1", "browser-automation", "admin-ws").
		WillReturnResult(sqlmock.NewResult(0, 1))
	dbMock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
		WithArgs("org-1", "superpowers", "admin-ws").
		WillReturnResult(sqlmock.NewResult(0, 1))
	dbMock.ExpectCommit()

	rec, ctx := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins":    []string{"browser-automation", "superpowers"},
		"enabled_by": "admin-ws",
	})
	NewOrgPluginAllowlistHandler().PutAllowlist(ctx)

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
}
// An INSERT failure mid-transaction must roll back and answer 500.
func TestPutAllowlist_InsertFails(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	dbMock.ExpectBegin()
	dbMock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
		WithArgs("org-1").
		WillReturnResult(sqlmock.NewResult(0, 0))
	dbMock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
		WithArgs("org-1", "my-plugin", "admin-ws").
		WillReturnError(sql.ErrConnDone)
	dbMock.ExpectRollback()

	rec, ctx := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins":    []string{"my-plugin"},
		"enabled_by": "admin-ws",
	})
	NewOrgPluginAllowlistHandler().PutAllowlist(ctx)

	if rec.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500 on insert failure, got %d: %s", rec.Code, rec.Body.String())
	}
}
// ─── resolveOrgID ──────────────────────────────────────────────────────────
// A workspace with a NULL parent_id is its own org root.
func TestResolveOrgID_OrgRoot(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-root").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))

	orgID, err := resolveOrgID(context.Background(), "ws-root")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if orgID != "ws-root" {
		t.Errorf("expected ws-root, got %q", orgID)
	}
}
// A workspace with a parent resolves to that parent as its org root.
func TestResolveOrgID_WithParent(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-child").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow("ws-parent"))

	orgID, err := resolveOrgID(context.Background(), "ws-child")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if orgID != "ws-parent" {
		t.Errorf("expected ws-parent, got %q", orgID)
	}
}
// sql.ErrNoRows maps to ("", nil): missing workspace is not an error here.
func TestResolveOrgID_NotFound(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-ghost").
		WillReturnError(sql.ErrNoRows)

	orgID, err := resolveOrgID(context.Background(), "ws-ghost")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if orgID != "" {
		t.Errorf("expected empty string for not-found workspace, got %q", orgID)
	}
}
// ─── checkOrgPluginAllowlist ───────────────────────────────────────────────
// Plugin absent from the list AND zero rows for the org → allow-all.
func TestCheckOrgPluginAllowlist_AllowAll_EmptyList(t *testing.T) {
	dbMock := setupTestDB(t)
	// resolveOrgID: no parent → ws-1 is org root
	dbMock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// plugin NOT in list
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "my-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	// count = 0 → allow-all
	dbMock.ExpectQuery(`SELECT COUNT\(\*\) FROM org_plugin_allowlist`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))

	gotBlocked, gotReason := checkOrgPluginAllowlist(context.Background(), "ws-1", "my-plugin")
	if gotBlocked {
		t.Errorf("expected not blocked (allow-all), got blocked: %s", gotReason)
	}
}
// Plugin explicitly on the allowlist → allowed (no count query needed).
func TestCheckOrgPluginAllowlist_Allowed_OnList(t *testing.T) {
	dbMock := setupTestDB(t)
	// resolveOrgID: no parent
	dbMock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// plugin IS in the allowlist
	dbMock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "my-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))

	gotBlocked, gotReason := checkOrgPluginAllowlist(context.Background(), "ws-1", "my-plugin")
	if gotBlocked {
		t.Errorf("expected not blocked (on list), got blocked: %s", gotReason)
	}
}
// TestCheckOrgPluginAllowlist_Blocked_NotOnList verifies enforcement: when the
// org has a non-empty allowlist (COUNT > 0) and the plugin is not on it, the
// install is blocked and a human-readable reason is returned.
func TestCheckOrgPluginAllowlist_Blocked_NotOnList(t *testing.T) {
	mock := setupTestDB(t)
	// resolveOrgID: no parent
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// plugin NOT in the list
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "evil-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	// count > 0 → allowlist is active
	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM org_plugin_allowlist`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(2))
	blocked, reason := checkOrgPluginAllowlist(context.Background(), "ws-1", "evil-plugin")
	if !blocked {
		t.Error("expected plugin to be blocked (not on non-empty allowlist)")
	}
	if reason == "" {
		t.Error("expected non-empty reason when blocked")
	}
}
// TestCheckOrgPluginAllowlist_ChildWorkspace_UsesParentOrg verifies that for a
// child workspace the allowlist is evaluated against the parent org's ID — the
// EXISTS expectation is pinned to "ws-parent", not "ws-child".
func TestCheckOrgPluginAllowlist_ChildWorkspace_UsesParentOrg(t *testing.T) {
	mock := setupTestDB(t)
	// resolveOrgID: ws-child has parent ws-parent
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-child").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow("ws-parent"))
	// allowlist check uses parent org ID (ws-parent)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-parent", "my-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	blocked, reason := checkOrgPluginAllowlist(context.Background(), "ws-child", "my-plugin")
	if blocked {
		t.Errorf("expected not blocked (on parent's allowlist), got blocked: %s", reason)
	}
}
// TestCheckOrgPluginAllowlist_FailOpen_OnResolveError verifies the fail-open
// policy: a DB error while resolving the org ID must not block the install
// (availability is preferred over enforcement on infrastructure errors).
func TestCheckOrgPluginAllowlist_FailOpen_OnResolveError(t *testing.T) {
	mock := setupTestDB(t)
	// DB error during resolveOrgID → fail-open
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnError(sql.ErrConnDone)
	blocked, _ := checkOrgPluginAllowlist(context.Background(), "ws-1", "any-plugin")
	if blocked {
		t.Error("expected fail-open (not blocked) on DB error during resolveOrgID")
	}
}
// TestCheckOrgPluginAllowlist_FailOpen_OnExistsError verifies fail-open when
// the EXISTS membership query itself errors: org resolution succeeds, the
// membership check fails, and the plugin is still allowed.
func TestCheckOrgPluginAllowlist_FailOpen_OnExistsError(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// DB error on EXISTS check → fail-open
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "any-plugin").
		WillReturnError(sql.ErrConnDone)
	blocked, _ := checkOrgPluginAllowlist(context.Background(), "ws-1", "any-plugin")
	if blocked {
		t.Error("expected fail-open (not blocked) on DB error during EXISTS check")
	}
}
// TestCheckOrgPluginAllowlist_FailOpen_OnCountError verifies fail-open at the
// last step: resolve and EXISTS succeed (plugin not on list), but the COUNT
// query errors — the plugin must still be allowed rather than blocked.
func TestCheckOrgPluginAllowlist_FailOpen_OnCountError(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "any-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	// DB error on COUNT check → fail-open
	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM org_plugin_allowlist`).
		WithArgs("ws-1").
		WillReturnError(sql.ErrConnDone)
	blocked, _ := checkOrgPluginAllowlist(context.Background(), "ws-1", "any-plugin")
	if blocked {
		t.Error("expected fail-open (not blocked) on DB error during COUNT check")
	}
}

View File

@ -63,6 +63,14 @@ func (h *PluginsHandler) Install(c *gin.Context) {
// has already cleaned it up (and its returned result is nil).
defer os.RemoveAll(result.StagedDir)
// Org plugin allowlist gate (#591).
// If the workspace's org has a non-empty allowlist, the plugin must be
// on it. An empty allowlist means allow-all (backward compat).
if blocked, reason := checkOrgPluginAllowlist(ctx, workspaceID, result.PluginName); blocked {
c.JSON(http.StatusForbidden, gin.H{"error": reason})
return
}
if err := h.deliverToContainer(ctx, workspaceID, result); err != nil {
var he *httpErr
if errors.As(err, &he) {

View File

@ -235,22 +235,58 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) {
var prevTask string
_ = db.DB.QueryRowContext(ctx, `SELECT COALESCE(current_task, '') FROM workspaces WHERE id = $1`, payload.WorkspaceID).Scan(&prevTask)
// #615: Clamp monthly_spend to a safe range before any DB write.
// A malicious or buggy agent could report math.MaxInt64, causing
// NUMERIC overflow or incorrect budget-enforcement comparisons.
// Negatives are meaningless (spend is always ≥ 0); the upper cap of
// $10 billion in cents is an intentionally astronomical value that no
// legitimate workspace will ever reach.
const maxMonthlySpend = int64(1_000_000_000_000) // $10B in cents
if payload.MonthlySpend < 0 {
payload.MonthlySpend = 0
}
if payload.MonthlySpend > maxMonthlySpend {
payload.MonthlySpend = maxMonthlySpend
}
// Update heartbeat columns. #73 guard: exclude 'removed' rows so a
// late heartbeat from a container that's being torn down doesn't
// refresh last_heartbeat_at on a tombstoned workspace (which would
// otherwise confuse the liveness monitor).
_, err := db.DB.ExecContext(ctx, `
UPDATE workspaces SET
last_heartbeat_at = now(),
last_error_rate = $2,
last_sample_error = $3,
active_tasks = $4,
uptime_seconds = $5,
current_task = $6,
updated_at = now()
WHERE id = $1 AND status != 'removed'
`, payload.WorkspaceID, payload.ErrorRate, payload.SampleError,
payload.ActiveTasks, payload.UptimeSeconds, payload.CurrentTask)
//
// monthly_spend: updated when the agent reports a positive value (cumulative
// USD cents for the current month). Zero means "no update" — never write
// zero to avoid accidentally clearing a previously-reported spend value.
var err error
if payload.MonthlySpend > 0 {
_, err = db.DB.ExecContext(ctx, `
UPDATE workspaces SET
last_heartbeat_at = now(),
last_error_rate = $2,
last_sample_error = $3,
active_tasks = $4,
uptime_seconds = $5,
current_task = $6,
monthly_spend = $7,
updated_at = now()
WHERE id = $1 AND status != 'removed'
`, payload.WorkspaceID, payload.ErrorRate, payload.SampleError,
payload.ActiveTasks, payload.UptimeSeconds, payload.CurrentTask,
payload.MonthlySpend)
} else {
_, err = db.DB.ExecContext(ctx, `
UPDATE workspaces SET
last_heartbeat_at = now(),
last_error_rate = $2,
last_sample_error = $3,
active_tasks = $4,
uptime_seconds = $5,
current_task = $6,
updated_at = now()
WHERE id = $1 AND status != 'removed'
`, payload.WorkspaceID, payload.ErrorRate, payload.SampleError,
payload.ActiveTasks, payload.UptimeSeconds, payload.CurrentTask)
}
if err != nil {
log.Printf("Heartbeat update error: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update"})

View File

@ -654,3 +654,184 @@ func TestRegister_DBErrorResponseIsOpaque(t *testing.T) {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// ==================== #615 — monthly_spend clamping ====================

// TestHeartbeat_MonthlySpend_WithinBounds verifies that a valid positive
// monthly_spend is written to the DB unchanged (no clamping needed).
// The 7-argument ExecExec expectation is what pins the "with spend" UPDATE
// path — sqlmock fails the test if the 6-argument variant runs instead.
func TestHeartbeat_MonthlySpend_WithinBounds(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewRegistryHandler(newTestBroadcaster())
	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs("ws-spend-ok").
		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
	// Expect the 7-argument UPDATE (with monthly_spend = $7).
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs("ws-spend-ok", 0.0, "", 0, 0, "", int64(15000)). // $150.00
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id").
		WithArgs("ws-spend-ok").
		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"workspace_id":"ws-spend-ok","monthly_spend":15000}`
	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Heartbeat(c)
	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestHeartbeat_MonthlySpend_NegativeClamped verifies that a negative
// monthly_spend value (invalid) is clamped to 0 before the DB write,
// which means the no-spend UPDATE path is taken (zero is "no update"). (#615)
// The 6-argument ExecExec expectation (no monthly_spend) is what proves the
// clamp happened — a raw negative write would use 7 arguments and fail here.
func TestHeartbeat_MonthlySpend_NegativeClamped(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewRegistryHandler(newTestBroadcaster())
	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs("ws-spend-neg").
		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
	// Clamped to 0 → no monthly_spend field → 6-argument UPDATE.
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs("ws-spend-neg", 0.0, "", 0, 0, "").
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id").
		WithArgs("ws-spend-neg").
		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"workspace_id":"ws-spend-neg","monthly_spend":-9999}`
	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Heartbeat(c)
	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("negative monthly_spend must be clamped to 0 (no-spend UPDATE path): %v", err)
	}
}
// TestHeartbeat_MonthlySpend_OverflowClamped verifies that an astronomically
// large monthly_spend is clamped to maxMonthlySpend ($10B in cents) rather
// than written raw to the DB, preventing NUMERIC overflow. (#615)
func TestHeartbeat_MonthlySpend_OverflowClamped(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewRegistryHandler(newTestBroadcaster())
	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs("ws-spend-overflow").
		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
	// Expect the 7-argument UPDATE with monthly_spend clamped to 1_000_000_000_000.
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs("ws-spend-overflow", 0.0, "", 0, 0, "", int64(1_000_000_000_000)).
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id").
		WithArgs("ws-spend-overflow").
		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	// Simulate a misbehaving agent reporting math.MaxInt64.
	// (9223372036854775807 fits in int64, so JSON decoding succeeds and the
	// clamp — not the decoder — is what protects the DB write.)
	body := `{"workspace_id":"ws-spend-overflow","monthly_spend":9223372036854775807}`
	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Heartbeat(c)
	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("math.MaxInt64 monthly_spend must be clamped to maxMonthlySpend: %v", err)
	}
}
// TestHeartbeat_MonthlySpend_ExactCap verifies the boundary: a value exactly
// equal to maxMonthlySpend ($10B) passes through without modification —
// the clamp condition is strictly greater-than, not greater-or-equal.
func TestHeartbeat_MonthlySpend_ExactCap(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewRegistryHandler(newTestBroadcaster())
	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs("ws-spend-cap").
		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
	// 7-argument UPDATE with the cap value passed through unchanged.
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs("ws-spend-cap", 0.0, "", 0, 0, "", int64(1_000_000_000_000)).
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id").
		WithArgs("ws-spend-cap").
		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"workspace_id":"ws-spend-cap","monthly_spend":1000000000000}`
	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Heartbeat(c)
	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("exact-cap monthly_spend should pass through unmodified: %v", err)
	}
}
// TestHeartbeat_MonthlySpend_Zero_NoUpdate verifies that monthly_spend=0 (or
// omitted) does NOT write monthly_spend to the DB — zero means "no update",
// never write zero to avoid clearing a previously-reported spend value.
func TestHeartbeat_MonthlySpend_Zero_NoUpdate(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewRegistryHandler(newTestBroadcaster())
	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs("ws-spend-zero").
		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
	// 6-argument UPDATE — monthly_spend NOT included.
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs("ws-spend-zero", 0.0, "", 0, 0, "").
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id").
		WithArgs("ws-spend-zero").
		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	// Explicitly set monthly_spend = 0 (same path as an omitted field).
	body := `{"workspace_id":"ws-spend-zero","monthly_spend":0}`
	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Heartbeat(c)
	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("monthly_spend=0 must not trigger a DB write for spend: %v", err)
	}
}

View File

@ -0,0 +1,107 @@
package handlers
import (
"encoding/json"
"fmt"
"log"
"net/http"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/gin-gonic/gin"
)
// aguiEvent is the AG-UI envelope written to the SSE stream.
// Spec: {"type":"<event_name>","timestamp":<unix_ms>,"data":{...}}
type aguiEvent struct {
	Type      string          `json:"type"`      // event name, e.g. TASK_UPDATED
	Timestamp int64           `json:"timestamp"` // Unix milliseconds
	Data      json.RawMessage `json:"data"`      // event payload, passed through unmodified
}
// SSEHandler streams workspace events as AG-UI-compatible Server-Sent Events.
type SSEHandler struct {
	// broadcaster is the in-process event source; StreamEvents subscribes
	// to it per workspace via SubscribeSSE.
	broadcaster *events.Broadcaster
}
// NewSSEHandler constructs an SSEHandler that sources its events from b.
func NewSSEHandler(b *events.Broadcaster) *SSEHandler {
	handler := &SSEHandler{broadcaster: b}
	return handler
}
// StreamEvents handles GET /workspaces/:id/events/stream.
//
// Authentication is enforced by the upstream WorkspaceAuth middleware (bearer
// token bound to :id). This handler only needs to:
//  1. Verify the workspace exists (returns 404 if not).
//  2. Set SSE headers.
//  3. Subscribe to the in-process broadcaster and relay events until the
//     client disconnects (context cancellation).
//
// AG-UI envelope per event:
//
//	data: {"type":"<event>","timestamp":<unix_ms>,"data":{...}}\n\n
func (h *SSEHandler) StreamEvents(c *gin.Context) {
	workspaceID := c.Param("id")
	ctx := c.Request.Context()
	// Verify the workspace exists — 404 early rather than serving an empty stream.
	var exists bool
	if err := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		workspaceID,
	).Scan(&exists); err != nil {
		log.Printf("SSE: workspace existence check failed for %s: %v", workspaceID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify workspace"})
		return
	}
	if !exists {
		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
		return
	}
	// SSE response headers. Must be set before the first write/flush —
	// headers are committed on the first flush.
	c.Header("Content-Type", "text/event-stream")
	c.Header("Cache-Control", "no-cache")
	c.Header("Connection", "keep-alive")
	// Instruct nginx / reverse-proxies to disable buffering so events reach
	// the client immediately rather than being held in a proxy buffer.
	c.Header("X-Accel-Buffering", "no")
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		// Should never happen with gin's responseWriter, but guard defensively.
		c.JSON(http.StatusInternalServerError, gin.H{"error": "streaming not supported"})
		return
	}
	ch, cancel := h.broadcaster.SubscribeSSE(workspaceID)
	defer cancel()
	// Send an initial SSE comment so the client knows the stream is live.
	// (Lines starting with ':' are comments per the SSE spec and are ignored
	// by EventSource clients.)
	fmt.Fprintf(c.Writer, ": ping\n\n")
	flusher.Flush()
	for {
		select {
		case <-ctx.Done():
			// Client disconnected (or request cancelled) — stop relaying.
			return
		case msg, ok := <-ch:
			if !ok {
				// Broadcaster closed the subscription channel — presumably
				// shutdown or eviction; end the stream.
				return
			}
			env := aguiEvent{
				Type:      msg.Event,
				Timestamp: msg.Timestamp.UnixMilli(),
				Data:      msg.Payload,
			}
			b, err := json.Marshal(env)
			if err != nil {
				// Drop the unmarshalable event but keep the stream alive.
				log.Printf("SSE: marshal error for workspace %s event %s: %v", workspaceID, msg.Event, err)
				continue
			}
			fmt.Fprintf(c.Writer, "data: %s\n\n", b)
			flusher.Flush()
		}
	}
}

View File

@ -0,0 +1,237 @@
package handlers
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// expectWorkspaceExists queues the EXISTS query that StreamEvents fires first.
// exists controls the single boolean row returned, i.e. whether the handler
// sees the workspace as present (stream) or missing (404).
func expectWorkspaceExists(mock sqlmock.Sqlmock, workspaceID string, exists bool) {
	rows := sqlmock.NewRows([]string{"exists"}).AddRow(exists)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs(workspaceID).
		WillReturnRows(rows)
}
// runSSEHandler starts StreamEvents in a background goroutine on a
// cancellable request context.
//
// It returns the response recorder, a cancel function that terminates the
// stream (by cancelling the request context), and a channel that is closed
// when the handler goroutine exits. Typical caller pattern: sleep briefly so
// the handler can subscribe, broadcast, then cancel() and <-done to drain.
//
// Fixed here: the second named return was declared `inject func()` but never
// assigned and every caller binds it as `cancel`; the doc comment also
// referenced a nonexistent `waitAfterStart` parameter. Names and docs now
// match actual behavior (the signature's types are unchanged).
func runSSEHandler(t *testing.T, h *SSEHandler, workspaceID string) (
	w *httptest.ResponseRecorder,
	cancel func(), // cancels the request context, ending the stream
	done <-chan struct{}, // closed when StreamEvents returns
) {
	t.Helper()
	ctx, cancelCtx := context.WithCancel(context.Background())
	w = httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: workspaceID}}
	c.Request = httptest.NewRequest("GET", "/workspaces/"+workspaceID+"/events/stream", nil).WithContext(ctx)
	doneCh := make(chan struct{})
	go func() {
		defer close(doneCh)
		h.StreamEvents(c)
	}()
	return w, cancelCtx, doneCh
}
// TestSSE_ContentType verifies the handler sets text/event-stream on the response.
// The prefix check (rather than equality) tolerates a charset suffix.
func TestSSE_ContentType(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	// Allow the handler to subscribe, then tear it down.
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	ct := w.Header().Get("Content-Type")
	if !strings.HasPrefix(ct, "text/event-stream") {
		t.Errorf("expected Content-Type text/event-stream, got %q", ct)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_InitialPing verifies the handler emits the ": ping" SSE comment on
// connect (an SSE comment line tells the client the stream is live without
// delivering an event).
func TestSSE_InitialPing(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	// Allow the handler to subscribe and write the ping, then tear it down.
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	body := w.Body.String()
	if !strings.Contains(body, ": ping") {
		t.Errorf("expected SSE ping comment, body was:\n%s", body)
	}
	// Consistency fix: sibling SSE tests verify the queued EXISTS expectation
	// was actually consumed; this test previously skipped the check.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_AGUIFormat verifies that a broadcast event is wrapped in the AG-UI
// envelope: {"type":...,"timestamp":<unix_ms>,"data":{...}} on a "data: " line.
func TestSSE_AGUIFormat(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	// Wait for the handler goroutine to reach its select loop.
	time.Sleep(30 * time.Millisecond)
	b.BroadcastOnly("ws-1", "TASK_UPDATED", map[string]string{"status": "running"})
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	body := w.Body.String()
	// Find the first "data: ..." line.
	var dataLine string
	for _, line := range strings.Split(body, "\n") {
		if strings.HasPrefix(line, "data: ") {
			dataLine = strings.TrimPrefix(line, "data: ")
			break
		}
	}
	if dataLine == "" {
		t.Fatalf("no data: line found in SSE response:\n%s", body)
	}
	// Decode into an anonymous mirror of the envelope so the test pins the
	// wire format independently of the handler's own struct definition.
	var env struct {
		Type      string          `json:"type"`
		Timestamp int64           `json:"timestamp"`
		Data      json.RawMessage `json:"data"`
	}
	if err := json.Unmarshal([]byte(dataLine), &env); err != nil {
		t.Fatalf("invalid AG-UI envelope JSON %q: %v", dataLine, err)
	}
	if env.Type != "TASK_UPDATED" {
		t.Errorf("expected type TASK_UPDATED, got %q", env.Type)
	}
	if env.Timestamp <= 0 {
		t.Errorf("expected positive timestamp, got %d", env.Timestamp)
	}
	if len(env.Data) == 0 || string(env.Data) == "null" {
		t.Errorf("expected non-null data field, got %q", string(env.Data))
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_WorkspaceFilter verifies that events for a different workspace are
// NOT delivered — a subscriber to ws-1 must never see ws-99 traffic (the
// broadcast payload here is deliberately labeled "secret" to make a leak
// obvious in the failure message).
func TestSSE_WorkspaceFilter(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	time.Sleep(30 * time.Millisecond)
	// Broadcast to a completely different workspace.
	b.BroadcastOnly("ws-99", "AGENT_MESSAGE", map[string]string{"text": "secret"})
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	body := w.Body.String()
	// Any "data: " line at all means an event leaked across workspaces.
	for _, line := range strings.Split(body, "\n") {
		if strings.HasPrefix(line, "data: ") {
			t.Errorf("expected no data: events for different workspace, got: %s", line)
		}
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_WorkspaceNotFound verifies a 404 is returned when the workspace does
// not exist. The handler returns before subscribing, so this test runs it
// synchronously — no background goroutine or cancellable context needed.
func TestSSE_WorkspaceNotFound(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "missing-ws", false)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "missing-ws"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/missing-ws/events/stream", nil)
	h.StreamEvents(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404 for missing workspace, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_MultipleEventsDelivered verifies multiple sequential broadcasts all
// arrive, and that they arrive in broadcast order.
func TestSSE_MultipleEventsDelivered(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	time.Sleep(30 * time.Millisecond)
	b.BroadcastOnly("ws-1", "AGENT_MESSAGE", map[string]string{"msg": "one"})
	b.BroadcastOnly("ws-1", "TASK_UPDATED", map[string]string{"status": "done"})
	b.BroadcastOnly("ws-1", "A2A_RESPONSE", map[string]string{"result": "ok"})
	time.Sleep(50 * time.Millisecond)
	cancel()
	<-done
	body := w.Body.String()
	// Collect every "data: " line from the SSE stream.
	var dataLines []string
	for _, line := range strings.Split(body, "\n") {
		if strings.HasPrefix(line, "data: ") {
			dataLines = append(dataLines, line)
		}
	}
	if len(dataLines) != 3 {
		t.Errorf("expected 3 data: lines, got %d:\n%s", len(dataLines), body)
	}
	// Verify event types appear in order.
	expectedTypes := []string{"AGENT_MESSAGE", "TASK_UPDATED", "A2A_RESPONSE"}
	for i, dl := range dataLines {
		var env struct {
			Type string `json:"type"`
		}
		if err := json.Unmarshal([]byte(strings.TrimPrefix(dl, "data: ")), &env); err != nil {
			t.Fatalf("line %d: invalid JSON: %v", i, err)
		}
		if env.Type != expectedTypes[i] {
			t.Errorf("line %d: expected type %s, got %s", i, expectedTypes[i], env.Type)
		}
	}
	// Consistency fix: sibling SSE tests verify the queued EXISTS expectation
	// was actually consumed; this test previously skipped the check.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}

View File

@ -10,6 +10,7 @@ import (
"path/filepath"
"strings"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/crypto"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware"
@ -59,6 +60,14 @@ func (h *WorkspaceHandler) SetEnvMutators(r *provisionhook.Registry) {
h.envMutators = r
}
// TokenRegistry returns the provisionhook.Registry so the router can
// wire the GET /admin/github-installation-token handler without coupling
// to WorkspaceHandler's internals. Returns nil when no plugin has been
// registered (dev / self-hosted deployments without a GitHub App).
func (h *WorkspaceHandler) TokenRegistry() *provisionhook.Registry {
return h.envMutators
}
// Create handles POST /workspaces
func (h *WorkspaceHandler) Create(c *gin.Context) {
var payload models.CreateWorkspacePayload
@ -129,17 +138,59 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
return
}
// Insert workspace with runtime persisted in DB
_, err := db.DB.ExecContext(ctx, `
INSERT INTO workspaces (id, name, role, tier, runtime, awareness_namespace, status, parent_id, workspace_dir, workspace_access)
VALUES ($1, $2, $3, $4, $5, $6, 'provisioning', $7, $8, $9)
`, id, payload.Name, role, payload.Tier, payload.Runtime, awarenessNamespace, payload.ParentID, workspaceDir, workspaceAccess)
// Begin a transaction so the workspace row and any initial secrets are
// committed atomically. A secret-encrypt or DB error rolls back the
// workspace insert so we never leave a workspace row with missing secrets.
tx, txErr := db.DB.BeginTx(ctx, nil)
if txErr != nil {
log.Printf("Create workspace: begin tx error: %v", txErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create workspace"})
return
}
// Insert workspace with runtime persisted in DB (inside transaction)
_, err := tx.ExecContext(ctx, `
INSERT INTO workspaces (id, name, role, tier, runtime, awareness_namespace, status, parent_id, workspace_dir, workspace_access, budget_limit)
VALUES ($1, $2, $3, $4, $5, $6, 'provisioning', $7, $8, $9, $10)
`, id, payload.Name, role, payload.Tier, payload.Runtime, awarenessNamespace, payload.ParentID, workspaceDir, workspaceAccess, payload.BudgetLimit)
if err != nil {
tx.Rollback() //nolint:errcheck
log.Printf("Create workspace error: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create workspace"})
return
}
// Persist initial secrets from the create payload (inside same transaction).
// nil/empty map is a no-op. Any failure rolls back the workspace insert
// so we never have a workspace row without its intended secrets.
for k, v := range payload.Secrets {
encrypted, encErr := crypto.Encrypt([]byte(v))
if encErr != nil {
tx.Rollback() //nolint:errcheck
log.Printf("Create workspace %s: failed to encrypt secret %q: %v", id, k, encErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encrypt secret: " + k})
return
}
version := crypto.CurrentEncryptionVersion()
if _, dbErr := tx.ExecContext(ctx, `
INSERT INTO workspace_secrets (workspace_id, key, encrypted_value, encryption_version)
VALUES ($1, $2, $3, $4)
ON CONFLICT (workspace_id, key) DO UPDATE
SET encrypted_value = $3, encryption_version = $4, updated_at = now()
`, id, k, encrypted, version); dbErr != nil {
tx.Rollback() //nolint:errcheck
log.Printf("Create workspace %s: failed to persist secret %q: %v", id, k, dbErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save secret: " + k})
return
}
}
if commitErr := tx.Commit(); commitErr != nil {
log.Printf("Create workspace %s: transaction commit failed: %v", id, commitErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create workspace"})
return
}
// Insert canvas layout — non-fatal: workspace can be dragged into position later
if _, err := db.DB.ExecContext(ctx, `
INSERT INTO canvas_layouts (workspace_id, x, y) VALUES ($1, $2, $3)
@ -242,10 +293,13 @@ func scanWorkspaceRow(rows interface {
var collapsed bool
var parentID *string
var agentCard []byte
var budgetLimit sql.NullInt64
var monthlySpend int64
err := rows.Scan(&id, &name, &role, &tier, &status, &agentCard, &url,
&parentID, &activeTasks, &errorRate, &sampleError, &uptimeSeconds,
&currentTask, &runtime, &workspaceDir, &x, &y, &collapsed)
&currentTask, &runtime, &workspaceDir, &x, &y, &collapsed,
&budgetLimit, &monthlySpend)
if err != nil {
return nil, err
}
@ -264,11 +318,19 @@ func scanWorkspaceRow(rows interface {
"current_task": currentTask,
"runtime": runtime,
"workspace_dir": nilIfEmpty(workspaceDir),
"monthly_spend": monthlySpend,
"x": x,
"y": y,
"collapsed": collapsed,
}
// budget_limit: nil when no limit set, int64 otherwise
if budgetLimit.Valid {
ws["budget_limit"] = budgetLimit.Int64
} else {
ws["budget_limit"] = nil
}
// Only include non-empty values
if role != "" {
ws["role"] = role
@ -293,7 +355,8 @@ const workspaceListQuery = `
COALESCE(w.last_sample_error, ''), w.uptime_seconds,
COALESCE(w.current_task, ''), COALESCE(w.runtime, 'langgraph'),
COALESCE(w.workspace_dir, ''),
COALESCE(cl.x, 0), COALESCE(cl.y, 0), COALESCE(cl.collapsed, false)
COALESCE(cl.x, 0), COALESCE(cl.y, 0), COALESCE(cl.collapsed, false),
w.budget_limit, COALESCE(w.monthly_spend, 0)
FROM workspaces w
LEFT JOIN canvas_layouts cl ON cl.workspace_id = w.id
WHERE w.status != 'removed'
@ -338,7 +401,8 @@ func (h *WorkspaceHandler) Get(c *gin.Context) {
COALESCE(w.last_sample_error, ''), w.uptime_seconds,
COALESCE(w.current_task, ''), COALESCE(w.runtime, 'langgraph'),
COALESCE(w.workspace_dir, ''),
COALESCE(cl.x, 0), COALESCE(cl.y, 0), COALESCE(cl.collapsed, false)
COALESCE(cl.x, 0), COALESCE(cl.y, 0), COALESCE(cl.collapsed, false),
w.budget_limit, COALESCE(w.monthly_spend, 0)
FROM workspaces w
LEFT JOIN canvas_layouts cl ON cl.workspace_id = w.id
WHERE w.id = $1
@ -355,6 +419,12 @@ func (h *WorkspaceHandler) Get(c *gin.Context) {
return
}
// Strip financial fields — GET /workspaces/:id is on the open router.
// Any caller with a valid UUID would otherwise read billing data.
// The dedicated budget/spend endpoints are AdminAuth-gated. (#611)
delete(ws, "budget_limit")
delete(ws, "monthly_spend")
c.JSON(http.StatusOK, ws)
}
@ -455,6 +525,10 @@ var sensitiveUpdateFields = map[string]struct{}{
"parent_id": {},
"runtime": {},
"workspace_dir": {},
// budget_limit is intentionally NOT here. The dedicated
// PATCH /workspaces/:id/budget (AdminAuth) is the only write path.
// Accepting it here — even behind ValidateAnyToken — lets workspace agents
// self-clear their own spending ceiling. (#611 Security Auditor finding)
}
// Update handles PATCH /workspaces/:id
@ -552,6 +626,10 @@ func (h *WorkspaceHandler) Update(c *gin.Context) {
}
needsRestart = true
}
// NOTE: budget_limit is intentionally NOT handled here. The dedicated
// PATCH /workspaces/:id/budget (AdminAuth) is the only write path.
// This endpoint uses ValidateAnyToken — any enrolled workspace bearer
// could otherwise self-clear its own spending ceiling. (#611 Security Auditor)
// Update canvas position if both x and y provided
if x, xOk := body["x"]; xOk {

View File

@ -0,0 +1,438 @@
package handlers
// Tests for per-workspace budget_limit field and A2A enforcement (#541).
//
// Coverage:
// - GET /workspaces/:id includes budget_limit (nil when unset, int when set)
// - GET /workspaces/:id includes monthly_spend
// - POST /workspaces creates workspace with budget_limit
// - PATCH /workspaces/:id updates budget_limit (nil clears the ceiling)
// - A2A proxy returns 429 when monthly_spend >= budget_limit
// - A2A proxy passes through when monthly_spend < budget_limit
// - A2A proxy passes through when budget_limit is NULL (no limit)
// - A2A proxy fail-open on DB error during budget check
import (
"bytes"
"database/sql"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// wsColumns is the canonical column list for scanWorkspaceRow tests.
// The order must match the scan order in scanWorkspaceRow (and therefore the
// SELECT column order of the workspace queries) — sqlmock delivers rows
// positionally, so a reorder here silently mis-assigns fields.
var wsColumns = []string{
	"id", "name", "role", "tier", "status", "agent_card", "url",
	"parent_id", "active_tasks", "last_error_rate", "last_sample_error",
	"uptime_seconds", "current_task", "runtime", "workspace_dir", "x", "y", "collapsed",
	"budget_limit", "monthly_spend",
}
// ==================== GET — financial fields stripped from open endpoint ====================

// TestWorkspaceBudget_Get_NilLimit verifies that budget_limit and monthly_spend
// are NOT present in GET /workspaces/:id. The endpoint is on the open router —
// any caller with a valid UUID must not read billing data. (#611 Security Auditor)
func TestWorkspaceBudget_Get_NilLimit(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())

	// Row values are positional against wsColumns; the last two are the
	// financial columns under test (NULL limit, zero spend).
	mock.ExpectQuery("SELECT w.id, w.name").
		WithArgs("ws-nobudget").
		WillReturnRows(sqlmock.NewRows(wsColumns).
			AddRow("ws-nobudget", "Free Agent", "worker", 1, "online",
				[]byte(`{}`), "http://localhost:9001",
				nil, 0, 0.0, "", 0, "", "langgraph", "",
				0.0, 0.0, false,
				nil, // budget_limit NULL
				0)) // monthly_spend 0

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-nobudget"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-nobudget", nil)

	handler.Get(c)

	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("failed to parse response: %v", err)
	}
	// #611: financial fields must NOT appear on the open GET endpoint.
	// Presence of the key alone fails the test — even a null value leaks
	// that the workspace has billing data.
	if _, present := resp["budget_limit"]; present {
		t.Errorf("budget_limit must not appear in open GET /workspaces/:id response")
	}
	if _, present := resp["monthly_spend"]; present {
		t.Errorf("monthly_spend must not appear in open GET /workspaces/:id response")
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestWorkspaceBudget_Get_WithLimit verifies that budget_limit and monthly_spend
// are stripped from the open GET /workspaces/:id even when the DB has non-zero
// values. Financial reads go through the AdminAuth-gated budget endpoint. (#611)
func TestWorkspaceBudget_Get_WithLimit(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())

	// Non-NULL financial columns — the stripping must not depend on the
	// values happening to be empty.
	mock.ExpectQuery("SELECT w.id, w.name").
		WithArgs("ws-limited").
		WillReturnRows(sqlmock.NewRows(wsColumns).
			AddRow("ws-limited", "Capped Agent", "worker", 1, "online",
				[]byte(`{}`), "http://localhost:9002",
				nil, 0, 0.0, "", 0, "", "langgraph", "",
				0.0, 0.0, false,
				int64(500),  // budget_limit = $5.00 in DB
				int64(123))) // monthly_spend = $1.23 in DB

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-limited"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-limited", nil)

	handler.Get(c)

	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("failed to parse response: %v", err)
	}
	// #611: financial fields must NOT appear on the open GET endpoint even when
	// the DB has non-zero values — they're stripped before c.JSON().
	if _, present := resp["budget_limit"]; present {
		t.Errorf("budget_limit must not appear in open GET /workspaces/:id response (got %v)", resp["budget_limit"])
	}
	if _, present := resp["monthly_spend"]; present {
		t.Errorf("monthly_spend must not appear in open GET /workspaces/:id response (got %v)", resp["monthly_spend"])
	}
	// Confirm non-financial fields are still present — guards against the
	// handler over-stripping the response.
	if resp["name"] != "Capped Agent" {
		t.Errorf("expected name 'Capped Agent', got %v", resp["name"])
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// ==================== POST — create with budget_limit ====================

// TestWorkspaceBudget_Create_WithLimit verifies that POST /workspaces with
// a budget_limit passes the value as the 10th INSERT parameter ($10).
func TestWorkspaceBudget_Create_WithLimit(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())

	budgetVal := int64(1000) // $10.00

	// Argument order mirrors the handler's INSERT parameter list; a positional
	// mismatch here fails WithArgs rather than silently passing.
	mock.ExpectBegin()
	mock.ExpectExec("INSERT INTO workspaces").
		WithArgs(
			sqlmock.AnyArg(), // id
			"Budgeted Agent", // name
			nil,              // role
			1,                // tier
			"langgraph",      // runtime
			sqlmock.AnyArg(), // awareness_namespace
			(*string)(nil),   // parent_id
			nil,              // workspace_dir
			"none",           // workspace_access
			&budgetVal,       // budget_limit ($10)
		).
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectCommit()

	// Post-commit bookkeeping inserts (layout + structure event).
	mock.ExpectExec("INSERT INTO canvas_layouts").
		WithArgs(sqlmock.AnyArg(), float64(0), float64(0)).
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectExec("INSERT INTO structure_events").
		WillReturnResult(sqlmock.NewResult(0, 1))

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"name":"Budgeted Agent","budget_limit":1000}`
	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")

	handler.Create(c)

	if w.Code != http.StatusCreated {
		t.Errorf("expected 201, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// ==================== PATCH — budget_limit silently ignored on general update ====================

// TestWorkspaceBudget_Update_SetLimit verifies that PATCH /workspaces/:id with
// budget_limit=500 does NOT issue any DB write for budget_limit. The only write
// path is the AdminAuth-gated PATCH /workspaces/:id/budget endpoint. (#611)
func TestWorkspaceBudget_Update_SetLimit(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())

	// The only DB activity allowed is the existence probe. Because sqlmock
	// rejects any call it was not told to expect, a budget_limit UPDATE
	// would surface in ExpectationsWereMet below.
	mock.ExpectQuery("SELECT EXISTS.*workspaces WHERE id").
		WithArgs("ws-upd-budget").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))

	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-upd-budget"}}
	payload := `{"budget_limit":500}`
	ctx.Request = httptest.NewRequest("PATCH", "/workspaces/ws-upd-budget", bytes.NewBufferString(payload))
	ctx.Request.Header.Set("Content-Type", "application/json")

	handler.Update(ctx)

	if rec.Code != http.StatusOK {
		t.Errorf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unexpected DB activity — budget_limit must not be written via general Update: %v", err)
	}
}
// TestWorkspaceBudget_Update_ClearLimit verifies that PATCH /workspaces/:id
// with budget_limit=null does NOT issue any DB write for budget_limit. (#611)
// Clearing a ceiling is the most valuable self-serve attack, so the null case
// gets its own test rather than piggybacking on the set-limit case.
func TestWorkspaceBudget_Update_ClearLimit(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())

	// Only the existence probe fires; no UPDATE for budget_limit.
	mock.ExpectQuery("SELECT EXISTS.*workspaces WHERE id").
		WithArgs("ws-clear-budget").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	// No ExpectExec — a budget_limit write here would re-open the vulnerability.

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-clear-budget"}}
	body := `{"budget_limit":null}`
	c.Request = httptest.NewRequest("PATCH", "/workspaces/ws-clear-budget", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")

	handler.Update(c)

	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unexpected DB activity — budget_limit must not be written via general Update: %v", err)
	}
}
// ==================== A2A enforcement ====================

// TestWorkspaceBudget_A2A_ExceededReturns402 verifies that the A2A proxy
// returns HTTP 402 {"error":"workspace budget limit exceeded"} when
// monthly_spend equals budget_limit (the boundary case: spend == limit
// already counts as exceeded).
func TestWorkspaceBudget_A2A_ExceededReturns402(t *testing.T) {
	mock := setupTestDB(t)
	mr := setupTestRedis(t)
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())

	// Cache a URL so resolveAgentURL doesn't need a DB query after budget check
	mr.Set(fmt.Sprintf("ws:%s:url", "ws-over-budget"), "http://localhost:9999")

	// Budget check query: spend = limit → exceeded
	mock.ExpectQuery("SELECT budget_limit, COALESCE").
		WithArgs("ws-over-budget").
		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
			AddRow(int64(500), int64(500)))

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-over-budget"}}
	body := `{"message":{"role":"user","parts":[{"text":"hello"}]}}`
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-over-budget/a2a", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")

	handler.ProxyA2A(c)

	if w.Code != http.StatusPaymentRequired {
		t.Errorf("expected 402 when budget exceeded, got %d: %s", w.Code, w.Body.String())
	}
	// Fix: the Unmarshal error was previously discarded — a non-JSON body would
	// have produced a misleading "wrong error value" failure instead of a clear
	// parse failure. Siblings in this file already check this error.
	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("failed to parse response: %v", err)
	}
	if resp["error"] != "workspace budget limit exceeded" {
		t.Errorf("expected 'workspace budget limit exceeded', got %v", resp["error"])
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestWorkspaceBudget_A2A_AboveLimitReturns402 verifies 402 when spend > limit.
func TestWorkspaceBudget_A2A_AboveLimitReturns402(t *testing.T) {
	mock := setupTestDB(t)
	mr := setupTestRedis(t)
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())

	// Pre-cache an agent URL; with the limit exceeded the test should pass
	// without the proxy ever reaching that address.
	mr.Set(fmt.Sprintf("ws:%s:url", "ws-way-over"), "http://localhost:9999")

	// spend (9999) far beyond limit (100) — still a plain 402.
	mock.ExpectQuery("SELECT budget_limit, COALESCE").
		WithArgs("ws-way-over").
		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
			AddRow(int64(100), int64(9999)))

	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ws-way-over"}}
	payload := `{"message":{"role":"user","parts":[{"text":"test"}]}}`
	ctx.Request = httptest.NewRequest("POST", "/workspaces/ws-way-over/a2a", bytes.NewBufferString(payload))
	ctx.Request.Header.Set("Content-Type", "application/json")

	handler.ProxyA2A(ctx)

	if rec.Code != http.StatusPaymentRequired {
		t.Errorf("expected 402 when spend > limit, got %d: %s", rec.Code, rec.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestWorkspaceBudget_A2A_UnderLimitPassesThrough verifies that A2A calls
// succeed normally when monthly_spend is below budget_limit.
func TestWorkspaceBudget_A2A_UnderLimitPassesThrough(t *testing.T) {
	mock := setupTestDB(t)
	mr := setupTestRedis(t)
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())

	// Stand up a minimal mock agent that returns a valid A2A response
	agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		fmt.Fprint(w, `{"jsonrpc":"2.0","id":"1","result":{"status":"ok"}}`)
	}))
	defer agentServer.Close()
	mr.Set(fmt.Sprintf("ws:%s:url", "ws-under-budget"), agentServer.URL)

	// Budget check: spend (100) < limit (500) → pass-through
	mock.ExpectQuery("SELECT budget_limit, COALESCE").
		WithArgs("ws-under-budget").
		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
			AddRow(int64(500), int64(100)))
	// Activity log INSERT from logA2ASuccess
	mock.ExpectExec("INSERT INTO activity_logs").
		WillReturnResult(sqlmock.NewResult(0, 1))

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-under-budget"}}
	body := `{"jsonrpc":"2.0","id":"1","method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hello"}]}}}`
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-under-budget/a2a", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")

	handler.ProxyA2A(c)

	// Give the async logA2ASuccess goroutine a moment to fire.
	// NOTE(review): a fixed 50ms sleep is best-effort and could flake on a
	// loaded CI runner — a synchronization hook would be sturdier. TODO confirm.
	time.Sleep(50 * time.Millisecond)

	if w.Code != http.StatusOK {
		t.Errorf("expected 200 when under budget, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestWorkspaceBudget_A2A_NilLimitPassesThrough verifies that when
// budget_limit IS NULL (no ceiling set), A2A calls pass through unconditionally
// — even an enormous monthly_spend must not block the request.
func TestWorkspaceBudget_A2A_NilLimitPassesThrough(t *testing.T) {
	mock := setupTestDB(t)
	mr := setupTestRedis(t)
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())

	// Minimal mock agent returning a well-formed JSON-RPC result.
	agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		fmt.Fprint(w, `{"jsonrpc":"2.0","id":"2","result":{"status":"ok"}}`)
	}))
	defer agentServer.Close()
	mr.Set(fmt.Sprintf("ws:%s:url", "ws-no-limit"), agentServer.URL)

	// budget_limit NULL → no enforcement regardless of monthly_spend
	mock.ExpectQuery("SELECT budget_limit, COALESCE").
		WithArgs("ws-no-limit").
		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
			AddRow(nil, int64(999999))) // huge spend but no limit set
	mock.ExpectExec("INSERT INTO activity_logs").
		WillReturnResult(sqlmock.NewResult(0, 1))

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-no-limit"}}
	body := `{"jsonrpc":"2.0","id":"2","method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hi"}]}}}`
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-no-limit/a2a", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")

	handler.ProxyA2A(c)

	// Let the async activity-log goroutine fire before checking expectations.
	time.Sleep(50 * time.Millisecond)

	if w.Code != http.StatusOK {
		t.Errorf("expected 200 when no limit set, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestWorkspaceBudget_A2A_DBErrorFailOpen verifies that a DB error during the
// budget check is fail-open — the request proceeds rather than being blocked.
// Rationale: a DB outage should degrade budget *enforcement*, not take down
// all agent-to-agent traffic.
func TestWorkspaceBudget_A2A_DBErrorFailOpen(t *testing.T) {
	mock := setupTestDB(t)
	mr := setupTestRedis(t)
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())

	// Mock agent must be reachable — fail-open means the proxy still dials it.
	agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		fmt.Fprint(w, `{"jsonrpc":"2.0","id":"3","result":{"status":"ok"}}`)
	}))
	defer agentServer.Close()
	mr.Set(fmt.Sprintf("ws:%s:url", "ws-db-err-budget"), agentServer.URL)

	// Budget check fails with DB error → fail-open (request proceeds)
	mock.ExpectQuery("SELECT budget_limit, COALESCE").
		WithArgs("ws-db-err-budget").
		WillReturnError(sql.ErrConnDone)
	mock.ExpectExec("INSERT INTO activity_logs").
		WillReturnResult(sqlmock.NewResult(0, 1))

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-db-err-budget"}}
	body := `{"jsonrpc":"2.0","id":"3","method":"message/send","params":{"message":{"role":"user","parts":[{"text":"fail-open test"}]}}}`
	c.Request = httptest.NewRequest("POST", "/workspaces/ws-db-err-budget/a2a", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")

	handler.ProxyA2A(c)

	// Let the async activity-log goroutine fire before checking expectations.
	time.Sleep(50 * time.Millisecond)

	if w.Code != http.StatusOK {
		t.Errorf("expected 200 on DB error (fail-open), got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}

View File

@ -0,0 +1,147 @@
package handlers
import (
"context"
"database/sql"
"fmt"
"log"
"net/http"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/gin-gonic/gin"
)
// Pricing constants — Claude Sonnet default rates (USD per token).
// Callers with different models should override via env vars in a future phase.
// Consumed by upsertTokenUsage when computing estimated_cost_usd per call.
const (
	tokenCostPerInputToken  = 0.000003 // $3 / 1M input tokens
	tokenCostPerOutputToken = 0.000015 // $15 / 1M output tokens
)
// MetricsHandler serves GET /workspaces/:id/metrics.
// Stateless — all data comes from the shared db.DB connection at request time.
type MetricsHandler struct{}

// NewMetricsHandler returns a ready-to-use MetricsHandler.
func NewMetricsHandler() *MetricsHandler {
	return new(MetricsHandler)
}
// GetMetrics handles GET /workspaces/:id/metrics.
//
// Returns aggregated LLM token usage for the current UTC day.
// Auth: WorkspaceAuth middleware (bearer token bound to :id).
//
// Response:
//
//	{
//	  "input_tokens": <N>,
//	  "output_tokens": <N>,
//	  "total_calls": <N>,
//	  "estimated_cost_usd": "0.000000",
//	  "period_start": "2026-04-17T00:00:00Z",
//	  "period_end": "2026-04-18T00:00:00Z"
//	}
func (h *MetricsHandler) GetMetrics(c *gin.Context) {
	wsID := c.Param("id")
	reqCtx := c.Request.Context()

	// Existence probe first — a missing workspace is a 404, never a zero row.
	var wsExists bool
	existsErr := db.DB.QueryRowContext(reqCtx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		wsID,
	).Scan(&wsExists)
	if existsErr != nil {
		log.Printf("metrics: workspace check failed for %s: %v", wsID, existsErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify workspace"})
		return
	}
	if !wsExists {
		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
		return
	}

	dayStart := todayUTC()
	dayEnd := dayStart.Add(24 * time.Hour)

	var (
		inTok  int64
		outTok int64
		calls  int64
		cost   float64
	)
	scanErr := db.DB.QueryRowContext(reqCtx, `
		SELECT
			COALESCE(SUM(input_tokens), 0),
			COALESCE(SUM(output_tokens), 0),
			COALESCE(SUM(call_count), 0),
			COALESCE(SUM(estimated_cost_usd), 0)
		FROM workspace_token_usage
		WHERE workspace_id = $1
		  AND period_start = $2
	`, wsID, dayStart).Scan(&inTok, &outTok, &calls, &cost)
	// COALESCE over aggregates always yields a row, but tolerate ErrNoRows
	// the same way the original contract does.
	if scanErr != nil && scanErr != sql.ErrNoRows {
		log.Printf("metrics: query failed for workspace %s: %v", wsID, scanErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch metrics"})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"input_tokens":       inTok,
		"output_tokens":      outTok,
		"total_calls":        calls,
		"estimated_cost_usd": fmt.Sprintf("%.6f", cost),
		"period_start":       dayStart.Format(time.RFC3339),
		"period_end":         dayEnd.Format(time.RFC3339),
	})
}
// todayUTC returns the start of the current UTC day (midnight).
func todayUTC() time.Time {
now := time.Now().UTC()
return time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.UTC)
}
// maxTokensPerCall is the per-call sanity cap applied before upsert (#615).
// An adversarial or buggy agent reporting INT64_MAX would otherwise cause a
// NUMERIC(12,6) overflow in Postgres (silent failure, no cross-workspace
// impact, but corrupts the workspace's cost accounting). 10 M tokens/call is
// generous for any real LLM API response; anything above is clamped.
// Consumed by upsertTokenUsage's clamping step.
const maxTokensPerCall = int64(10_000_000)
// upsertTokenUsage accumulates input/output token counts for workspaceID's
// current UTC day. Cost is estimated using the default per-token pricing
// constants. Always call in a detached goroutine — never block the A2A path.
func upsertTokenUsage(ctx context.Context, workspaceID string, inputTokens, outputTokens int64) {
	// Sanitize both counts before any arithmetic: negatives become 0 and
	// oversized reports are pinned at maxTokensPerCall — prevents NUMERIC
	// overflow from adversarial or buggy agent responses (#615).
	clamp := func(v int64) int64 {
		if v < 0 {
			return 0
		}
		if v > maxTokensPerCall {
			return maxTokensPerCall
		}
		return v
	}
	inputTokens, outputTokens = clamp(inputTokens), clamp(outputTokens)

	// Nothing to record — skip the DB round trip entirely.
	if inputTokens == 0 && outputTokens == 0 {
		return
	}

	periodStart := todayUTC()
	cost := float64(inputTokens)*tokenCostPerInputToken + float64(outputTokens)*tokenCostPerOutputToken

	if _, err := db.DB.ExecContext(ctx, `
		INSERT INTO workspace_token_usage
			(workspace_id, period_start, input_tokens, output_tokens, call_count, estimated_cost_usd, updated_at)
		VALUES ($1, $2, $3, $4, 1, $5, NOW())
		ON CONFLICT (workspace_id, period_start) DO UPDATE SET
			input_tokens = workspace_token_usage.input_tokens + EXCLUDED.input_tokens,
			output_tokens = workspace_token_usage.output_tokens + EXCLUDED.output_tokens,
			call_count = workspace_token_usage.call_count + 1,
			estimated_cost_usd = workspace_token_usage.estimated_cost_usd + EXCLUDED.estimated_cost_usd,
			updated_at = NOW()
	`, workspaceID, periodStart, inputTokens, outputTokens, cost); err != nil {
		log.Printf("upsertTokenUsage: failed for workspace %s: %v", workspaceID, err)
	}
}

View File

@ -0,0 +1,335 @@
package handlers
import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// usageColumns matches the SELECT in GetMetrics.
// Positional: input tokens, output tokens, call count, cost — keep the order
// in sync with the handler's aggregate query.
var usageColumns = []string{
	"sum_input_tokens", "sum_output_tokens", "sum_call_count", "sum_cost",
}
// expectWorkspaceExistsMetrics queues the EXISTS check in GetMetrics.
// exists=false drives the handler's 404 path; true lets it proceed to the
// usage aggregation query.
func expectWorkspaceExistsMetrics(mock sqlmock.Sqlmock, workspaceID string, exists bool) {
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs(workspaceID).
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(exists))
}
// TestGetMetrics_HappyPath verifies the handler returns correct aggregated data.
func TestGetMetrics_HappyPath(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExistsMetrics(mock, "ws-1", true)

	// Simulate one row with usage data. Second arg is the period_start date —
	// AnyArg because "today" shifts with the clock.
	mock.ExpectQuery(`SELECT\s+COALESCE\(SUM\(input_tokens\)`).
		WithArgs("ws-1", sqlmock.AnyArg()).
		WillReturnRows(sqlmock.NewRows(usageColumns).
			AddRow(int64(1500), int64(300), int64(5), float64(0.009)))

	h := NewMetricsHandler()
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/metrics", nil)

	h.GetMetrics(c)

	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	// Typed struct pins both JSON key names and value types in one decode.
	var resp struct {
		InputTokens   int64  `json:"input_tokens"`
		OutputTokens  int64  `json:"output_tokens"`
		TotalCalls    int64  `json:"total_calls"`
		EstimatedCost string `json:"estimated_cost_usd"`
		PeriodStart   string `json:"period_start"`
		PeriodEnd     string `json:"period_end"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("invalid JSON: %v\n%s", err, w.Body.String())
	}
	if resp.InputTokens != 1500 {
		t.Errorf("expected input_tokens=1500, got %d", resp.InputTokens)
	}
	if resp.OutputTokens != 300 {
		t.Errorf("expected output_tokens=300, got %d", resp.OutputTokens)
	}
	if resp.TotalCalls != 5 {
		t.Errorf("expected total_calls=5, got %d", resp.TotalCalls)
	}
	if resp.EstimatedCost == "" {
		t.Error("expected non-empty estimated_cost_usd")
	}
	if resp.PeriodStart == "" {
		t.Error("expected non-empty period_start")
	}
	if resp.PeriodEnd == "" {
		t.Error("expected non-empty period_end")
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestGetMetrics_WorkspaceNotFound verifies a 404 when workspace is absent.
func TestGetMetrics_WorkspaceNotFound(t *testing.T) {
	mock := setupTestDB(t)
	// EXISTS returns false → handler must 404 without querying usage.
	expectWorkspaceExistsMetrics(mock, "ghost", false)

	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "ghost"}}
	ctx.Request = httptest.NewRequest("GET", "/workspaces/ghost/metrics", nil)

	NewMetricsHandler().GetMetrics(ctx)

	if rec.Code != http.StatusNotFound {
		t.Fatalf("expected 404, got %d: %s", rec.Code, rec.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestGetMetrics_EmptyPeriod verifies the handler returns zeros when no usage exists yet.
func TestGetMetrics_EmptyPeriod(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExistsMetrics(mock, "ws-new", true)

	// COALESCE returns 0 for each column when no rows match.
	mock.ExpectQuery(`SELECT\s+COALESCE\(SUM\(input_tokens\)`).
		WithArgs("ws-new", sqlmock.AnyArg()).
		WillReturnRows(sqlmock.NewRows(usageColumns).
			AddRow(int64(0), int64(0), int64(0), float64(0)))

	h := NewMetricsHandler()
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-new"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-new/metrics", nil)

	h.GetMetrics(c)

	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("invalid JSON: %v", err)
	}
	// Verify period_start and period_end are present and distinct — the
	// handler computes end as start + 24h, so equality means a bug.
	ps, _ := resp["period_start"].(string)
	pe, _ := resp["period_end"].(string)
	if ps == "" || pe == "" {
		t.Errorf("expected non-empty period_start/period_end, got %q / %q", ps, pe)
	}
	if ps == pe {
		t.Errorf("period_start and period_end must differ, both are %q", ps)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestGetMetrics_CostFormat verifies estimated_cost_usd is formatted to 6
// decimal places. The handler renders fmt.Sprintf("%.6f", cost), so a stored
// cost of 3.0 must come back as exactly "3.000000".
func TestGetMetrics_CostFormat(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExistsMetrics(mock, "ws-1", true)
	mock.ExpectQuery(`SELECT\s+COALESCE\(SUM\(input_tokens\)`).
		WithArgs("ws-1", sqlmock.AnyArg()).
		WillReturnRows(sqlmock.NewRows(usageColumns).
			AddRow(int64(1000000), int64(0), int64(1), float64(3.0)))

	h := NewMetricsHandler()
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/metrics", nil)

	h.GetMetrics(c)

	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("invalid JSON: %v", err)
	}
	// Fix: assert the exact rendered value instead of a weak length check —
	// the expected string is fully determined by the %.6f format.
	cost, ok := resp["estimated_cost_usd"].(string)
	if !ok {
		t.Fatalf("estimated_cost_usd must be a JSON string, got %T", resp["estimated_cost_usd"])
	}
	if cost != "3.000000" {
		t.Errorf("expected estimated_cost_usd \"3.000000\", got %q", cost)
	}
	// Fix: every sibling test verifies queued expectations fired; this one
	// previously skipped it, letting a dropped query pass silently.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// ---- upsertTokenUsage cap tests (#615) ----

// TestUpsertTokenUsage_615_CapsInt64Max verifies that an adversarial
// INT64_MAX token count is clamped to maxTokensPerCall before the upsert,
// preventing NUMERIC(12,6) overflow in Postgres.
func TestUpsertTokenUsage_615_CapsInt64Max(t *testing.T) {
	mock := setupTestDB(t)

	// We expect the INSERT to be called with maxTokensPerCall, not math.MaxInt64.
	mock.ExpectExec(`INSERT INTO workspace_token_usage`).
		WithArgs("ws-1", sqlmock.AnyArg(),
			maxTokensPerCall, // input clamped
			maxTokensPerCall, // output clamped
			sqlmock.AnyArg()). // cost
		WillReturnResult(sqlmock.NewResult(0, 1))

	// INT64_MAX overflows — must be clamped. (Derived by shifting all-ones
	// right by one; equivalent to math.MaxInt64 without the import.)
	const int64Max = int64(^uint64(0) >> 1)
	upsertTokenUsage(t.Context(), "ws-1", int64Max, int64Max)

	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("expected clamped values in upsert: %v", err)
	}
}
// TestUpsertTokenUsage_615_CapsNegative verifies negative token counts are
// clamped to 0 before upsert (no negative accumulation in cost rows).
func TestUpsertTokenUsage_615_CapsNegative(t *testing.T) {
	// Negative input + negative output → both become 0 → early return, no DB call.
	// setupTestDB registers a strict mock with zero expectations, so any
	// accidental Exec would fail the test at the driver level.
	setupTestDB(t) // no expectations
	upsertTokenUsage(t.Context(), "ws-1", -100, -200)
	// If any DB call were made the mock would error — passing here is the assertion.
}
// TestUpsertTokenUsage_615_NormalValuesUnchanged verifies that token counts
// within the valid range pass through to the DB unchanged.
func TestUpsertTokenUsage_615_NormalValuesUnchanged(t *testing.T) {
	mock := setupTestDB(t)

	// In-range counts must reach the INSERT exactly as supplied.
	mock.ExpectExec(`INSERT INTO workspace_token_usage`).
		WithArgs("ws-1", sqlmock.AnyArg(), int64(1500), int64(300), sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(0, 1))

	upsertTokenUsage(t.Context(), "ws-1", 1500, 300)

	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("normal values altered unexpectedly: %v", err)
	}
}
// TestUpsertTokenUsage_615_ExactlyAtCap verifies that a count exactly equal
// to maxTokensPerCall is accepted without clamping (boundary: the clamp must
// use a strict > comparison).
func TestUpsertTokenUsage_615_ExactlyAtCap(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectExec(`INSERT INTO workspace_token_usage`).
		WithArgs("ws-1", sqlmock.AnyArg(),
			maxTokensPerCall,
			maxTokensPerCall,
			sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(0, 1))

	upsertTokenUsage(t.Context(), "ws-1", maxTokensPerCall, maxTokensPerCall)

	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("at-cap values should not be altered: %v", err)
	}
}
// ---- parseUsageFromA2AResponse tests ----

// TestParseUsage_JSONRPCResultEnvelope verifies extraction of token counts
// from a usage object nested under the JSON-RPC "result" envelope.
func TestParseUsage_JSONRPCResultEnvelope(t *testing.T) {
	body := []byte(`{
		"jsonrpc": "2.0",
		"id": "abc",
		"result": {
			"usage": {
				"input_tokens": 100,
				"output_tokens": 50
			}
		}
	}`)
	in, out := parseUsageFromA2AResponse(body)
	if in != 100 {
		t.Errorf("expected input_tokens=100, got %d", in)
	}
	if out != 50 {
		t.Errorf("expected output_tokens=50, got %d", out)
	}
}
// TestParseUsage_TopLevelUsage verifies the fallback path: a usage object at
// the top level of the response, with no JSON-RPC result envelope.
func TestParseUsage_TopLevelUsage(t *testing.T) {
	body := []byte(`{
		"usage": {
			"input_tokens": 200,
			"output_tokens": 75
		}
	}`)
	in, out := parseUsageFromA2AResponse(body)
	if in != 200 {
		t.Errorf("expected input_tokens=200, got %d", in)
	}
	if out != 75 {
		t.Errorf("expected output_tokens=75, got %d", out)
	}
}
// TestParseUsage_NoUsageField verifies a well-formed response without any
// usage object yields (0, 0) rather than an error.
func TestParseUsage_NoUsageField(t *testing.T) {
	body := []byte(`{"jsonrpc":"2.0","id":"x","result":{"message":"hello"}}`)
	in, out := parseUsageFromA2AResponse(body)
	if in != 0 || out != 0 {
		t.Errorf("expected (0, 0) with no usage field, got (%d, %d)", in, out)
	}
}
// TestParseUsage_ZeroTokensIgnored verifies explicit zero counts are passed
// through as (0, 0) — indistinguishable from an absent usage object.
func TestParseUsage_ZeroTokensIgnored(t *testing.T) {
	body := []byte(`{"result":{"usage":{"input_tokens":0,"output_tokens":0}}}`)
	in, out := parseUsageFromA2AResponse(body)
	if in != 0 || out != 0 {
		t.Errorf("expected (0, 0) for zero tokens, got (%d, %d)", in, out)
	}
}
// TestParseUsage_EmptyBody verifies an empty byte slice is handled gracefully.
func TestParseUsage_EmptyBody(t *testing.T) {
	in, out := parseUsageFromA2AResponse([]byte{})
	if in != 0 || out != 0 {
		t.Errorf("expected (0, 0) for empty body, got (%d, %d)", in, out)
	}
}
// TestParseUsage_InvalidJSON verifies malformed JSON degrades to (0, 0)
// instead of propagating a parse error to the A2A path.
func TestParseUsage_InvalidJSON(t *testing.T) {
	in, out := parseUsageFromA2AResponse([]byte("not json"))
	if in != 0 || out != 0 {
		t.Errorf("expected (0, 0) for invalid JSON, got (%d, %d)", in, out)
	}
}
// TestParseUsage_NestedResultPreferredOverTopLevel pins the precedence rule:
// when both result.usage and a top-level usage are present, result.usage wins.
func TestParseUsage_NestedResultPreferredOverTopLevel(t *testing.T) {
	// result.usage should be preferred over top-level usage.
	body := []byte(`{
		"usage": {"input_tokens": 999, "output_tokens": 999},
		"result": {
			"usage": {"input_tokens": 42, "output_tokens": 21}
		}
	}`)
	in, out := parseUsageFromA2AResponse(body)
	if in != 42 {
		t.Errorf("expected result.usage.input_tokens=42, got %d", in)
	}
	if out != 21 {
		t.Errorf("expected result.usage.output_tokens=21, got %d", out)
	}
}

View File

@ -24,13 +24,15 @@ func TestWorkspaceGet_Success(t *testing.T) {
"id", "name", "role", "tier", "status", "agent_card", "url",
"parent_id", "active_tasks", "last_error_rate", "last_sample_error",
"uptime_seconds", "current_task", "runtime", "workspace_dir", "x", "y", "collapsed",
"budget_limit", "monthly_spend",
}
mock.ExpectQuery("SELECT w.id, w.name").
WithArgs("ws-get-1").
WillReturnRows(sqlmock.NewRows(columns).
AddRow("ws-get-1", "My Agent", "worker", 1, "online", []byte(`{"name":"test"}`),
"http://localhost:8001", nil, 2, 0.05, "", 3600, "working", "langgraph",
"", 10.0, 20.0, false))
"", 10.0, 20.0, false,
nil, 0))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -146,10 +148,12 @@ func TestWorkspaceCreate_DBInsertError(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
// Workspace INSERT fails
// Transaction begins, workspace INSERT fails, transaction is rolled back.
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Failing Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WithArgs(sqlmock.AnyArg(), "Failing Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none", (*int64)(nil)).
WillReturnError(sql.ErrConnDone)
mock.ExpectRollback()
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -175,10 +179,13 @@ func TestWorkspaceCreate_DefaultsApplied(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
// Transaction wraps the workspace INSERT (no secrets in this request).
mock.ExpectBegin()
// Expect workspace INSERT with defaulted tier=1, runtime="langgraph"
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Default Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WithArgs(sqlmock.AnyArg(), "Default Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none", (*int64)(nil)).
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
// Expect canvas_layouts INSERT (x=0, y=0 — defaults)
mock.ExpectExec("INSERT INTO canvas_layouts").
@ -215,6 +222,117 @@ func TestWorkspaceCreate_DefaultsApplied(t *testing.T) {
}
}
// TestWorkspaceCreate_WithSecrets_Persists asserts that secrets in the create
// payload are written to workspace_secrets inside the same transaction as the
// workspace row, and that the handler returns 201.
// TestWorkspaceCreate_WithSecrets_Persists asserts that secrets in the create
// payload are written to workspace_secrets inside the same transaction as the
// workspace row, and that the handler returns 201.
func TestWorkspaceCreate_WithSecrets_Persists(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	// External workspace: simplest code path — no provisioner goroutine.
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	// Expected DB sequence: BEGIN → workspace INSERT → secret INSERT → COMMIT.
	mock.ExpectBegin()
	mock.ExpectExec("INSERT INTO workspaces").
		WithArgs(sqlmock.AnyArg(), "Hermes Agent", nil, 1, "hermes", sqlmock.AnyArg(), (*string)(nil), nil, "none", (*int64)(nil)).
		WillReturnResult(sqlmock.NewResult(0, 1))
	// Secret inserted inside the same transaction. Value args are AnyArg
	// because the secret is stored encrypted (ciphertext is nondeterministic).
	mock.ExpectExec("INSERT INTO workspace_secrets").
		WithArgs(sqlmock.AnyArg(), "HERMES_API_KEY", sqlmock.AnyArg(), sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectCommit()
	// canvas_layouts (non-fatal, outside tx)
	mock.ExpectExec("INSERT INTO canvas_layouts").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"name":"Hermes Agent","runtime":"hermes","external":true,"secrets":{"HERMES_API_KEY":"sk-test-123"}}`
	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Create(c)
	if w.Code != http.StatusCreated {
		t.Errorf("expected status 201, got %d: %s", w.Code, w.Body.String())
	}
	// ExpectationsWereMet fails on any missing OR extra DB call.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestWorkspaceCreate_SecretPersistFails_RollsBack asserts that a DB error
// while persisting a secret causes the entire transaction to roll back and
// the handler to return 500. The workspace row must NOT be committed.
// TestWorkspaceCreate_SecretPersistFails_RollsBack asserts that a DB error
// while persisting a secret causes the entire transaction to roll back and
// the handler to return 500. The workspace row must NOT be committed.
func TestWorkspaceCreate_SecretPersistFails_RollsBack(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	// Workspace INSERT succeeds, secret INSERT fails → expect ROLLBACK, no COMMIT.
	mock.ExpectBegin()
	mock.ExpectExec("INSERT INTO workspaces").
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectExec("INSERT INTO workspace_secrets").
		WillReturnError(sql.ErrConnDone) // DB failure while writing secret
	mock.ExpectRollback() // workspace insert must be rolled back
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"name":"Rollback Agent","secrets":{"OPENAI_API_KEY":"sk-fail"}}`
	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Create(c)
	if w.Code != http.StatusInternalServerError {
		t.Errorf("expected status 500, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestWorkspaceCreate_EmptySecrets_OK asserts that an empty secrets map (or
// no secrets key at all) creates the workspace normally without touching
// workspace_secrets.
// TestWorkspaceCreate_EmptySecrets_OK asserts that an empty secrets map (or
// no secrets key at all) creates the workspace normally without touching
// workspace_secrets.
func TestWorkspaceCreate_EmptySecrets_OK(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	mock.ExpectBegin()
	mock.ExpectExec("INSERT INTO workspaces").
		WillReturnResult(sqlmock.NewResult(0, 1))
	// No ExpectExec for workspace_secrets — empty map must be a no-op.
	// sqlmock fails ExpectationsWereMet if an unexpected INSERT fires.
	mock.ExpectCommit()
	mock.ExpectExec("INSERT INTO canvas_layouts").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	// "secrets":{} is present-but-empty — the strictest variant of the no-op case.
	body := `{"name":"No Secrets Agent","external":true,"secrets":{}}`
	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Create(c)
	if w.Code != http.StatusCreated {
		t.Errorf("expected status 201, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// ==================== GET /workspaces (List) ====================
func TestWorkspaceList_Empty(t *testing.T) {
@ -228,6 +346,7 @@ func TestWorkspaceList_Empty(t *testing.T) {
"id", "name", "role", "tier", "status", "agent_card", "url",
"parent_id", "active_tasks", "last_error_rate", "last_sample_error",
"uptime_seconds", "current_task", "runtime", "workspace_dir", "x", "y", "collapsed",
"budget_limit", "monthly_spend",
}))
w := httptest.NewRecorder()
@ -741,3 +860,132 @@ func TestWorkspaceUpdate_SensitiveField_NoTokensYet_FailOpen(t *testing.T) {
t.Errorf("bootstrap fail-open: got %d, want 200 (%s)", w.Code, w.Body.String())
}
}
// ==================== #611 Security Auditor regressions ====================
// TestWorkspaceGet_FinancialFieldsStripped verifies that GET /workspaces/:id
// does NOT expose budget_limit or monthly_spend. The endpoint is on the open
// router — any caller with a UUID would otherwise read billing data. (#611 Fix 2)
// TestWorkspaceGet_FinancialFieldsStripped verifies that GET /workspaces/:id
// does NOT expose budget_limit or monthly_spend. The endpoint is on the open
// router — any caller with a UUID would otherwise read billing data. (#611 Fix 2)
func TestWorkspaceGet_FinancialFieldsStripped(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	// Column list mirrors the handler's SELECT, including the two financial
	// columns under test at the end.
	columns := []string{
		"id", "name", "role", "tier", "status", "agent_card", "url",
		"parent_id", "active_tasks", "last_error_rate", "last_sample_error",
		"uptime_seconds", "current_task", "runtime", "workspace_dir", "x", "y", "collapsed",
		"budget_limit", "monthly_spend",
	}
	// Populate with non-zero financial values to confirm they are stripped.
	mock.ExpectQuery("SELECT w.id, w.name").
		WithArgs("ws-fin-1").
		WillReturnRows(sqlmock.NewRows(columns).
			AddRow("ws-fin-1", "Finance Test", "worker", 1, "online", []byte(`{}`),
				"http://localhost:9001", nil, 0, 0.0, "", 0, "", "langgraph",
				"", 0.0, 0.0, false,
				int64(50000), int64(12500))) // budget_limit=500 USD, spend=125 USD
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-fin-1"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-fin-1", nil)
	handler.Get(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	// Assert on key ABSENCE, not zero values — a zeroed field would still leak
	// the schema, so the keys must not appear at all.
	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("failed to parse response: %v", err)
	}
	if _, present := resp["budget_limit"]; present {
		t.Errorf("budget_limit must not appear in GET /workspaces/:id response (got %v)", resp["budget_limit"])
	}
	if _, present := resp["monthly_spend"]; present {
		t.Errorf("monthly_spend must not appear in GET /workspaces/:id response (got %v)", resp["monthly_spend"])
	}
	// Sanity-check that normal fields are still present.
	if resp["name"] != "Finance Test" {
		t.Errorf("expected name 'Finance Test', got %v", resp["name"])
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestWorkspaceUpdate_BudgetLimitIgnored verifies that including budget_limit
// in a PATCH /workspaces/:id body does NOT trigger a DB write. The only write
// path for budget_limit is PATCH /workspaces/:id/budget (AdminAuth-gated).
// Any workspace bearer must not be able to self-clear its spending ceiling.
// (#611 Fix 1)
// TestWorkspaceUpdate_BudgetLimitIgnored verifies that including budget_limit
// in a PATCH /workspaces/:id body does NOT trigger a DB write. The only write
// path for budget_limit is PATCH /workspaces/:id/budget (AdminAuth-gated).
// Any workspace bearer must not be able to self-clear its spending ceiling.
// (#611 Fix 1)
func TestWorkspaceUpdate_BudgetLimitIgnored(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	// Only the existence probe fires — no UPDATE for budget_limit.
	mock.ExpectQuery("SELECT EXISTS.*workspaces WHERE id").
		WithArgs("ws-budget-test").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	// name update is the only expected write
	mock.ExpectExec("UPDATE workspaces SET name").
		WithArgs("ws-budget-test", "Safe Name").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-budget-test"}}
	// Send budget_limit alongside an innocuous field. budget_limit:null is the
	// attack shape — a successful write would clear the ceiling entirely.
	body := `{"name":"Safe Name","budget_limit":null}`
	c.Request = httptest.NewRequest("PATCH", "/workspaces/ws-budget-test",
		bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Update(c)
	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	// sqlmock will fail if any unexpected DB call was made (e.g. for budget_limit).
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unexpected DB call — budget_limit must not be written via Update: %v", err)
	}
}
// TestWorkspaceUpdate_BudgetLimitOnly_Ignored verifies that a body containing
// ONLY budget_limit results in no DB writes at all (besides the existence probe).
// TestWorkspaceUpdate_BudgetLimitOnly_Ignored verifies that a body containing
// ONLY budget_limit results in no DB writes at all (besides the existence probe).
func TestWorkspaceUpdate_BudgetLimitOnly_Ignored(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	// Existence probe is the sole expected DB interaction.
	mock.ExpectQuery("SELECT EXISTS.*workspaces WHERE id").
		WithArgs("ws-budget-only").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	// No UPDATE expected — budget_limit must be silently skipped.
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-budget-only"}}
	c.Request = httptest.NewRequest("PATCH", "/workspaces/ws-budget-only",
		bytes.NewBufferString(`{"budget_limit":999999}`))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Update(c)
	// Handler still returns 200 — the field is ignored, not rejected.
	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unexpected DB call for budget_limit: %v", err)
	}
}

View File

@ -23,6 +23,7 @@ var apiPrefixes = []string{
"/settings",
"/bundles",
"/org",
"/orgs", // #610 — per-org plugin allowlist routes
"/templates",
"/plugins",
"/webhooks",

View File

@ -199,6 +199,52 @@ func TestCSPCanvasRoutesGetPermissivePolicy(t *testing.T) {
}
}
// TestSecurityHeaders_614_NosniffOnSSEAndAPIEndpoints is the acceptance test for
// issue #614 — verifies X-Content-Type-Options: nosniff and X-Frame-Options: DENY
// are present on API and SSE paths. SecurityHeaders() was already wired globally
// in router.go (issue #151), so this test pins that contract against regression.
// TestSecurityHeaders_614_NosniffOnSSEAndAPIEndpoints is the acceptance test for
// issue #614 — verifies X-Content-Type-Options: nosniff and X-Frame-Options: DENY
// are present on API and SSE paths. SecurityHeaders() was already wired globally
// in router.go (issue #151), so this test pins that contract against regression.
func TestSecurityHeaders_614_NosniffOnSSEAndAPIEndpoints(t *testing.T) {
	r := gin.New()
	r.Use(SecurityHeaders())
	// Register a sample of high-value endpoints that #614 flagged.
	// The SSE route sets its own Content-Type to confirm the middleware's
	// headers survive a handler-set content type.
	r.GET("/workspaces/ws-1/events/stream", func(c *gin.Context) {
		c.Header("Content-Type", "text/event-stream")
		c.String(http.StatusOK, "data: ping\n\n")
	})
	r.GET("/settings/secrets", func(c *gin.Context) {
		c.JSON(http.StatusOK, nil)
	})
	r.GET("/events/ws-1", func(c *gin.Context) {
		c.JSON(http.StatusOK, nil)
	})
	r.GET("/orgs/org-1/plugins/allowlist", func(c *gin.Context) {
		c.JSON(http.StatusOK, nil)
	})
	paths := []string{
		"/workspaces/ws-1/events/stream",
		"/settings/secrets",
		"/events/ws-1",
		"/orgs/org-1/plugins/allowlist",
	}
	// One subtest per path so a single missing header pinpoints the route.
	for _, path := range paths {
		t.Run(path, func(t *testing.T) {
			w := httptest.NewRecorder()
			req, _ := http.NewRequest(http.MethodGet, path, nil)
			r.ServeHTTP(w, req)
			if got := w.Header().Get("X-Content-Type-Options"); got != "nosniff" {
				t.Errorf("#614 %s: X-Content-Type-Options = %q, want nosniff", path, got)
			}
			if got := w.Header().Get("X-Frame-Options"); got != "DENY" {
				t.Errorf("#614 %s: X-Frame-Options = %q, want DENY", path, got)
			}
		})
	}
}
// TestIsAPIPath unit-tests the path classifier directly.
func TestIsAPIPath(t *testing.T) {
cases := []struct {
@ -221,6 +267,8 @@ func TestIsAPIPath(t *testing.T) {
{"/ws", true},
{"/events", true},
{"/approvals", true},
{"/orgs", true}, // #610 allowlist routes
{"/orgs/org-1/plugins/allowlist", true},
// Sub-paths
{"/workspaces/abc-123", true},
{"/workspaces/abc-123/state", true},

View File

@ -81,6 +81,13 @@ func TenantGuardWithOrgID(configuredOrgID string) gin.HandlerFunc {
c.Next()
return
}
// Tertiary: same-origin Canvas requests on tenant EC2 instances where
// Caddy serves Canvas (:3000) and API (:8080) under the same domain.
// CANVAS_PROXY_URL is set → Referer/Origin matches Host → trusted.
if isSameOriginCanvas(c) {
c.Next()
return
}
// 404 not 403 — existence of this tenant must not be inferable by
// probing other orgs' machines.
c.AbortWithStatus(404)

View File

@ -133,6 +133,64 @@ func TestOrgIDFromReplaySrc(t *testing.T) {
}
}
// Same-origin Canvas bypass: when CANVAS_PROXY_URL is set and Referer matches
// Host, the request is from the co-served Canvas and should pass through.
// Same-origin Canvas bypass: when CANVAS_PROXY_URL is set (modelled here by
// forcing canvasProxyActive) and the Referer matches the request Host, the
// request comes from the co-served Canvas and must pass the tenant guard.
func TestTenantGuard_SameOriginCanvasBypass(t *testing.T) {
	// Flip the package-level flag for the duration of the test, then restore.
	prev := canvasProxyActive
	canvasProxyActive = true
	defer func() { canvasProxyActive = prev }()

	router := newGuardedRouter("org-abc")

	req := httptest.NewRequest("GET", "/workspaces", nil)
	req.Host = "molecule1.moleculesai.app"
	req.Header.Set("Referer", "https://molecule1.moleculesai.app/")

	rec := httptest.NewRecorder()
	router.ServeHTTP(rec, req)

	if rec.Code != 200 {
		t.Errorf("same-origin canvas: expected 200, got %d", rec.Code)
	}
}
// Same-origin Canvas bypass via Origin header (WebSocket upgrade path).
func TestTenantGuard_SameOriginCanvasViaOrigin(t *testing.T) {
origActive := canvasProxyActive
canvasProxyActive = true
defer func() { canvasProxyActive = origActive }()
r := newGuardedRouter("org-abc")
req := httptest.NewRequest("GET", "/workspaces", nil)
req.Host = "molecule1.moleculesai.app"
req.Header.Set("Origin", "https://molecule1.moleculesai.app")
w := httptest.NewRecorder()
r.ServeHTTP(w, req)
if w.Code != 200 {
t.Errorf("same-origin canvas via Origin: expected 200, got %d", w.Code)
}
}
// Same-origin Canvas bypass must NOT work when CANVAS_PROXY_URL is unset.
func TestTenantGuard_SameOriginCanvasInactiveWithoutEnv(t *testing.T) {
origActive := canvasProxyActive
canvasProxyActive = false
defer func() { canvasProxyActive = origActive }()
r := newGuardedRouter("org-abc")
req := httptest.NewRequest("GET", "/workspaces", nil)
req.Host = "molecule1.moleculesai.app"
req.Header.Set("Referer", "https://molecule1.moleculesai.app/")
w := httptest.NewRecorder()
r.ServeHTTP(w, req)
if w.Code != 404 {
t.Errorf("same-origin canvas without CANVAS_PROXY_URL: expected 404, got %d", w.Code)
}
}
// The allowlist is exact-match, not prefix. "/health/debug" must NOT bypass.
func TestTenantGuard_AllowlistIsExactMatch(t *testing.T) {
gin.SetMode(gin.TestMode)

View File

@ -67,10 +67,17 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc {
// Same lazy-bootstrap contract as WorkspaceAuth: if no live token exists
// anywhere on the platform (fresh install / pre-Phase-30 upgrade), requests
// are let through so existing deployments keep working. Once any workspace
// has a live token every request to these routes MUST present a valid one.
// has a live token every request to these routes MUST present a valid bearer
// token — no Origin-based bypass. (#623)
//
// Any valid workspace bearer token is accepted — the route is not scoped to
// a specific workspace so we only verify the token is live and unrevoked.
//
// NOTE: canvasOriginAllowed / isSameOriginCanvas are intentionally NOT called
// here. The Origin header is trivially forgeable by any container on the
// Docker network; using it as an auth bypass would let an attacker reach
// /settings/secrets, /bundles/import, /events, etc. without a bearer token.
// Those short-circuits belong ONLY in CanvasOrBearer (cosmetic routes).
func AdminAuth(database *sql.DB) gin.HandlerFunc {
return func(c *gin.Context) {
ctx := c.Request.Context()
@ -82,7 +89,7 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
return
}
if hasLive {
// Bearer token path — agents, CLI, and API clients.
// Bearer token is the ONLY accepted credential for admin routes.
tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization"))
if tok != "" {
if err := wsauth.ValidateAnyToken(ctx, database, tok); err != nil {
@ -92,16 +99,6 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
c.Next()
return
}
// Canvas origin path — cross-origin canvas (CORS_ORIGINS match).
if canvasOriginAllowed(c.GetHeader("Origin")) {
c.Next()
return
}
// Same-origin canvas path — tenant image where canvas + API share a host.
if isSameOriginCanvas(c) {
c.Next()
return
}
c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "admin auth required"})
return
}
@ -220,19 +217,25 @@ func isSameOriginCanvas(c *gin.Context) bool {
if !canvasProxyActive {
return false
}
referer := c.GetHeader("Referer")
if referer == "" {
return false
}
host := c.Request.Host
if host == "" {
return false
}
// Referer must start with https://<host>/ or http://<host>/ (trailing
// slash required to prevent hongming-wang.moleculesai.app.evil.com from
// matching hongming-wang.moleculesai.app).
return strings.HasPrefix(referer, "https://"+host+"/") ||
strings.HasPrefix(referer, "http://"+host+"/") ||
referer == "https://"+host ||
referer == "http://"+host
// Check Referer first (standard browser requests).
referer := c.GetHeader("Referer")
if referer != "" {
// Referer must start with https://<host>/ or http://<host>/ (trailing
// slash required to prevent hongming-wang.moleculesai.app.evil.com from
// matching hongming-wang.moleculesai.app).
if strings.HasPrefix(referer, "https://"+host+"/") ||
strings.HasPrefix(referer, "http://"+host+"/") ||
referer == "https://"+host ||
referer == "http://"+host {
return true
}
}
// Fallback: check Origin header (WebSocket upgrade requests may not have
// Referer but always send Origin).
origin := c.GetHeader("Origin")
return origin == "https://"+host || origin == "http://"+host
}

View File

@ -778,3 +778,116 @@ func TestCanvasOriginAllowed_LocalhostDefault(t *testing.T) {
t.Error("random origin should not be allowed")
}
}
// ── Issue #623 regression ─────────────────────────────────────────────────────
// AdminAuth must NOT accept forged Origin headers. Any container on the Docker
// network can set Origin: http://localhost:3000 without a bearer token, which
// previously bypassed AdminAuth on ALL admin-gated routes. (#623, dup #626)
// TestAdminAuth_623_ForgedOrigin_Returns401 — the main regression test:
// a request with a matching CORS origin but no bearer token must be rejected.
// TestAdminAuth_623_ForgedOrigin_Returns401 — the main regression test:
// a request with a matching CORS origin but no bearer token must be rejected.
// Previously a forged Origin header alone bypassed AdminAuth (#623).
func TestAdminAuth_623_ForgedOrigin_Returns401(t *testing.T) {
	mockDB, mock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock: %v", err)
	}
	defer mockDB.Close()
	// Platform has live tokens — AdminAuth is active.
	// (On a fresh install with zero tokens AdminAuth is fail-open.)
	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
	t.Setenv("CORS_ORIGINS", "http://localhost:3000")
	r := gin.New()
	r.GET("/settings/secrets", AdminAuth(mockDB), func(c *gin.Context) {
		c.JSON(http.StatusOK, gin.H{"secrets": []string{"OPENAI_API_KEY"}})
	})
	w := httptest.NewRecorder()
	// #623 attack: forge the canvas Origin header — no bearer token.
	req, _ := http.NewRequest(http.MethodGet, "/settings/secrets", nil)
	req.Header.Set("Origin", "http://localhost:3000")
	r.ServeHTTP(w, req)
	if w.Code != http.StatusUnauthorized {
		t.Errorf("#623 forged Origin bypass: expected 401, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestAdminAuth_623_ForgedCORSOrigin_Returns401 — variant: attacker uses the
// tenant-domain CORS origin from CORS_ORIGINS (not just localhost).
// TestAdminAuth_623_ForgedCORSOrigin_Returns401 — variant: attacker uses the
// tenant-domain CORS origin from CORS_ORIGINS (not just localhost).
func TestAdminAuth_623_ForgedCORSOrigin_Returns401(t *testing.T) {
	mockDB, mock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock: %v", err)
	}
	defer mockDB.Close()
	// Live token present → AdminAuth enforces bearer auth.
	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
	t.Setenv("CORS_ORIGINS", "https://acme.moleculesai.app")
	r := gin.New()
	r.GET("/admin/secrets", AdminAuth(mockDB), func(c *gin.Context) {
		c.JSON(http.StatusOK, gin.H{"ok": true})
	})
	w := httptest.NewRecorder()
	// Origin matches CORS_ORIGINS exactly — still must not substitute for a bearer.
	req, _ := http.NewRequest(http.MethodGet, "/admin/secrets", nil)
	req.Header.Set("Origin", "https://acme.moleculesai.app")
	r.ServeHTTP(w, req)
	if w.Code != http.StatusUnauthorized {
		t.Errorf("#623 forged tenant Origin: expected 401, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestAdminAuth_623_ValidBearer_WithOrigin_Passes — bearer + matching Origin
// should still work (the Origin is irrelevant once the bearer validates).
func TestAdminAuth_623_ValidBearer_WithOrigin_Passes(t *testing.T) {
mockDB, mock, err := sqlmock.New()
if err != nil {
t.Fatalf("sqlmock: %v", err)
}
defer mockDB.Close()
goodToken := "valid-bearer-token-xyz"
tokenHash := sha256.Sum256([]byte(goodToken))
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
mock.ExpectQuery(validateAnyTokenSelectQuery).
WithArgs(tokenHash[:]).
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("tok-1"))
mock.ExpectExec(validateTokenUpdateQuery).
WithArgs("tok-1").
WillReturnResult(sqlmock.NewResult(0, 1))
t.Setenv("CORS_ORIGINS", "http://localhost:3000")
r := gin.New()
r.GET("/settings/secrets", AdminAuth(mockDB), func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"ok": true})
})
w := httptest.NewRecorder()
req, _ := http.NewRequest(http.MethodGet, "/settings/secrets", nil)
req.Header.Set("Authorization", "Bearer "+goodToken)
req.Header.Set("Origin", "http://localhost:3000") // present but irrelevant
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("bearer+origin: expected 200, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}

View File

@ -44,6 +44,12 @@ type HeartbeatPayload struct {
ActiveTasks int `json:"active_tasks"`
UptimeSeconds int `json:"uptime_seconds"`
CurrentTask string `json:"current_task"`
// MonthlySpend is cumulative USD spend for the current calendar month,
// denominated in cents (e.g. 1500 = $15.00). Zero means "no update" —
// the heartbeat handler never writes zero to avoid accidentally clearing
// a previously-reported spend value. Any non-zero value is clamped to
// [0, maxMonthlySpend] before the DB write. (#615)
MonthlySpend int64 `json:"monthly_spend"`
}
type UpdateCardPayload struct {
@ -63,6 +69,13 @@ type CreateWorkspacePayload struct {
WorkspaceDir string `json:"workspace_dir"` // host path to mount as /workspace (empty = isolated volume)
WorkspaceAccess string `json:"workspace_access"` // "none" (default), "read_only", or "read_write" — see #65
ParentID *string `json:"parent_id"`
// BudgetLimit is the optional monthly spend ceiling in USD cents.
// NULL (omitted) means no limit. budget_limit=500 means $5.00/month.
BudgetLimit *int64 `json:"budget_limit"`
// Secrets is an optional map of key→plaintext-value pairs to persist as
// workspace secrets at creation time. Stored encrypted (same path as
// POST /workspaces/:id/secrets). Nil/empty map is a no-op.
Secrets map[string]string `json:"secrets"`
Canvas struct {
X float64 `json:"x"`
Y float64 `json:"y"`

View File

@ -0,0 +1,101 @@
package router
import (
"net/http"
"net/http/httptest"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware"
"github.com/gin-gonic/gin"
)
// buildTestTokenEngine builds a minimal Gin engine containing only the
// test-token route with AdminAuth middleware — the same registration that
// router.go now uses. Allows us to verify the auth gate is enforced at the
// HTTP layer without spinning up the full Setup() dependency graph.
// buildTestTokenEngine builds a minimal Gin engine containing only the
// test-token route with AdminAuth middleware — the same registration that
// router.go now uses. Allows us to verify the auth gate is enforced at the
// HTTP layer without spinning up the full Setup() dependency graph.
//
// Returns gin.IRouter; callers type-assert to http.Handler to serve requests.
func buildTestTokenEngine(t *testing.T) gin.IRouter {
	t.Helper()
	gin.SetMode(gin.TestMode)
	r := gin.New()
	tokh := handlers.NewAdminTestTokenHandler()
	// AdminAuth reads the package-level db.DB, which setupRouterTestDB swaps
	// for a sqlmock connection in these tests.
	r.GET("/admin/workspaces/:id/test-token", middleware.AdminAuth(db.DB), tokh.GetTestToken)
	return r
}
// setupRouterTestDB initialises db.DB with a sqlmock connection and returns
// the mock controller. Restores db.DB on test cleanup.
func setupRouterTestDB(t *testing.T) sqlmock.Sqlmock {
t.Helper()
mockDB, mock, err := sqlmock.New()
if err != nil {
t.Fatalf("sqlmock.New: %v", err)
}
prev := db.DB
db.DB = mockDB
t.Cleanup(func() {
db.DB = prev
mockDB.Close()
})
return mock
}
// TestTestTokenRoute_RequiresAdminAuth_WhenTokensExist verifies that once the
// platform has at least one live token, the test-token endpoint returns 401
// for callers that provide no Authorization header. This is the core security
// property added by the fix — without AdminAuth in the router the request
// would reach the handler and mint a new bearer for any workspace UUID.
// TestTestTokenRoute_RequiresAdminAuth_WhenTokensExist verifies that once the
// platform has at least one live token, the test-token endpoint returns 401
// for callers that provide no Authorization header. This is the core security
// property added by the fix — without AdminAuth in the router the request
// would reach the handler and mint a new bearer for any workspace UUID.
func TestTestTokenRoute_RequiresAdminAuth_WhenTokensExist(t *testing.T) {
	t.Setenv("MOLECULE_ENV", "development") // enable the handler itself
	mock := setupRouterTestDB(t)
	// HasAnyLiveTokenGlobal: platform has one enrolled workspace.
	mock.ExpectQuery("SELECT COUNT.*FROM workspace_auth_tokens").
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
	r := buildTestTokenEngine(t)
	w := httptest.NewRecorder()
	req := httptest.NewRequest("GET", "/admin/workspaces/ws-target/test-token", nil)
	// No Authorization header — should be rejected by AdminAuth.
	r.(http.Handler).ServeHTTP(w, req)
	if w.Code != http.StatusUnauthorized {
		t.Errorf("expected 401 when tokens exist and no auth header, got %d: %s", w.Code, w.Body.String())
	}
	// Also confirms the handler never ran: no workspace SELECT / token INSERT
	// expectations were registered, so any handler DB call would fail here.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestTestTokenRoute_FailOpenOnFreshInstall verifies that AdminAuth is
// fail-open on a fresh install (HasAnyLiveTokenGlobal == 0), so the test-token
// bootstrap path still works before the first workspace has registered.
func TestTestTokenRoute_FailOpenOnFreshInstall(t *testing.T) {
t.Setenv("MOLECULE_ENV", "development")
mock := setupRouterTestDB(t)
// HasAnyLiveTokenGlobal: no tokens yet — fresh install.
mock.ExpectQuery("SELECT COUNT.*FROM workspace_auth_tokens").
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
// Handler's own DB queries: workspace existence check + token insert.
mock.ExpectQuery("SELECT id FROM workspaces WHERE id =").
WithArgs("ws-bootstrap").
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-bootstrap"))
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
WillReturnResult(sqlmock.NewResult(0, 1))
r := buildTestTokenEngine(t)
w := httptest.NewRecorder()
req := httptest.NewRequest("GET", "/admin/workspaces/ws-bootstrap/test-token", nil)
r.(http.Handler).ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200 on fresh install (fail-open), got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations not met: %v", err)
}
}

View File

@ -256,6 +256,14 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
// (mirrors the /workspaces/:id/a2a pattern). Issue #249.
r.GET("/workspaces/:id/schedules/health", schedh.Health)
// Budget — per-workspace spend ceiling and current usage (#541).
// GET stays on wsAuth — a workspace agent reading its own budget is legitimate.
// PATCH is admin-only — workspace agents must not be able to self-clear their
// spending ceiling (that would defeat the entire budget enforcement feature).
budgeth := handlers.NewBudgetHandler()
wsAuth.GET("/budget", budgeth.GetBudget)
r.PATCH("/workspaces/:id/budget", middleware.AdminAuth(db.DB), budgeth.PatchBudget)
// Token management (user-facing create/list/revoke)
tokh := handlers.NewTokenHandler()
wsAuth.GET("/tokens", tokh.List)
@ -279,6 +287,22 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
wsAuth.PUT("/secrets", sech.Set)
wsAuth.DELETE("/secrets/:key", sech.Delete)
wsAuth.GET("/model", sech.GetModel)
// Token usage metrics — cost transparency (#593).
// WorkspaceAuth middleware (on wsAuth) binds the bearer to :id.
mtrh := handlers.NewMetricsHandler()
wsAuth.GET("/metrics", mtrh.GetMetrics)
// Cloudflare Artifacts demo integration (#595).
// All four routes require workspace-scoped bearer auth (wsAuth).
// CF credentials read from CF_ARTIFACTS_API_TOKEN / CF_ARTIFACTS_NAMESPACE;
// missing credentials return 503 so the handler still registers in
// every deployment — the demo is gated on env vars, not compilation.
arth := handlers.NewArtifactsHandler()
wsAuth.POST("/artifacts", arth.Create)
wsAuth.GET("/artifacts", arth.Get)
wsAuth.POST("/artifacts/fork", arth.Fork)
wsAuth.POST("/artifacts/token", arth.Token)
}
// Global secrets — /settings/secrets is the canonical path; /admin/secrets kept for backward compat.
@ -297,11 +321,24 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
}
// Admin — test token minting (issue #6). Hidden in production via TestTokensEnabled().
// Registered at root (not inside AdminAuth) because it is itself the bootstrap for
// acquiring a token, and it's gated on MOLECULE_ENV / MOLECULE_ENABLE_TEST_TOKENS.
// AdminAuth is a second defence-in-depth layer: on a fresh install with no tokens yet,
// AdminAuth is fail-open (HasAnyLiveTokenGlobal == 0), so the bootstrap still works.
// Once any token exists, callers must present a valid bearer — unauthenticated workspace-
// UUID enumeration is blocked even on non-production instances.
{
tokh := handlers.NewAdminTestTokenHandler()
r.GET("/admin/workspaces/:id/test-token", tokh.GetTestToken)
r.GET("/admin/workspaces/:id/test-token", middleware.AdminAuth(db.DB), tokh.GetTestToken)
}
// Admin — GitHub App installation token refresh (issue #547).
// Long-running workspaces (>60 min) use this endpoint to refresh
// GH_TOKEN without restarting. Returns the current installation token
// from the github-app-auth plugin's in-process cache (which proactively
// refreshes 5 min before expiry). 404 when no GitHub App is configured
// (dev / self-hosted without GITHUB_APP_ID).
{
ghTokH := handlers.NewGitHubTokenHandler(wh.TokenRegistry())
r.GET("/admin/github-installation-token", middleware.AdminAuth(db.DB), ghTokH.GetInstallationToken)
}
// Terminal — shares Docker client with provisioner
@ -390,6 +427,16 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
// depth keeps the route behind AdminAuth regardless.
r.POST("/org/import", middleware.AdminAuth(db.DB), orgh.Import)
// Org plugin allowlist — tool governance (#591).
// Both endpoints are admin-gated: reading the allowlist reveals approved
// tooling policy; writing it enforces org-level install governance.
{
allowlistAdmin := r.Group("", middleware.AdminAuth(db.DB))
aplh := handlers.NewOrgPluginAllowlistHandler()
allowlistAdmin.GET("/orgs/:id/plugins/allowlist", aplh.GetAllowlist)
allowlistAdmin.PUT("/orgs/:id/plugins/allowlist", aplh.PutAllowlist)
}
// Channels (social integrations — Telegram, Slack, Discord, etc.)
chh := handlers.NewChannelHandler(channelMgr)
r.GET("/channels/adapters", chh.ListAdapters)
@ -408,6 +455,11 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
r.POST("/channels/discover", middleware.AdminAuth(db.DB), chh.Discover)
r.POST("/webhooks/:type", chh.Webhook)
// SSE — AG-UI compatible event stream per workspace (#590).
// WorkspaceAuth middleware (on wsAuth) binds the bearer token to :id.
sseh := handlers.NewSSEHandler(broadcaster)
wsAuth.GET("/events/stream", sseh.StreamEvents)
// WebSocket
sh := handlers.NewSocketHandler(hub)
r.GET("/ws", sh.HandleConnect)

View File

@ -0,0 +1 @@
-- Down migration: reverse of the workspace_token_usage up migration (#593).
DROP TABLE IF EXISTS workspace_token_usage;

View File

@ -0,0 +1,17 @@
-- Per-workspace LLM token usage tracking (#593 — canvas cost transparency).
-- Stores UTC-day aggregates upserted by the A2A proxy after each LLM call.
-- estimated_cost_usd is computed server-side using fixed per-model rates
-- (default: Claude Sonnet input $3/1M, output $15/1M).
CREATE TABLE IF NOT EXISTS workspace_token_usage (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Cascade: usage rows disappear together with their workspace.
workspace_id UUID NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE,
-- Start of the UTC-day aggregation bucket (see header comment).
period_start TIMESTAMPTZ NOT NULL,
input_tokens BIGINT NOT NULL DEFAULT 0,
output_tokens BIGINT NOT NULL DEFAULT 0,
-- Number of LLM calls folded into this bucket.
call_count INTEGER NOT NULL DEFAULT 0,
-- 6 decimal places: sub-cent precision for per-call cost increments.
estimated_cost_usd NUMERIC(12,6) NOT NULL DEFAULT 0,
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- One row per (workspace, day) — the conflict target for the A2A proxy's upserts.
CREATE UNIQUE INDEX IF NOT EXISTS workspace_token_usage_ws_period
ON workspace_token_usage(workspace_id, period_start);

View File

@ -0,0 +1 @@
-- Down migration: reverse of the org_plugin_allowlist up migration (#591).
DROP TABLE IF EXISTS org_plugin_allowlist;

View File

@ -0,0 +1,17 @@
-- Per-org plugin allowlist for tool governance (#591).
-- When an org has at least one entry in this table, workspace agents may only
-- install plugins listed here. An empty allowlist means "allow all" (backward
-- compatible with existing deployments).
--
-- org_id references the root/parent workspace that acts as the org anchor.
-- enabled_by records the workspace ID of the admin who added the entry.
CREATE TABLE IF NOT EXISTS org_plugin_allowlist (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Cascade: allowlist entries vanish with the org-anchor workspace.
org_id UUID NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE,
plugin_name TEXT NOT NULL,
-- Workspace ID of the admin who added the entry (stored as text, not a FK).
enabled_by TEXT NOT NULL,
enabled_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- A plugin may appear at most once per org: re-adding the same plugin
-- conflicts instead of duplicating the entry.
CREATE UNIQUE INDEX IF NOT EXISTS org_plugin_allowlist_org_plugin
ON org_plugin_allowlist(org_id, plugin_name);

View File

@ -0,0 +1,3 @@
-- Down migration: reverse of the budget-limit up migration (#541).
ALTER TABLE workspaces
DROP COLUMN IF EXISTS budget_limit,
DROP COLUMN IF EXISTS monthly_spend;

View File

@ -0,0 +1,11 @@
-- Per-workspace monthly budget limit (#541).
-- NULL means no limit. When monthly_spend reaches budget_limit, the A2A
-- proxy returns 402 {"error":"workspace budget limit exceeded"} and rejects
-- further A2A calls until budget_limit is raised or monthly_spend is reset.
--
-- Units: USD cents (integer). budget_limit=500 means $5.00/month.
-- monthly_spend is updated by the workspace via the heartbeat endpoint;
-- agents report their accumulated LLM API cost each heartbeat cycle.
ALTER TABLE workspaces
-- NULL = no limit; units are USD cents (see header comment).
ADD COLUMN IF NOT EXISTS budget_limit BIGINT DEFAULT NULL,
-- Accumulated spend this month, USD cents; updated via the heartbeat endpoint.
ADD COLUMN IF NOT EXISTS monthly_spend BIGINT NOT NULL DEFAULT 0;

View File

@ -0,0 +1,2 @@
-- Reverse of 028_workspace_artifacts.up.sql
DROP TABLE IF EXISTS workspace_artifacts;

View File

@ -0,0 +1,31 @@
-- 028_workspace_artifacts: store Cloudflare Artifacts repo linkage per workspace.
--
-- Each workspace can be linked to exactly one Cloudflare Artifacts repo
-- (the primary snapshot store). Additional repos (forks) are ephemeral and
-- tracked only via the CF API — not in this table.
--
-- Remote URLs are stored for informational display only; callers must
-- call POST /workspaces/:id/artifacts/token to obtain a fresh git credential.
CREATE TABLE IF NOT EXISTS workspace_artifacts (
-- NOTE(review): id/workspace_id are TEXT here while sibling migrations
-- (workspace_token_usage, org_plugin_allowlist) declare UUID columns that
-- reference workspaces(id). Confirm the actual type of workspaces.id — a
-- TEXT↔UUID foreign key fails to create on PostgreSQL, so one of these
-- migration families cannot apply cleanly against the same schema.
id TEXT NOT NULL DEFAULT gen_random_uuid()::text,
workspace_id TEXT NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE,
cf_repo_name TEXT NOT NULL,
cf_namespace TEXT NOT NULL,
-- remote_url is the base Git remote (without embedded credentials).
-- Credentials are obtained on-demand via POST /tokens.
remote_url TEXT,
description TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
CONSTRAINT workspace_artifacts_pkey PRIMARY KEY (id)
);

-- Each workspace may have at most one linked CF Artifacts repo.
CREATE UNIQUE INDEX IF NOT EXISTS uq_workspace_artifacts_workspace_id
ON workspace_artifacts (workspace_id);

-- Allow fast lookup by CF repo name within a namespace.
CREATE INDEX IF NOT EXISTS idx_workspace_artifacts_cf_repo
ON workspace_artifacts (cf_namespace, cf_repo_name);

View File

@ -48,6 +48,7 @@ import (
"context"
"fmt"
"sync"
"time"
)
// EnvMutator is implemented by plugins that want to inject env vars
@ -64,6 +65,34 @@ type EnvMutator interface {
MutateEnv(ctx context.Context, workspaceID string, env map[string]string) error
}
// TokenProvider is an optional interface that EnvMutator implementations
// may also satisfy. When a mutator implements TokenProvider the platform
// can serve GET /admin/github-installation-token, allowing long-running
// workspaces to fetch a fresh GitHub token without restarting.
//
// # Why a separate interface?
//
// EnvMutator.MutateEnv is called once at provision time and writes into
// an env map. Calling it again just to read the current token would be
// semantically wrong and potentially unsafe (the env map is a live
// workspace struct). TokenProvider cleanly separates "what do I inject
// at boot?" from "what is the live token right now?".
//
// # Plugin contract
//
// Token must return the current valid token and the time at which it
// will expire. If the plugin's internal cache is past its refresh
// threshold it must block until a new token is obtained before
// returning. Token should never return an expired token — callers rely
// on this guarantee and do not do their own expiry check.
//
// Returning a non-nil error causes the HTTP handler to respond 500 and
// log "[github] token refresh failed: <err>". The workspace will retry
// on its next credential-helper invocation.
//
// Implementations should honour ctx cancellation while blocking on a
// refresh — presumably the HTTP handler passes its request context
// (NOTE(review): confirm at the call site).
type TokenProvider interface {
	Token(ctx context.Context) (token string, expiresAt time.Time, err error)
}
// Registry holds the ordered list of EnvMutator instances the
// provisioner runs before each workspace boot. Safe for concurrent
// registration + execution.
@ -112,6 +141,26 @@ func (r *Registry) Names() []string {
return names
}
// FirstTokenProvider scans the registered mutators in registration order
// and returns the first one that also implements TokenProvider. It
// returns nil when no mutator provides tokens — or when r itself is nil —
// which callers treat as "endpoint not configured".
//
// Backs GET /admin/github-installation-token so long-running workspaces
// can refresh their GITHUB_TOKEN without a container restart.
func (r *Registry) FirstTokenProvider() TokenProvider {
	if r == nil {
		return nil
	}
	r.mu.RLock()
	defer r.mu.RUnlock()
	for _, mutator := range r.mutators {
		provider, ok := mutator.(TokenProvider)
		if !ok {
			continue
		}
		return provider
	}
	return nil
}
// Run calls every registered mutator in order. The first one to return
// a non-nil error aborts the chain — subsequent mutators do NOT run,
// and the error is returned to the caller (which marks the workspace

View File

@ -49,6 +49,13 @@ RUN ln -s /app/a2a_cli.py /usr/local/bin/a2a && chmod +x /app/a2a_cli.py /app/a2
# gh-wrapper.sh is installed as /usr/local/bin/gh so it resolves on PATH
# wherever the workspace invokes `gh`.
COPY scripts/gh-wrapper.sh /usr/local/bin/gh
RUN chmod +x /usr/local/bin/gh

# Copy the git credential helper so entrypoint.sh can register it at boot.
# molecule-git-token-helper.sh fetches a fresh GitHub App installation token
# from the platform on every git push/fetch, preventing stale-token failures
# after the ~60 min GitHub App token TTL (issue #613 / #547).
COPY scripts/molecule-git-token-helper.sh ./scripts/
RUN chmod +x ./scripts/molecule-git-token-helper.sh

# Dirs and permissions
# agent owns /app, its home, and /workspace so the runtime can write without root.
RUN mkdir -p /workspace /plugins /home/agent/.claude /home/agent/.config /home/agent/.local && \
chown -R agent:agent /app /home/agent /workspace

View File

@ -55,6 +55,31 @@ else:
echo "=== Molecule AI Workspace ==="
echo "Runtime: $RUNTIME"
# ──────────────────────────────────────────────────────────
# GitHub credential helper — issue #547
# ──────────────────────────────────────────────────────────
# GitHub App installation tokens expire after ~60 min. The platform
# exposes GET /admin/github-installation-token (backed by the plugin's
# in-process refreshing cache) so workspaces can always get a valid
# token without restarting.
#
# Register molecule-git-token-helper.sh as the git credential helper for
# github.com. git calls it on every push/fetch; it hits the platform
# endpoint and emits a fresh token. Falls through to any existing
# credential helper (e.g. operator .env PAT) if the platform is
# unreachable.
#
# Idempotent — safe to re-run on restart.
# Path baked into the image: the Dockerfile copies scripts/ into /app/scripts/.
HELPER_SCRIPT="/app/scripts/molecule-git-token-helper.sh"
if [ -f "${HELPER_SCRIPT}" ]; then
# The "!" prefix makes git execute the value as an external helper command.
# Registration is best-effort: errors are silenced (2>/dev/null || true) so a
# broken git config never blocks container startup.
git config --global \
"credential.https://github.com.helper" \
"!${HELPER_SCRIPT}" 2>/dev/null || true
echo "[entrypoint] git credential helper registered (molecule-git-token-helper)"
else
echo "[entrypoint] WARNING: molecule-git-token-helper.sh not found at ${HELPER_SCRIPT} — GitHub tokens may expire after 60 min"
fi
# NOTE: Adapter-specific deps are now pre-installed in each adapter's Docker image
# (standalone template repos). Each image installs molecule-ai-workspace-runtime
# from PyPI plus the adapter-specific requirements. No per-runtime pip install needed here.

View File

@ -21,6 +21,37 @@ OTEL activity span so operators can inspect the thinking trace in Langfuse
A2A reply — doing so would contaminate the agent's next-turn context with
the model's internal scratchpad.
Native tools (#497)
-------------------
Tool definitions are passed via the OpenAI-native ``tools`` parameter instead
of injecting them as text into the system prompt. Each entry must follow the
standard OpenAI function-calling schema::
{
"type": "function",
"function": {
"name": "...",
"description": "...",
"parameters": { # JSON Schema object
"type": "object",
"properties": {...},
"required": [...]
}
}
}
**Empty list rule:** when ``tools`` is ``None`` or ``[]``, the ``tools``
parameter is **omitted** from the API call entirely. Sending ``tools=[]``
to some OpenAI-compat providers causes a 400 / unexpected behaviour; omitting
the key is always safe and signals "no tool use."
**Tool-call response handling:** when the model returns
``choice.message.tool_calls`` with no text content (``finish_reason`` is
``"tool_calls"``), the executor serialises the tool-call list as a JSON string
and enqueues that as the A2A reply. This keeps the executor thin (single API
call per turn, no ReAct loop) while surfacing function-call intent to the
caller in a structured, parseable format.
Hermes 3 / unknown models
--------------------------
No ``extra_body`` is sent. The response is processed identically to any
@ -189,6 +220,7 @@ class HermesA2AExecutor(AgentExecutor):
- System prompt injected as the first ``messages[]`` entry.
- Hermes 4 reasoning enabled via ``extra_body`` when supported.
- Reasoning trace logged to OTEL span — never echoed in the reply.
- Tool definitions passed via native ``tools`` parameter when supplied.
Parameters
----------
@ -208,13 +240,18 @@ class HermesA2AExecutor(AgentExecutor):
response_format:
Optional OpenAI-native ``response_format`` dict forwarded verbatim
to ``chat.completions.create()``. Supported types:
``{"type": "json_schema", "json_schema": {"name": ..., "schema": {...}}}
``
``{"type": "json_schema", "json_schema": {"name": ..., "schema": {...}}}``
``{"type": "json_object"}``
``{"type": "text"}``
When ``None`` (default) the parameter is omitted from the API call.
Invalid dicts cause ``execute()`` to enqueue an error and return
early without calling the API.
tools:
Optional list of OpenAI-format tool definitions to pass via the
native ``tools`` parameter. Each entry must have ``"type"`` and
``"function"`` keys matching the OpenAI function-calling schema.
When ``None`` or ``[]``, the ``tools`` key is **omitted** from the API
call entirely (never sent as ``tools=[]``).
_client:
Inject a pre-built ``AsyncOpenAI`` (or compatible mock) for
testing only. When provided, ``base_url`` and ``api_key`` are
@ -229,6 +266,7 @@ class HermesA2AExecutor(AgentExecutor):
api_key: str | None = None,
heartbeat: "HeartbeatLoop | None" = None,
response_format: "dict | None" = None,
tools: list[dict] | None = None,
_client: Any = None,
) -> None:
self.model = model
@ -236,6 +274,9 @@ class HermesA2AExecutor(AgentExecutor):
self._heartbeat = heartbeat
self._response_format = response_format
self._provider = ProviderConfig(model)
# Empty list and None are treated identically: no tools → omit the
# parameter from the API call rather than sending tools=[].
self._tools: list[dict] = list(tools) if tools else []
if _client is not None:
# Test injection path — skip real AsyncOpenAI construction so
@ -320,10 +361,15 @@ class HermesA2AExecutor(AgentExecutor):
Sequence:
1. Extract user text from A2A message parts.
2. Build ``messages[]`` (optional system + user).
3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4.
3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4 and
``tools`` when tool definitions are configured.
4. Extract and log reasoning trace — it does NOT appear in the reply.
5. Enqueue a final ``Message`` with the content text.
5a. If the model returned text content, enqueue it as the reply.
5b. If the model returned tool calls with no text (``finish_reason``
``"tool_calls"``), serialise the calls as JSON and enqueue that.
"""
import json
from shared_runtime import extract_message_text
user_input = extract_message_text(context)
@ -353,8 +399,8 @@ class HermesA2AExecutor(AgentExecutor):
if self._provider.reasoning_supported:
extra_body = {"reasoning": {"enabled": True}}
# Build create() kwargs; omit response_format entirely when None so
# strict / older providers do not receive an unexpected field.
# Build create() kwargs; omit response_format and tools entirely when
# not set so strict / older providers do not receive unexpected fields.
create_kwargs: dict = {
"model": self.model,
"messages": messages,
@ -362,6 +408,8 @@ class HermesA2AExecutor(AgentExecutor):
}
if self._response_format is not None:
create_kwargs["response_format"] = self._response_format
if self._tools:
create_kwargs["tools"] = self._tools
try:
response = await self._client.chat.completions.create(**create_kwargs)
@ -388,6 +436,37 @@ class HermesA2AExecutor(AgentExecutor):
# Log to OTEL — intentionally omitted from the A2A reply.
self._log_reasoning(context, reasoning, reasoning_details)
# Handle tool-call response: when the model returns tool calls
# with no text content, serialise the calls as JSON so the caller
# receives structured, parseable output. This keeps the executor
# thin (single API call per turn) while not silently discarding
# function-call intent.
if not content:
tool_calls = getattr(choice.message, "tool_calls", None)
if tool_calls:
serialised = json.dumps([
{
"id": getattr(tc, "id", ""),
"type": getattr(tc, "type", "function"),
"function": {
"name": getattr(
getattr(tc, "function", None), "name", ""
),
"arguments": getattr(
getattr(tc, "function", None), "arguments", "{}"
),
},
}
for tc in tool_calls
])
logger.info(
"hermes_executor: tool_calls response [model=%s n=%d]",
self.model,
len(tool_calls),
)
await event_queue.enqueue_event(new_agent_text_message(serialised))
return
final_text = content.strip() or "(no response generated)"
await event_queue.enqueue_event(new_agent_text_message(final_text))

View File

@ -319,9 +319,25 @@ def _deep_merge_hooks(existing: dict, fragment: dict) -> dict:
out.setdefault("hooks", {})
for event, handlers in fragment.get("hooks", {}).items():
out["hooks"].setdefault(event, [])
out["hooks"][event].extend(handlers)
for key, val in fragment.items():
if key == "hooks":
# Build a set of already-present handler fingerprints so that
# re-installing the same plugin fragment does not append duplicates.
# Key: (matcher, frozenset-of-commands) — same logic the issue spec
# describes. Two handlers are considered identical when they watch the
# same matcher pattern and invoke exactly the same set of commands.
seen: set[tuple[str, frozenset[str]]] = {
(h.get("matcher", ""), frozenset(c.get("command", "") for c in h.get("hooks", [])))
for h in out["hooks"][event]
}
for handler in handlers:
hkey = (
handler.get("matcher", ""),
frozenset(c.get("command", "") for c in handler.get("hooks", [])),
)
if hkey not in seen:
seen.add(hkey)
out["hooks"][event].append(handler)
for top_key, val in fragment.items():
if top_key == "hooks":
continue
out.setdefault(key, val)
out.setdefault(top_key, val)
return out

View File

@ -0,0 +1,112 @@
#!/bin/bash
# molecule-git-token-helper.sh — git credential helper for GitHub App tokens
#
# Fetches a fresh GitHub App installation token from the Molecule AI
# platform endpoint GET /admin/github-installation-token on every git
# push/fetch, so workspace containers never use an expired GH_TOKEN after
# the ~60 min GitHub App token TTL.
#
# # Setup (called once at provision time or initial_prompt)
#
#   git config --global \
#     "credential.https://github.com.helper" \
#     "!/workspace-template/scripts/molecule-git-token-helper.sh"
#
# # How git calls this helper
#
# git passes the action as the first positional arg. The protocol is:
#   get   → output credentials on stdout (we handle this)
#   store → persist credentials (no-op — we never cache)
#   erase → revoke credentials (no-op — platform manages lifecycle)
#
# On `get`, git reads key=value pairs terminated by an empty line.
# We must emit at minimum:
#   username=x-access-token
#   password=<token>
#   (blank line)
#
# # Auth
#
# The platform endpoint requires a valid workspace bearer token. The
# token is stored at ${CONFIGS_DIR}/.auth_token (written by platform_auth.py
# on first /registry/register). Workspace env var PLATFORM_URL defaults
# to http://platform:8080.
#
# # Fallback
#
# If the platform endpoint is unreachable (e.g. network partition) or
# returns non-200, the script exits 1 without printing credentials so git
# will fall through to the next helper in the chain (if any). This
# preserves the operator's fallback PAT from .env if present.
#
# # gh CLI re-auth (30-min cron)
#
# To also fix `gh` CLI auth, run this from a workspace cron prompt:
#
#   token=$(bash /workspace-template/scripts/molecule-git-token-helper.sh _fetch_token)
#   echo "$token" | gh auth login --with-token
#
# (The _fetch_token private action returns only the raw token string.)

set -euo pipefail

PLATFORM_URL="${PLATFORM_URL:-http://platform:8080}"
CONFIGS_DIR="${CONFIGS_DIR:-/configs}"
TOKEN_FILE="${CONFIGS_DIR}/.auth_token"
ENDPOINT="${PLATFORM_URL}/admin/github-installation-token"

# _fetch_token — internal helper; also callable directly from cron.
# Outputs the raw token string on success; exits non-zero on failure.
_fetch_token() {
    # Declared separately from assignment so the command substitution's exit
    # status is not masked by `local` (a `local x=$(cmd)` always succeeds).
    local bearer response token

    if [ ! -f "${TOKEN_FILE}" ]; then
        echo "[molecule-git-token-helper] .auth_token not found at ${TOKEN_FILE}" >&2
        exit 1
    fi

    # Strip all whitespace (including the trailing newline) from the stored token.
    bearer=$(tr -d '[:space:]' < "${TOKEN_FILE}")
    if [ -z "${bearer}" ]; then
        echo "[molecule-git-token-helper] .auth_token is empty" >&2
        exit 1
    fi

    # -s suppresses the progress meter; -S re-enables curl's error message so
    # the captured stderr (2>&1) is non-empty in the failure log below
    # (plain -s would leave ${response} blank on failure); -f turns HTTP >= 400
    # into a non-zero exit so the || branch fires.
    response=$(curl -sSf \
        -H "Authorization: Bearer ${bearer}" \
        -H "Accept: application/json" \
        --max-time 10 \
        "${ENDPOINT}" 2>&1) || {
        echo "[molecule-git-token-helper] platform request failed: ${response}" >&2
        exit 1
    }

    # Parse {"token":"ghs_...","expires_at":"..."} with sed (no jq dependency).
    token=$(echo "${response}" | sed -n 's/.*"token":"\([^"]*\)".*/\1/p')
    if [ -z "${token}" ]; then
        echo "[molecule-git-token-helper] empty token in platform response: ${response}" >&2
        exit 1
    fi

    echo "${token}"
}

ACTION="${1:-get}"

case "${ACTION}" in
    get)
        token=$(_fetch_token) || exit 1
        # Emit git credential protocol response.
        printf 'username=x-access-token\n'
        printf 'password=%s\n' "${token}"
        printf '\n'
        ;;
    store|erase)
        # No-op — the platform manages token lifecycle.
        ;;
    _fetch_token)
        # Private action for cron-based `gh auth login --with-token`.
        _fetch_token
        ;;
    *)
        echo "[molecule-git-token-helper] unknown action: ${ACTION}" >&2
        exit 1
        ;;
esac

View File

@ -6,13 +6,16 @@ Coverage targets
- ProviderConfig capability flags derived from model name
- _validate_response_format() valid types, invalid type, missing fields (#498)
- HermesA2AExecutor.__init__ field assignment + client injection,
response_format stored (#498)
response_format stored (#498), tools (#497)
- HermesA2AExecutor._build_messages system prompt + user turn assembly
- HermesA2AExecutor._log_reasoning OTEL span emission + swallowed errors
- HermesA2AExecutor.execute happy path, empty input, API error,
Hermes 4 extra_body, Hermes 3 no extra_body,
reasoning not in reply, reasoning_details,
response_format forwarded / omitted / invalid (#498)
response_format forwarded / omitted / invalid (#498),
tools serialized in request body (#497),
empty tools no tools field (#497),
tool_call response JSON text (#497)
- HermesA2AExecutor.cancel TaskStatusUpdateEvent emitted
The ``openai`` module is stubbed in sys.modules so no real API call is made.
@ -847,3 +850,263 @@ async def test_execute_invalid_response_format_returns_error_no_api_call():
# API must NOT have been called
mock_client.chat.completions.create.assert_not_called()
# ---------------------------------------------------------------------------
# Native tools parameter — issue #497
# ---------------------------------------------------------------------------
# Minimal OpenAI-format tool definition used across the tools tests.
_SAMPLE_TOOL: dict = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current weather for a location.",
        # JSON Schema describing the function's arguments (one required string).
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City name"},
            },
            "required": ["location"],
        },
    },
}
# Second tool — lets tests assert multi-tool forwarding and ordering.
_SAMPLE_TOOL_2: dict = {
    "type": "function",
    "function": {
        "name": "search_web",
        "description": "Search the web.",
        "parameters": {
            "type": "object",
            "properties": {"query": {"type": "string"}},
            "required": ["query"],
        },
    },
}
class _FakeFunction:
"""Stand-in for openai ChatCompletionMessageToolCall.function."""
def __init__(self, name: str, arguments: str) -> None:
self.name = name
self.arguments = arguments
class _FakeToolCall:
    """Minimal stand-in for an OpenAI ``ChatCompletionMessageToolCall``.

    ``type`` is always ``"function"`` — the only kind the executor handles.
    """

    def __init__(self, tc_id: str, name: str, arguments: str = "{}") -> None:
        self.id, self.type = tc_id, "function"
        self.function = _FakeFunction(name, arguments)
def _make_tool_call_response(tool_calls: list, content: str = ""):
"""Build a mock API response that includes tool_calls on the message."""
class _MsgWithToolCalls:
def __init__(self):
self.content = content
self.tool_calls = tool_calls
choice = MagicMock()
choice.message = _MsgWithToolCalls()
response = MagicMock()
response.choices = [choice]
return response
def test_constructor_tools_stored_correctly():
    """A provided tools list lands on the private ``_tools`` attribute."""
    ex = HermesA2AExecutor(
        model="hermes-4",
        tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2],
        _client=MagicMock(),
    )
    assert ex._tools == [_SAMPLE_TOOL, _SAMPLE_TOOL_2]


def test_constructor_none_tools_stored_as_empty_list():
    """tools=None normalises to an empty list — never stored as None."""
    ex = HermesA2AExecutor(model="hermes-4", tools=None, _client=MagicMock())
    assert ex._tools == []


def test_constructor_empty_list_stored_as_empty_list():
    """tools=[] stays an empty list."""
    ex = HermesA2AExecutor(model="hermes-4", tools=[], _client=MagicMock())
    assert ex._tools == []


def test_constructor_tools_is_independent_copy():
    """The executor keeps its own copy — caller-side mutation has no effect."""
    supplied = [_SAMPLE_TOOL]
    ex = HermesA2AExecutor(model="hermes-4", tools=supplied, _client=MagicMock())
    supplied.append(_SAMPLE_TOOL_2)
    assert ex._tools == [_SAMPLE_TOOL]
@pytest.mark.asyncio
async def test_execute_tools_serialized_in_request_body():
    """A non-empty tools list reaches chat.completions.create as ``tools=``."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("Paris is sunny.")
    )
    ex = HermesA2AExecutor(model="hermes-4", tools=[_SAMPLE_TOOL], _client=client)
    await ex.execute(_make_context("weather?"), AsyncMock())
    kwargs = client.chat.completions.create.call_args[1]
    assert kwargs.get("tools") == [_SAMPLE_TOOL]


@pytest.mark.asyncio
async def test_execute_multiple_tools_all_forwarded():
    """Every configured tool definition is forwarded — none dropped."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    ex = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2], _client=client
    )
    await ex.execute(_make_context("search?"), AsyncMock())
    assert client.chat.completions.create.call_args[1]["tools"] == [
        _SAMPLE_TOOL,
        _SAMPLE_TOOL_2,
    ]
@pytest.mark.asyncio
async def test_execute_empty_tools_no_tools_field_in_request():
    """tools=[] → the API call carries no 'tools' key at all (never tools=[])."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    ex = HermesA2AExecutor(model="hermes-4", tools=[], _client=client)
    await ex.execute(_make_context("hello"), AsyncMock())
    assert "tools" not in client.chat.completions.create.call_args[1]


@pytest.mark.asyncio
async def test_execute_none_tools_no_tools_field_in_request():
    """tools=None → the API call carries no 'tools' key."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    ex = HermesA2AExecutor(model="hermes-4", tools=None, _client=client)
    await ex.execute(_make_context("hello"), AsyncMock())
    assert "tools" not in client.chat.completions.create.call_args[1]


@pytest.mark.asyncio
async def test_execute_default_no_tools_field_in_request():
    """Omitting the tools kwarg entirely → no 'tools' key in the API call."""
    ex, client = _make_executor(model="hermes-4")
    client.chat.completions.create.return_value = _make_api_response("ok")
    await ex.execute(_make_context("hello"), AsyncMock())
    assert "tools" not in client.chat.completions.create.call_args[1]
@pytest.mark.asyncio
async def test_execute_tool_call_response_returns_json():
    """tool_calls + empty content → reply is the calls serialised as JSON."""
    import json

    call = _FakeToolCall("call_abc123", "get_weather", '{"location":"Paris"}')
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(tool_calls=[call], content="")
    )
    ex = HermesA2AExecutor(model="hermes-4", tools=[_SAMPLE_TOOL], _client=client)
    queue = AsyncMock()
    await ex.execute(_make_context("weather in Paris?"), queue)

    queue.enqueue_event.assert_called_once()
    payload = json.loads(queue.enqueue_event.call_args[0][0])
    assert isinstance(payload, list)
    assert len(payload) == 1
    entry = payload[0]
    assert entry["id"] == "call_abc123"
    assert entry["type"] == "function"
    assert entry["function"]["name"] == "get_weather"
    assert entry["function"]["arguments"] == '{"location":"Paris"}'


@pytest.mark.asyncio
async def test_execute_multiple_tool_calls_all_in_json():
    """Every tool call appears in the serialised JSON reply, in order."""
    import json

    calls = [
        _FakeToolCall("call_1", "get_weather", '{"location":"Paris"}'),
        _FakeToolCall("call_2", "search_web", '{"query":"news"}'),
    ]
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(tool_calls=calls, content="")
    )
    ex = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2], _client=client
    )
    queue = AsyncMock()
    await ex.execute(_make_context("do both"), queue)
    parsed = json.loads(queue.enqueue_event.call_args[0][0])
    assert [c["function"]["name"] for c in parsed] == ["get_weather", "search_web"]
@pytest.mark.asyncio
async def test_execute_text_content_wins_over_tool_calls():
    """Text content takes precedence when both text AND tool_calls are present."""
    call = _FakeToolCall("call_xyz", "get_weather", '{"location":"Berlin"}')
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(
            tool_calls=[call], content="The weather is fine."
        )
    )
    ex = HermesA2AExecutor(model="hermes-4", tools=[_SAMPLE_TOOL], _client=client)
    queue = AsyncMock()
    await ex.execute(_make_context("weather?"), queue)
    assert queue.enqueue_event.call_args[0][0] == "The weather is fine."

View File

@ -7,6 +7,7 @@ Covers:
- Empty rules directory doesn't write an empty block
- README.md / CHANGELOG.md are skipped at the root (not treated as fragments)
- Uninstall is safe on a plugin that was never installed
- _deep_merge_hooks deduplication (issue #566)
"""
from __future__ import annotations
@ -393,3 +394,90 @@ async def test_setup_sh_absent_no_warning(tmp_path: Path):
result = await AgentskillsAdaptor("p", "claude_code").install(_make_ctx(configs, plugin))
assert result.warnings == []
# ---------------------------------------------------------------------------
# _deep_merge_hooks deduplication — issue #566
# ---------------------------------------------------------------------------
from plugins_registry.builtins import _deep_merge_hooks # noqa: E402
def _make_fragment(event: str, matcher: str, command: str) -> dict:
"""Build a minimal settings-fragment dict for one hook handler."""
return {
"hooks": {
event: [
{
"matcher": matcher,
"hooks": [{"type": "command", "command": command}],
}
]
}
}
def test_deep_merge_hooks_first_install_adds_handler():
    """A fresh merge into {} yields exactly one handler for the event."""
    merged = _deep_merge_hooks(
        {}, _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh")
    )
    assert [h["matcher"] for h in merged["hooks"]["PreToolUse"]] == ["Bash"]


def test_deep_merge_hooks_dedup_on_reinstall():
    """Applying the identical fragment twice leaves a single handler."""
    fragment = _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh")
    merged = _deep_merge_hooks(_deep_merge_hooks({}, fragment), fragment)
    assert len(merged["hooks"]["PreToolUse"]) == 1, (
        "Re-installing the same fragment must not append a duplicate handler"
    )


def test_deep_merge_hooks_dedup_three_reinstalls():
    """Repeated installs (issue #566 reported 34x duplication) yield one entry."""
    fragment = _make_fragment("PostToolUse", "Write", "/hooks/format.sh")
    merged: dict = {}
    for _ in range(3):
        merged = _deep_merge_hooks(merged, fragment)
    assert len(merged["hooks"]["PostToolUse"]) == 1
def test_deep_merge_hooks_different_matchers_both_kept():
    """Distinct matchers must not be collapsed by the dedup logic."""
    merged = _deep_merge_hooks(
        {}, _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh")
    )
    merged = _deep_merge_hooks(
        merged, _make_fragment("PreToolUse", "Edit", "/hooks/lint.sh")
    )
    assert len(merged["hooks"]["PreToolUse"]) == 2


def test_deep_merge_hooks_different_commands_both_kept():
    """Same matcher but different commands → both handlers survive."""
    merged = _deep_merge_hooks(
        {}, _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh")
    )
    merged = _deep_merge_hooks(
        merged, _make_fragment("PreToolUse", "Bash", "/hooks/security.sh")
    )
    assert len(merged["hooks"]["PreToolUse"]) == 2


def test_deep_merge_hooks_existing_user_hooks_preserved():
    """Pre-existing user handlers for the same event are never dropped."""
    existing = {
        "hooks": {
            "PreToolUse": [
                {
                    "matcher": "Bash",
                    "hooks": [{"type": "command", "command": "/user/custom.sh"}],
                }
            ]
        }
    }
    merged = _deep_merge_hooks(
        existing, _make_fragment("PreToolUse", "Edit", "/hooks/lint.sh")
    )
    assert {h["matcher"] for h in merged["hooks"]["PreToolUse"]} == {"Bash", "Edit"}


def test_deep_merge_hooks_top_level_keys_merged():
    """setdefault semantics for non-hook keys: existing value wins, new keys added."""
    merged = _deep_merge_hooks(
        {"someKey": "old"},
        {"someKey": "new", "anotherKey": "value", "hooks": {}},
    )
    assert (merged["someKey"], merged["anotherKey"]) == ("old", "value")