merge: sync staging into feat/wire-max-concurrent-from-template-1408

This commit is contained in:
rabbitblood 2026-04-26 11:11:30 -07:00
commit 4e6f6bf0f3
101 changed files with 10401 additions and 547 deletions

View File

@ -1,7 +1,100 @@
import type { NextConfig } from "next";
import { existsSync, readFileSync } from "node:fs";
import { dirname, join } from "node:path";
// Load NEXT_PUBLIC_* vars from the monorepo root .env so a fresh
// `pnpm dev` works without a per-developer canvas/.env.local. Next.js
// only auto-loads .env from the project root by default — but our
// canonical config (NEXT_PUBLIC_PLATFORM_URL, NEXT_PUBLIC_WS_URL,
// MOLECULE_ENV, etc.) lives at the monorepo root, gitignored, shared
// by the Go platform binary. Without this, the canvas falls back to
// `window.location` (`ws://localhost:3000/ws`) and the WS pill stays
// "Reconnecting" forever because Next.js dev doesn't serve /ws.
//
// Mirrors workspace-server/cmd/server/dotenv.go's monorepo-rooted .env
// loader. Both processes look for the SAME marker (`workspace-server/
// go.mod`) so a developer renaming or relocating the repo only has to
// update one heuristic. Production is unaffected: `output: "standalone"`
// bakes resolved env into the build, and the marker file isn't shipped.
loadMonorepoEnv();
// `standalone` traces dependencies into a self-contained server
// bundle; resolved env values are baked in at build time, which is
// why the dev-only .env loading above never affects production.
const nextConfig: NextConfig = {
  output: "standalone",
};
export default nextConfig;
function loadMonorepoEnv() {
const root = findMonorepoRoot(__dirname);
if (!root) return;
const envPath = join(root, ".env");
if (!existsSync(envPath)) return;
const body = readFileSync(envPath, "utf8");
let loaded = 0;
let skipped = 0;
for (const line of body.split(/\r?\n/)) {
const kv = parseLine(line);
if (!kv) continue;
const [k, v] = kv;
// Existing env wins. NOTE: an explicitly-set empty string
// (`KEY=` exported from a parent shell, where Node represents it
// as `""` not `undefined`) counts as "set" — we keep the empty
// value rather than backfilling from the file. Matches Go's
// os.LookupEnv check in workspace-server/cmd/server/dotenv.go so
// both processes treat the same input identically. Operators who
// want the file value to win must `unset KEY` in the launching
// shell.
if (process.env[k] !== undefined) {
skipped++;
continue;
}
process.env[k] = v;
loaded++;
}
// eslint-disable-next-line no-console
console.log(
`[next.config] loaded ${loaded} vars from ${envPath} (${skipped} already set in env)`,
);
}
/**
 * Walk upward from `start` looking for the directory that contains
 * the `workspace-server/go.mod` marker (the monorepo root). Gives up
 * after six levels or on reaching the filesystem root.
 *
 * @param start directory to begin the upward search from
 * @returns the monorepo root directory, or null when not found
 */
function findMonorepoRoot(start: string): string | null {
  let current = start;
  let hops = 0;
  while (hops < 6) {
    const marker = join(current, "workspace-server", "go.mod");
    if (existsSync(marker)) return current;
    const up = dirname(current);
    // dirname() is a fixpoint at the filesystem root — stop there.
    if (up === current) return null;
    current = up;
    hops++;
  }
  return null;
}
// Mirror of workspace-server/cmd/server/dotenv.go's parseDotEnvLine
// — same rules so the two loaders agree on every line in the shared
// .env. If you change one parser, change the other.
/**
 * Parse a single .env line into a [key, value] pair, or null for
 * blank lines, comments, and lines with no key before "=".
 *
 * Rules (kept in lockstep with the Go mirror):
 * - optional `export ` prefix (literal space) is stripped;
 * - quoted values ('…' or "…") take everything inside the first
 *   matching quote pair verbatim; trailing text after the closing
 *   quote (including comments) is discarded; an unterminated quote
 *   falls through to bare-value handling;
 * - in bare values, "#" starts a comment only at the value start or
 *   after a space/tab, so `a#b` survives intact.
 */
function parseLine(raw: string): [string, string] | null {
  // NOTE: a previous `.replace(/^/, "")` here was a no-op (replacing
  // the empty match at position 0) — likely a mangled BOM strip. It
  // is dropped rather than restored: trim() already removes U+FEFF.
  let line = raw.trim();
  if (line === "" || line.startsWith("#")) return null;
  // `export ` prefix uses a literal space — `export\tFOO=bar` with a
  // tab is intentionally not treated as a prefix, matching the Go
  // mirror in workspace-server/cmd/server/dotenv.go. Shells emit the
  // prefix with a space; tabs would only appear in hand-mangled files.
  if (line.startsWith("export ")) line = line.slice("export ".length).trimStart();
  const eq = line.indexOf("=");
  // eq === 0 means empty key ("=value"); eq === -1 means no "=".
  if (eq <= 0) return null;
  const k = line.slice(0, eq).trim();
  // Strip only LEADING blanks — the quote check below needs to see
  // the value's first raw character.
  let v = line.slice(eq + 1).replace(/^[ \t]+/, "");
  if (v.length >= 2 && (v[0] === '"' || v[0] === "'")) {
    const quote = v[0];
    const end = v.indexOf(quote, 1);
    if (end >= 0) return [k, v.slice(1, end)];
    // unterminated — fall through to bare-value handling
  }
  for (let i = 0; i < v.length; i++) {
    if (v[i] !== "#") continue;
    if (i === 0 || v[i - 1] === " " || v[i - 1] === "\t") {
      v = v.slice(0, i);
      break;
    }
  }
  return [k, v.trim()];
}

View File

@ -1,5 +1,9 @@
@import "xterm/css/xterm.css";
/* Theme tokens MUST load before any feature stylesheet that
references them so custom properties are in scope. */
@import "../styles/theme-tokens.css";
@import "../styles/settings-panel.css";
@import "../styles/org-deploy.css";
@tailwind base;
@tailwind components;
@ -38,7 +42,20 @@ body {
}
.react-flow__node {
transition: box-shadow 0.2s ease;
/* Transform transition drives the "spawn from parent" motion —
org-deploy sets the node's initial position to the parent's
absolute coords, then repositions to the real slot, and this
transition interpolates the translate() in between.
Non-deploy workspace moves (drag, nest) get the same smoothing
for free. */
transition:
box-shadow var(--mol-duration-fast) ease,
transform var(--mol-duration-spawn) var(--mol-easing-bounce-out);
}
/* Drag events must feel instant — React Flow adds this class
for the lifetime of the gesture. */
.react-flow__node.dragging {
transition: box-shadow var(--mol-duration-fast) ease;
}
/* Scrollbar styling */

View File

@ -7,13 +7,19 @@ import { CommunicationOverlay } from "@/components/CommunicationOverlay";
import { Spinner } from "@/components/Spinner";
import { connectSocket, disconnectSocket } from "@/store/socket";
import { useCanvasStore } from "@/store/canvas";
import { api } from "@/lib/api";
import { api, PlatformUnavailableError } from "@/lib/api";
import type { WorkspaceData } from "@/store/socket";
export default function Home() {
const hydrationError = useCanvasStore((s) => s.hydrationError);
const setHydrationError = useCanvasStore((s) => s.setHydrationError);
const [hydrating, setHydrating] = useState(true);
// Distinct from hydrationError: platform-down is its own UX path
// (different copy, different action — the user's next step is to
// check local services, not to retry the API call). Tracked
// separately rather than encoded into hydrationError so the
// generic-error branch can stay simple.
const [platformDown, setPlatformDown] = useState(false);
useEffect(() => {
connectSocket();
@ -28,8 +34,11 @@ export default function Home() {
useCanvasStore.getState().setViewport(viewport);
}
}).catch((err) => {
// Initial hydration failed — show error banner to user
console.error("Canvas: initial hydration failed", err);
if (err instanceof PlatformUnavailableError) {
setPlatformDown(true);
return;
}
useCanvasStore.getState().setHydrationError(
err instanceof Error && err.message ? err.message : "Failed to load canvas"
);
@ -53,6 +62,10 @@ export default function Home() {
);
}
if (platformDown) {
return <PlatformDownDiagnostic />;
}
return (
<>
<Canvas />
@ -83,3 +96,43 @@ export default function Home() {
</>
);
}
/**
* Dedicated diagnostic for the case where the platform reported its
* datastore (Postgres / Redis) is unreachable. Distinct from the
* generic API-error overlay: the user's next action is to check
* local services, not to retry the API call. Includes the exact
* commands for the common dev-host setup.
*/
function PlatformDownDiagnostic() {
return (
<div
role="alert"
className="fixed inset-0 flex flex-col items-center justify-center bg-zinc-950 text-zinc-300 gap-5 z-[9999] px-6"
>
<div className="text-amber-400 text-sm font-semibold uppercase tracking-wider">
Platform infrastructure unreachable
</div>
<p className="text-zinc-400 text-sm max-w-lg text-center leading-relaxed">
The platform server returned <code className="font-mono text-amber-300">503 platform_unavailable</code>.
That means it can&apos;t reach Postgres or Redis to validate your session.
Most common cause on a dev host: one of those services stopped.
</p>
<div className="bg-zinc-900/80 border border-zinc-700/50 rounded-lg px-4 py-3 max-w-lg w-full">
<div className="text-[10px] uppercase tracking-wider text-zinc-500 mb-2">Try first</div>
<pre className="text-[12px] text-zinc-300 font-mono whitespace-pre-wrap leading-relaxed">{`brew services start postgresql@14
brew services start redis`}</pre>
</div>
<p className="text-[11px] text-zinc-500 max-w-lg text-center">
If both are running, check <code className="font-mono">/tmp/molecule-server.log</code> for
the underlying error. If you&apos;re on hosted SaaS, this is a platform incident try again in a moment.
</p>
<button
onClick={() => window.location.reload()}
className="px-4 py-2 bg-blue-600 hover:bg-blue-500 text-white rounded-md text-sm mt-2"
>
Reload
</button>
</div>
);
}

View File

@ -74,7 +74,11 @@ export function buildA2AEdges(
});
}
// 3. Build React Flow Edge objects
// 3. Build React Flow Edge objects. We tag every overlay edge with
// type: "a2a" so React Flow renders it via our custom A2AEdge
// component (canvas/A2AEdge.tsx). The custom component portals
// its label out of the SVG layer so it (a) doesn't get hidden
// behind workspace cards and (b) is clickable.
return Array.from(map.values()).map(({ source, target, count, lastAt }) => {
const isHot = now - lastAt < A2A_HOT_MS;
const stroke = isHot ? "#8b5cf6" : "#3b82f6"; // violet-500 : blue-500
@ -84,6 +88,7 @@ export function buildA2AEdges(
return {
id: `a2a-${source}-${target}`,
type: "a2a",
source,
target,
animated: isHot,
@ -96,22 +101,22 @@ export function buildA2AEdges(
style: {
stroke,
strokeWidth: 2,
// Non-blocking: label overlay never intercepts pointer events
// Path itself stays non-interactive so node drags through
// the line still work. The clickable target is the label
// pill, which sets pointerEvents: all on its own div.
pointerEvents: "none" as React.CSSProperties["pointerEvents"],
},
// `label` keeps the same string for back-compat with any test
// that asserts on it (e.g. buildA2AEdges output shape). Custom
// edge reads the rich data from `data` so the label visual is
// not constrained to a string anymore.
label,
labelStyle: {
fill: "#a1a1aa", // zinc-400
fontSize: 10,
pointerEvents: "none" as React.CSSProperties["pointerEvents"],
data: {
count,
lastAt,
isHot,
label,
},
labelBgStyle: {
fill: "#18181b", // zinc-900
fillOpacity: 0.9,
pointerEvents: "none" as React.CSSProperties["pointerEvents"],
},
labelBgPadding: [4, 6] as [number, number],
labelBgBorderRadius: 4,
};
});
}

View File

@ -36,11 +36,22 @@ import { DropTargetBadge } from "./canvas/DropTargetBadge";
import { useDragHandlers } from "./canvas/useDragHandlers";
import { useKeyboardShortcuts } from "./canvas/useKeyboardShortcuts";
import { useCanvasViewport } from "./canvas/useCanvasViewport";
import { A2AEdge } from "./canvas/A2AEdge";
const nodeTypes = {
workspaceNode: WorkspaceNode,
};
// Custom edge types. The default React Flow edge renders its label
// inside the SVG group (always under nodes) with pointerEvents: none
// inherited from the path. A2AEdge portals the label to a sibling
// DOM layer so it renders above nodes and accepts clicks. Keep the
// reference stable (module-scope const) so React Flow doesn't see a
// new edgeTypes object on every render and warn about prop churn.
const edgeTypes = {
a2a: A2AEdge,
};
const defaultEdgeOptions: Partial<Edge> = {
animated: true,
style: {
@ -58,14 +69,95 @@ export function Canvas() {
}
function CanvasInner() {
const nodes = useCanvasStore((s) => s.nodes);
const rawNodes = useCanvasStore((s) => s.nodes);
const edges = useCanvasStore((s) => s.edges);
const a2aEdges = useCanvasStore((s) => s.a2aEdges);
const showA2AEdges = useCanvasStore((s) => s.showA2AEdges);
const deletingIds = useCanvasStore((s) => s.deletingIds);
const allEdges = useMemo(
() => (showA2AEdges ? [...edges, ...a2aEdges] : edges),
[edges, a2aEdges, showA2AEdges],
);
// Drag-lock during a system-owned operation (deploy OR delete).
// React Flow respects Node.draggable, which stops the gesture
// before it starts — preventDefault() on the drag-start callback
// isn't authoritative in v12. We project `draggable: false` onto
// each locked node before handing the array to ReactFlow; the
// drag-start handler in useDragHandlers remains as a belt-and-
// braces check.
//
// Perf: short-circuit when nothing is provisioning so the memo
// passes rawNodes through unchanged (identity-stable → RF
// reconciles nothing). When a deploy IS active, build an O(n)
// root index once and re-use it. Critically, do NOT spread every
// node — only mutate the locked ones — so unmodified nodes keep
// their object identity and RF's per-node memo short-circuits.
const nodes = useMemo(() => {
const anyProvisioning = rawNodes.some((n) => n.data.status === "provisioning");
const anyDeleting = deletingIds.size > 0;
if (!anyProvisioning && !anyDeleting) return rawNodes;
const byId = new Map<string, typeof rawNodes[number]>();
for (const n of rawNodes) byId.set(n.id, n);
const rootOf = new Map<string, string>();
const resolveRoot = (id: string): string => {
// Iterative walk guards against a pathological cycle (hostile
// data) — recursion would hit the stack limit on a deep tree.
const visited = new Set<string>();
let cursor: string | null = id;
while (cursor) {
if (visited.has(cursor)) break;
visited.add(cursor);
const cached = rootOf.get(cursor);
if (cached) {
for (const seenId of visited) rootOf.set(seenId, cached);
return cached;
}
const n = byId.get(cursor);
if (!n) break;
if (!n.data.parentId) {
for (const seenId of visited) rootOf.set(seenId, cursor);
return cursor;
}
cursor = n.data.parentId;
}
return id;
};
const provisioningByRoot = new Map<string, number>();
for (const n of rawNodes) {
if (n.data.status !== "provisioning") continue;
const rootId = resolveRoot(n.id);
provisioningByRoot.set(rootId, (provisioningByRoot.get(rootId) ?? 0) + 1);
}
let touched = false;
const next = rawNodes.map((n) => {
const rootId = resolveRoot(n.id);
const deployLocked = n.id !== rootId && (provisioningByRoot.get(rootId) ?? 0) > 0;
// Delete-locked: nothing in a subtree whose DELETE is in
// flight should be draggable, INCLUDING the root of that
// subtree (unlike deploy, there's no cancel — the delete
// is irrevocable at this point).
const deleteLocked = deletingIds.has(n.id);
const shouldLock = deployLocked || deleteLocked;
if (shouldLock && n.draggable !== false) {
touched = true;
return { ...n, draggable: false };
}
if (!shouldLock && n.draggable === false) {
// Node was locked in a prior render; deploy cancelled /
// completed, or delete failed and was reverted. Restore
// default dragability.
touched = true;
const { draggable: _d, ...rest } = n;
void _d;
return rest as typeof n;
}
return n; // identity-preserved
});
return touched ? next : rawNodes;
}, [rawNodes, deletingIds]);
const onNodesChange = useCanvasStore((s) => s.onNodesChange);
const selectNode = useCanvasStore((s) => s.selectNode);
const selectedNodeId = useCanvasStore((s) => s.selectedNodeId);
@ -91,18 +183,45 @@ function CanvasInner() {
// outside-click handler.
const pendingDelete = useCanvasStore((s) => s.pendingDelete);
const setPendingDelete = useCanvasStore((s) => s.setPendingDelete);
const removeNode = useCanvasStore((s) => s.removeNode);
const removeSubtree = useCanvasStore((s) => s.removeSubtree);
const confirmDelete = useCallback(async () => {
if (!pendingDelete) return;
const { id } = pendingDelete;
setPendingDelete(null);
// Compute the full subtree and mark it as "deleting" so every
// node in the chain renders dim + non-draggable during the
// network round-trip + the server-side cascade. Matches the
// deploy-lock UX: once a system-initiated operation owns this
// subtree, the user shouldn't be able to move its pieces
// around until it resolves.
const state = useCanvasStore.getState();
const subtree = new Set<string>();
const stack = [id];
while (stack.length) {
const nid = stack.pop()!;
subtree.add(nid);
for (const n of state.nodes) {
if (n.data.parentId === nid) stack.push(n.id);
}
}
state.beginDelete(subtree);
try {
await api.del(`/workspaces/${id}?confirm=true`);
removeNode(id);
// Mirror the server-side cascade locally — drop the parent AND
// every descendant in one atomic update. The per-descendant
// WORKSPACE_REMOVED WS events still arrive (and are no-ops
// because the nodes are already gone), but we no longer depend
// on them: a wedged WS used to leave orphan child cards on the
// canvas until the user refreshed the page.
removeSubtree(id);
state.endDelete(subtree);
} catch (e) {
// Network or server error — restore the subtree to normal
// interaction and surface the error.
state.endDelete(subtree);
showToast(e instanceof Error ? e.message : "Delete failed", "error");
}
}, [pendingDelete, setPendingDelete, removeNode]);
}, [pendingDelete, setPendingDelete, removeSubtree]);
const onPaneClick = useCallback(() => {
selectNode(null);
@ -141,6 +260,7 @@ function CanvasInner() {
onPaneClick={onPaneClick}
onMoveEnd={onMoveEnd}
nodeTypes={nodeTypes}
edgeTypes={edgeTypes}
defaultEdgeOptions={defaultEdgeOptions}
defaultViewport={defaultViewport}
fitView={viewport.x === 0 && viewport.y === 0 && viewport.zoom === 1}

View File

@ -1,27 +1,19 @@
"use client";
import { useState, useEffect } from "react";
import { useState, useEffect, useCallback } from "react";
import { api } from "@/lib/api";
import { useCanvasStore } from "@/store/canvas";
import { OrgTemplatesSection } from "./TemplatePalette";
import { type Template } from "@/lib/deploy-preflight";
import { useTemplateDeploy } from "@/hooks/useTemplateDeploy";
import { Spinner } from "./Spinner";
import { TIER_CONFIG } from "@/lib/design-tokens";
interface Template {
id: string;
name: string;
description: string;
tier: number;
model: string;
skills: string[];
skill_count: number;
}
export function EmptyState() {
const [templates, setTemplates] = useState<Template[]>([]);
const [loading, setLoading] = useState(true);
const [deploying, setDeploying] = useState<string | null>(null);
const [error, setError] = useState<string | null>(null);
const [blankCreating, setBlankCreating] = useState(false);
const [blankError, setBlankError] = useState<string | null>(null);
useEffect(() => {
api
@ -31,48 +23,56 @@ export function EmptyState() {
.finally(() => setLoading(false));
}, []);
const deploy = async (template: Template) => {
setDeploying(template.id);
setError(null);
try {
const ws = await api.post<{ id: string }>("/workspaces", {
name: template.name,
template: template.id,
tier: template.tier,
canvas: { x: 200, y: 150 },
});
// Auto-select the new workspace and open chat
setTimeout(() => {
useCanvasStore.getState().selectNode(ws.id);
useCanvasStore.getState().setPanelTab("chat");
}, 500);
} catch (e) {
setError(e instanceof Error ? e.message : "Deploy failed");
} finally {
setDeploying(null);
}
};
// Canvas fills in a visible "center-ish" spot on a fresh tenant so
// the user doesn't have to pan to find their new workspace. Fixed
// (200, 150) instead of the sidebar's random placement because the
// canvas is guaranteed empty when this component mounts.
const firstDeployCoords = useCallback(() => ({ x: 200, y: 150 }), []);
// After the POST succeeds, auto-select the new workspace and flip
// the panel to Chat. This is a UX flourish that only makes sense
// on first deploy (the canvas is empty so the selection can't
// surprise anyone); the sidebar intentionally skips this step.
// 500 ms delay so React Flow has a frame to render the new node
// before it receives focus.
const handleDeployed = useCallback((workspaceId: string) => {
setTimeout(() => {
useCanvasStore.getState().selectNode(workspaceId);
useCanvasStore.getState().setPanelTab("chat");
}, 500);
}, []);
const { deploy, deploying, error, modal } = useTemplateDeploy({
canvasCoords: firstDeployCoords,
onDeployed: handleDeployed,
});
// "Create blank" bypasses templates entirely — no preflight, no
// modal, just POST /workspaces with a default name and tier.
// Deliberately NOT routed through useTemplateDeploy because it
// has no `template.id` to deploy against.
const createBlank = async () => {
setDeploying("blank");
setError(null);
setBlankCreating(true);
setBlankError(null);
try {
const ws = await api.post<{ id: string }>("/workspaces", {
name: "My First Agent",
tier: 2,
canvas: { x: 200, y: 150 },
canvas: firstDeployCoords(),
});
setTimeout(() => {
useCanvasStore.getState().selectNode(ws.id);
useCanvasStore.getState().setPanelTab("chat");
}, 500);
handleDeployed(ws.id);
} catch (e) {
setError(e instanceof Error ? e.message : "Create failed");
setBlankError(e instanceof Error ? e.message : "Create failed");
} finally {
setDeploying(null);
setBlankCreating(false);
}
};
// Any active gesture locks every button so the user can't fire a
// second POST while the first is still in flight.
const anyDeploying = !!deploying || blankCreating;
const displayError = error ?? blankError;
return (
<div className="absolute inset-0 flex items-start justify-center pointer-events-none z-[1] overflow-y-auto py-8">
<div className="relative max-w-2xl w-full rounded-3xl border border-zinc-800/70 bg-zinc-950/80 backdrop-blur-xl px-8 py-8 text-center shadow-2xl shadow-black/40 pointer-events-auto mx-4">
@ -112,8 +112,8 @@ export function EmptyState() {
<button
type="button"
key={t.id}
onClick={() => deploy(t)}
disabled={!!deploying}
onClick={() => void deploy(t)}
disabled={anyDeploying}
className="group rounded-xl border border-zinc-800/60 bg-zinc-900/50 px-3.5 py-3 hover:border-blue-500/40 hover:bg-zinc-900/80 transition-all disabled:opacity-50 disabled:cursor-not-allowed disabled:hover:border-zinc-800/60 disabled:hover:bg-zinc-900/50 text-left focus:outline-none focus-visible:ring-2 focus-visible:ring-blue-500/70"
>
<div className="flex items-center gap-2 mb-1">
@ -143,10 +143,10 @@ export function EmptyState() {
<button
type="button"
onClick={createBlank}
disabled={!!deploying}
disabled={anyDeploying}
className="w-full rounded-xl border border-dashed border-zinc-700/60 bg-zinc-900/30 px-4 py-3 text-sm text-zinc-400 hover:text-zinc-200 hover:border-zinc-600 hover:bg-zinc-900/50 transition-all disabled:opacity-50 disabled:cursor-not-allowed disabled:hover:text-zinc-400 disabled:hover:border-zinc-700/60 focus:outline-none focus-visible:ring-2 focus-visible:ring-blue-500/70"
>
{deploying === "blank" ? "Creating..." : "+ Create blank workspace"}
{blankCreating ? "Creating..." : "+ Create blank workspace"}
</button>
{/* Org templates — instantiate a whole team in one click */}
@ -154,12 +154,17 @@ export function EmptyState() {
<OrgTemplatesSection />
</div>
{error && (
{displayError && (
<div role="alert" className="mt-3 px-3 py-2 bg-red-950/40 border border-red-800/50 rounded-lg text-xs text-red-400">
{error}
{displayError}
</div>
)}
{/* Missing-keys preflight modal owned by useTemplateDeploy,
shared with TemplatePalette. Rendered inline here so it
overlays this card naturally. */}
{modal}
{/* Tips */}
<div className="mt-5 pt-4 border-t border-zinc-800/50">
<div className="flex items-center justify-center gap-6 text-[10px] text-zinc-400">

View File

@ -1,19 +1,92 @@
"use client";
import { useEffect, useState } from "react";
import { STATUS_CONFIG } from "@/lib/design-tokens";
import { useCanvasStore } from "@/store/canvas";
const LEGEND_STATUSES = ["online", "provisioning", "degraded", "failed", "paused", "offline"] as const;
// Persist the user's choice across sessions. Default is "open" so
// first-time users still see the symbol key; once dismissed we
// respect that until they explicitly reopen via the floating pill.
const STORAGE_KEY = "molecule.legend.open";
/**
 * Read the persisted legend-open preference. Defaults to open (true)
 * during SSR, on first visit (no stored value), and whenever
 * localStorage is unavailable or throws.
 */
function readStoredOpen(): boolean {
  if (typeof window === "undefined") return true;
  try {
    const stored = window.localStorage.getItem(STORAGE_KEY);
    return stored === null ? true : stored === "1";
  } catch {
    // Private mode / disabled storage — behave as if never dismissed.
    return true;
  }
}
/**
 * Persist the legend-open preference. Best-effort: storage failures
 * (private mode / quota / disabled contexts) are swallowed — the
 * in-memory state still applies for the current session.
 */
function writeStoredOpen(open: boolean) {
  if (typeof window === "undefined") return;
  const flag = open ? "1" : "0";
  try {
    window.localStorage.setItem(STORAGE_KEY, flag);
  } catch {
    // Silent fallback by design — see note above.
  }
}
export function Legend() {
// TemplatePalette (when open) is fixed top-0 left-0 w-[280px] — the
// default bottom-6 left-4 position of this legend would sit under it.
// Shift past the 280 px palette + a 16 px gap when the palette is open.
const paletteOpen = useCanvasStore((s) => s.templatePaletteOpen);
const leftClass = paletteOpen ? "left-[296px]" : "left-4";
// SSR-safe pattern: mount with the default (true) so first paint
// matches the server output, then hydrate the persisted value
// after mount. Avoids a hydration mismatch warning when the user
// had previously closed the legend.
const [open, setOpen] = useState(true);
useEffect(() => {
setOpen(readStoredOpen());
}, []);
const closeLegend = () => {
setOpen(false);
writeStoredOpen(false);
};
const openLegend = () => {
setOpen(true);
writeStoredOpen(true);
};
if (!open) {
return (
<button
type="button"
onClick={openLegend}
aria-label="Show legend"
title="Show legend"
className={`fixed bottom-6 ${leftClass} z-30 flex items-center gap-1.5 rounded-full bg-zinc-900/95 border border-zinc-700/50 px-3 py-1.5 text-[11px] font-semibold text-zinc-400 uppercase tracking-wider shadow-xl shadow-black/30 backdrop-blur-sm hover:text-zinc-200 hover:border-zinc-600 transition-[left,colors] duration-200`}
>
<span aria-hidden="true" className="text-[10px]"></span>
Legend
</button>
);
}
return (
<div className={`fixed bottom-6 ${leftClass} z-30 bg-zinc-900/95 border border-zinc-700/50 rounded-xl px-4 py-3 shadow-xl shadow-black/30 backdrop-blur-sm max-w-[280px] transition-[left] duration-200`}>
<div className="text-[11px] font-semibold text-zinc-400 uppercase tracking-wider mb-2">Legend</div>
<div className="flex items-start justify-between mb-2">
<div className="text-[11px] font-semibold text-zinc-400 uppercase tracking-wider">Legend</div>
<button
type="button"
onClick={closeLegend}
aria-label="Hide legend"
title="Hide legend"
className="-mt-0.5 -mr-1 px-1.5 text-[14px] leading-none text-zinc-500 hover:text-zinc-200 transition-colors"
>
×
</button>
</div>
{/* Status */}
<div className="mb-2">

View File

@ -1,6 +1,7 @@
"use client";
import { useState, useEffect, useCallback, useRef, useMemo } from "react";
import { createPortal } from "react-dom";
import { api } from "@/lib/api";
import { getKeyLabel, type ProviderChoice } from "@/lib/deploy-preflight";
@ -196,6 +197,12 @@ function ProviderPickerModal({
);
if (!open) return null;
// Portal to document.body for the same reason as
// OrgImportPreflightModal — several callers (TemplatePalette,
// EmptyState) render the modal inside their own fixed+filtered
// containers, which re-anchor the "fixed" positioning to the
// wrapper's bounds instead of the viewport.
if (typeof document === "undefined") return null;
const allSaved = entries.length > 0 && entries.every((e) => e.saved);
const anySaving = entries.some((e) => e.saving);
@ -203,8 +210,14 @@ function ProviderPickerModal({
.replace(/[-_]/g, " ")
.replace(/\b\w/g, (c) => c.toUpperCase());
return (
<div className="fixed inset-0 z-50 flex items-center justify-center">
return createPortal(
// z-[60] so this stacks ABOVE OrgImportPreflightModal (z-50).
// Both can be on screen at once during an org import: the org-
// preflight is open while the user clicks a per-workspace deploy
// that triggers MissingKeys. Without the explicit z-order the
// backdrop click might dismiss the wrong modal depending on
// React's commit ordering.
<div className="fixed inset-0 z-[60] flex items-center justify-center">
<div
aria-hidden="true"
className="absolute inset-0 bg-black/70 backdrop-blur-sm"
@ -215,7 +228,7 @@ function ProviderPickerModal({
role="dialog"
aria-modal="true"
aria-labelledby="missing-keys-title"
className="relative bg-zinc-900 border border-zinc-700 rounded-xl shadow-2xl shadow-black/50 max-w-[480px] w-full mx-4 overflow-hidden"
className="relative bg-zinc-900 border border-zinc-700 rounded-xl shadow-2xl shadow-black/50 max-w-[480px] w-full mx-4 max-h-[80vh] overflow-auto"
>
<div className="px-5 py-4 border-b border-zinc-800">
<div className="flex items-center gap-2 mb-1">
@ -360,7 +373,8 @@ function ProviderPickerModal({
</div>
</div>
</div>
</div>
</div>,
document.body,
);
}
@ -474,6 +488,7 @@ function AllKeysModal({
}, [open]);
if (!open) return null;
if (typeof document === "undefined") return null;
const allSaved = entries.length > 0 && entries.every((e) => e.saved);
const anySaving = entries.some((e) => e.saving);
@ -481,8 +496,14 @@ function AllKeysModal({
.replace(/[-_]/g, " ")
.replace(/\b\w/g, (c) => c.toUpperCase());
return (
<div className="fixed inset-0 z-50 flex items-center justify-center">
return createPortal(
// z-[60] so this stacks ABOVE OrgImportPreflightModal (z-50).
// Both can be on screen at once during an org import: the org-
// preflight is open while the user clicks a per-workspace deploy
// that triggers MissingKeys. Without the explicit z-order the
// backdrop click might dismiss the wrong modal depending on
// React's commit ordering.
<div className="fixed inset-0 z-[60] flex items-center justify-center">
<div
className="absolute inset-0 bg-black/70 backdrop-blur-sm"
aria-hidden="true"
@ -493,7 +514,7 @@ function AllKeysModal({
role="dialog"
aria-modal="true"
aria-labelledby="missing-keys-title"
className="relative bg-zinc-900 border border-zinc-700 rounded-xl shadow-2xl shadow-black/50 max-w-[440px] w-full mx-4 overflow-hidden"
className="relative bg-zinc-900 border border-zinc-700 rounded-xl shadow-2xl shadow-black/50 max-w-[440px] w-full mx-4 max-h-[80vh] overflow-auto"
>
<div className="px-5 py-4 border-b border-zinc-800">
<div className="flex items-center gap-2 mb-1">
@ -608,6 +629,7 @@ function AllKeysModal({
</div>
</div>
</div>
</div>
</div>,
document.body,
);
}

View File

@ -0,0 +1,540 @@
"use client";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { createPortal } from "react-dom";
import { createSecret } from "@/lib/api/secrets";
/**
* One entry from the server's preflight `required_env` / `recommended_env`.
*
* - A plain string is a STRICT requirement: that exact env var must be
* configured.
* - A `{any_of: [...]}` object is an OR group: at least one member
* must be configured to satisfy it. Lets a template say "either
* ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN" without forcing
* both.
*
* Matches the Go `EnvRequirement` type's JSON shape (MarshalJSON in
* workspace-server/internal/handlers/org.go). The union is written so
* that a narrow check `typeof e === "string"` distinguishes cleanly.
*/
export type EnvRequirement = string | { any_of: string[] };
/** Flatten a requirement to the list of env-var names it can match. */
export function envReqMembers(r: EnvRequirement): string[] {
  if (typeof r === "string") return [r];
  return r.any_of;
}
/** True when at least one member of the requirement is configured. */
export function envReqSatisfied(r: EnvRequirement, configured: Set<string>): boolean {
  const members = typeof r === "string" ? [r] : r.any_of;
  return members.some((name) => configured.has(name));
}
/**
 * Stable react-key / dedup key for a requirement. Group members are
 * sorted (on a copy — the input array is never mutated) so variants
 * of the same group with reordered members collapse to one entry.
 */
export function envReqKey(r: EnvRequirement): string {
  if (typeof r === "string") return r;
  return r.any_of.slice().sort().join("|");
}
interface Props {
open: boolean;
/** Display name of the org template — headline only. */
orgName: string;
/** Total workspace count so the header can read "12 workspaces". */
workspaceCount: number;
/** Env vars the server has declared MUST be set as global secrets.
* Import is disabled until every entry here is configured. Entries
* are either a single key name or an any-of group. */
requiredEnv: EnvRequirement[];
/** Env vars the server suggests import can proceed without them,
* but the user sees them listed so they can decide. Same union
* shape as `requiredEnv`. */
recommendedEnv: EnvRequirement[];
/** Names of env vars already configured globally. Used to strike
* through entries the user has already set up in another
* session. Passed in rather than queried inside the modal so the
* parent can refresh after each save without prop-driven effects. */
configuredKeys: Set<string>;
/** Called after a successful secret save so the parent can refresh
* `configuredKeys`. */
onSecretSaved: () => void;
/** User clicked Import with all required envs satisfied. */
onProceed: () => void;
/** User dismissed the modal. Import is NOT fired. */
onCancel: () => void;
}
/** Editing state for one env-var input row, keyed in a map by name. */
interface DraftEntry {
  // Env var name this draft belongs to (mirrors the map key).
  key: string;
  // Current unsaved input text; cleared after a successful save.
  value: string;
  // True while the createSecret call for this key is in flight.
  saving: boolean;
  // Message from the last failed save, or null when clean.
  error: string | null;
}
/**
 * OrgImportPreflightModal
 * -----------------------
 * Two-tier env preflight before POST /org/import:
 *
 *  - REQUIRED section (red, blocking): every entry MUST be configured
 *    globally before the Import button enables. Matches the server-
 *    side preflight that would 412 the import anyway.
 *
 *  - RECOMMENDED section (yellow, non-blocking): listed so the user
 *    can add them if they want the full experience, but the Import
 *    button stays enabled regardless.
 *
 * Saving goes to the GLOBAL secrets endpoint (PUT /settings/secrets)
 * because org-level templates deploy shared resources. Per-workspace
 * overrides still work via the Config tab on an individual node
 * after import. The modal does NOT enable Import the moment a key is
 * typed — only after it saves successfully (so a half-entered token
 * can't proceed and then fail at container-start time instead).
 */
export function OrgImportPreflightModal({
  open,
  orgName,
  workspaceCount,
  requiredEnv,
  recommendedEnv,
  configuredKeys,
  onSecretSaved,
  onProceed,
  onCancel,
}: Props) {
  // key → per-row draft state. Seeded (never wholesale replaced) by
  // the effect below so in-progress typing survives re-renders.
  const [drafts, setDrafts] = useState<Record<string, DraftEntry>>({});
  // Flatten the union-shaped requirement lists to the set of every key
  // that could ever appear as an input row. Used purely to seed the
  // drafts map — satisfaction semantics still read from the grouped
  // EnvRequirement entries (a group can be satisfied by any one
  // member).
  const allMemberKeys = useMemo(() => {
    const keys: string[] = [];
    for (const r of requiredEnv) keys.push(...envReqMembers(r));
    for (const r of recommendedEnv) keys.push(...envReqMembers(r));
    return keys;
  }, [requiredEnv, recommendedEnv]);
  // Seed a draft entry per declared key the first time the modal
  // opens. Entries persist across `configuredKeys` changes so a mid-
  // save recheck doesn't wipe what the user typed.
  //
  // Dep: derive a STABLE string from the env-name lists rather than
  // the array refs themselves. The parent computes
  // `preflight.org.required_env ?? []`, which produces a fresh []
  // identity on every re-render (e.g. when refreshConfiguredKeys
  // bumps state); depending on the array refs would re-fire the
  // effect on every parent render and mask any future edit that
  // drops the `if (!next[k])` guard as a silent input-reset bug.
  const envKeysSignature = useMemo(
    () => [...allMemberKeys].sort().join("|"),
    [allMemberKeys],
  );
  useEffect(() => {
    if (!open) return;
    setDrafts((prev) => {
      const next = { ...prev };
      for (const k of allMemberKeys) {
        // Only backfill missing entries — never overwrite an existing
        // draft, or a reopen would erase half-typed values.
        if (!next[k]) {
          next[k] = { key: k, value: "", saving: false, error: null };
        }
      }
      return next;
    });
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [open, envKeysSignature]);
  // Requirements not yet satisfied, split by tier. missingRequired
  // gates the Import button; missingRecommended only feeds the amber
  // "still unset" footer hint.
  const missingRequired = useMemo(
    () => requiredEnv.filter((r) => !envReqSatisfied(r, configuredKeys)),
    [requiredEnv, configuredKeys],
  );
  const missingRecommended = useMemo(
    () => recommendedEnv.filter((r) => !envReqSatisfied(r, configuredKeys)),
    [recommendedEnv, configuredKeys],
  );
  const canProceed = missingRequired.length === 0;
  // Synchronous in-flight gate. A ref (not state) so two clicks
  // dispatched in the SAME microtask both see the gate flip — state
  // commits don't help here because setState is async. The previous
  // closure-based `current.saving` gate worked under React Testing
  // Library's act() flushing but failed for true microtask-level
  // double-fires (programmatic clicks, dblclick events, Enter-spam
  // before React commits). Set is keyed by env var name so different
  // rows can save concurrently.
  const inFlightRef = useRef<Set<string>>(new Set());
  // Latest-drafts ref so saveOne can read the current input value
  // without taking `drafts` as a useCallback dep — that dep would
  // re-create saveOne on every keystroke and re-bind every Save
  // button's onClick handler, churn that scales with row count.
  const draftsRef = useRef(drafts);
  useEffect(() => {
    draftsRef.current = drafts;
  }, [drafts]);
  // Persist one key's draft as a global secret. No-op when a save for
  // the same key is already in flight or the input is blank.
  const saveOne = useCallback(
    async (key: string) => {
      // Microtask-safe gate: claim the slot synchronously BEFORE any
      // await so a second click in the same tick bounces immediately.
      if (inFlightRef.current.has(key)) return;
      const current = draftsRef.current[key];
      if (!current || !current.value.trim()) return;
      inFlightRef.current.add(key);
      // Snapshot the value before the async boundary so a keystroke
      // landing mid-save can't change what gets persisted.
      const startValue = current.value;
      setDrafts((d) => ({
        ...d,
        [key]: { ...d[key], saving: true, error: null },
      }));
      try {
        await createSecret("global", key, startValue);
        // Clear the input on success — the secret now lives server-
        // side and the row flips to its struck-through "set" state.
        setDrafts((d) => ({
          ...d,
          [key]: { ...d[key], value: "", saving: false, error: null },
        }));
        // Let the parent refresh configuredKeys so the strike-through
        // updates and canProceed recomputes.
        onSecretSaved();
      } catch (e) {
        setDrafts((d) => ({
          ...d,
          [key]: {
            ...d[key],
            saving: false,
            error: e instanceof Error ? e.message : "Save failed",
          },
        }));
      } finally {
        inFlightRef.current.delete(key);
      }
    },
    [onSecretSaved],
  );
  if (!open) return null;
  // Portal the dialog to document.body so it escapes any ancestor
  // containing block. TemplatePalette renders this modal inside a
  // sidebar whose `fixed` container plus backdrop-filter together
  // re-anchor descendants' `position: fixed` to the sidebar's own
  // bounds instead of the viewport — the modal ends up glued to the
  // sidebar's scrollable region and only becomes visible after the
  // user scrolls the sidebar. Portal dodges that class of issue
  // once and for all, regardless of what future wrappers do.
  //
  // SSR-safe guard: `document` is undefined on the server. Since
  // the modal is gated by `if (!open) return null` above, this
  // effectively only runs after open flips true on the client.
  if (typeof document === "undefined") return null;
  return createPortal(
    <div
      role="dialog"
      aria-modal="true"
      aria-labelledby="org-preflight-title"
      className="fixed inset-0 z-50 flex items-center justify-center bg-black/70"
      onClick={onCancel}
    >
      {/* Inner card stops propagation so only true backdrop clicks
          dismiss the modal. */}
      <div
        className="w-[560px] max-h-[80vh] overflow-auto rounded-xl bg-zinc-900 border border-zinc-700 shadow-2xl"
        onClick={(e) => e.stopPropagation()}
      >
        <header className="px-5 py-4 border-b border-zinc-800">
          <h2 id="org-preflight-title" className="text-sm font-semibold text-zinc-100">
            Deploy {orgName}
          </h2>
          <p className="mt-0.5 text-[11px] text-zinc-500">
            {workspaceCount} workspace{workspaceCount === 1 ? "" : "s"}.
            Review the credentials needed before import.
          </p>
        </header>
        <section className="p-5 space-y-5">
          {requiredEnv.length > 0 && (
            <EnvList
              tone="required"
              title="Required"
              subtitle="Import is blocked until every key below is saved globally."
              entries={requiredEnv}
              configuredKeys={configuredKeys}
              drafts={drafts}
              onChange={(key, value) =>
                setDrafts((d) => ({ ...d, [key]: { ...d[key], value } }))
              }
              onSave={saveOne}
            />
          )}
          {recommendedEnv.length > 0 && (
            <EnvList
              tone="recommended"
              title="Recommended"
              subtitle="Not required, but some features degrade without them. Add them now for the best experience."
              entries={recommendedEnv}
              configuredKeys={configuredKeys}
              drafts={drafts}
              onChange={(key, value) =>
                setDrafts((d) => ({ ...d, [key]: { ...d[key], value } }))
              }
              onSave={saveOne}
            />
          )}
          {requiredEnv.length === 0 && recommendedEnv.length === 0 && (
            <p className="text-[12px] text-zinc-400">
              No additional credentials required for this template.
            </p>
          )}
        </section>
        <footer className="px-5 py-3 border-t border-zinc-800 flex items-center justify-between">
          <button
            type="button"
            onClick={onCancel}
            className="px-3 py-1.5 text-[11px] rounded bg-zinc-800 hover:bg-zinc-700 text-zinc-300"
          >
            Cancel
          </button>
          <div className="flex items-center gap-2">
            {/* Non-blocking nudge: only shown once Import is enabled,
                so it can't be mistaken for the reason it's disabled. */}
            {missingRecommended.length > 0 && canProceed && (
              <span className="text-[10px] text-amber-400/90">
                {missingRecommended.length} recommended key
                {missingRecommended.length === 1 ? "" : "s"} still unset
              </span>
            )}
            <button
              type="button"
              onClick={onProceed}
              disabled={!canProceed}
              className="px-4 py-1.5 text-[11px] font-semibold rounded bg-blue-600 hover:bg-blue-500 text-white disabled:bg-zinc-700 disabled:text-zinc-500 disabled:cursor-not-allowed"
            >
              Import
            </button>
          </div>
        </footer>
      </div>
    </div>,
    document.body,
  );
}
/** Props for one tier section (Required or Recommended). */
interface EnvListProps {
  // "required" renders the red/blocking treatment, "recommended" amber.
  tone: "required" | "recommended";
  // Section heading text.
  title: string;
  // One-line explanation shown under the heading.
  subtitle: string;
  // Requirements to render — one row per strict key, one group per any-of.
  entries: EnvRequirement[];
  // Names of globally configured secrets; drives the strike-through.
  configuredKeys: Set<string>;
  // Per-key input drafts, owned by the parent modal.
  drafts: Record<string, DraftEntry>;
  // Update the draft value for one key as the user types.
  onChange: (key: string, value: string) => void;
  // Persist one key's draft as a global secret.
  onSave: (key: string) => void;
}
/**
 * One tier section: a tinted card with a heading, a subtitle, and one
 * row per requirement. Strict (string) entries render as StrictEnvRow;
 * any-of groups render as AnyOfEnvGroup.
 */
function EnvList({
  tone,
  title,
  subtitle,
  entries,
  configuredKeys,
  drafts,
  onChange,
  onSave,
}: EnvListProps) {
  const isRequired = tone === "required";
  // Red card + heading for blocking requirements, amber for the
  // nice-to-have tier.
  const accent = isRequired
    ? "border-red-800/60 bg-red-950/20"
    : "border-amber-800/50 bg-amber-950/15";
  const headerColor = isRequired ? "text-red-300" : "text-amber-300";
  return (
    <div className={`rounded-lg border ${accent} p-3`}>
      <h3 className={`text-[11px] font-semibold uppercase tracking-wide ${headerColor}`}>
        {title}
      </h3>
      <p className="mt-0.5 mb-2 text-[10px] text-zinc-400">{subtitle}</p>
      <ul className="space-y-2">
        {entries.map((entry) => {
          if (typeof entry === "string") {
            return (
              <StrictEnvRow
                key={envReqKey(entry)}
                envKey={entry}
                configured={configuredKeys.has(entry)}
                draft={drafts[entry]}
                onChange={onChange}
                onSave={onSave}
              />
            );
          }
          return (
            <AnyOfEnvGroup
              key={envReqKey(entry)}
              members={entry.any_of}
              configuredKeys={configuredKeys}
              drafts={drafts}
              onChange={onChange}
              onSave={onSave}
            />
          );
        })}
      </ul>
    </div>
  );
}
/** Props for a single strict (non-grouped) requirement row. */
interface StrictEnvRowProps {
  // The env var name this row manages.
  envKey: string;
  // True when the key is already set globally (row shows "set").
  configured: boolean;
  // Draft state for this key; may be undefined before seeding runs.
  draft: DraftEntry | undefined;
  // Update the draft value as the user types.
  onChange: (key: string, value: string) => void;
  // Persist the draft value as a global secret.
  onSave: (key: string) => void;
}
/**
 * One strict requirement row: the env var name plus — while not yet
 * configured — a password input and Save button. Once configured the
 * name strikes through and a green "set" marker replaces the input.
 * Enter in the input saves, same as clicking Save.
 *
 * Fix: the error span uses `basis-full` to claim its own line below
 * the input, but flex containers default to nowrap, so without
 * `flex-wrap` the span never wrapped — it was squeezed inline,
 * crushing the input. `flex-wrap` lets `basis-full` break the line
 * as intended and is a no-op while no error is rendered (every other
 * child keeps its single-row layout).
 */
function StrictEnvRow({
  envKey,
  configured,
  draft: d,
  onChange,
  onSave,
}: StrictEnvRowProps) {
  return (
    <li className="flex flex-wrap items-center gap-2 rounded bg-zinc-900/70 border border-zinc-800 px-2 py-1.5">
      <code
        className={`text-[11px] font-mono flex-1 ${
          configured ? "text-zinc-500 line-through" : "text-zinc-200"
        }`}
      >
        {envKey}
      </code>
      {configured ? (
        <span className="text-[10px] text-emerald-400"> set</span>
      ) : (
        <>
          <input
            type="password"
            aria-label={`Value for ${envKey}`}
            placeholder="paste value"
            value={d?.value ?? ""}
            onChange={(e) => onChange(envKey, e.target.value)}
            onKeyDown={(e) => {
              if (e.key === "Enter") {
                e.preventDefault();
                onSave(envKey);
              }
            }}
            disabled={d?.saving}
            className="flex-1 px-2 py-1 rounded bg-zinc-800 border border-zinc-700 text-[11px] text-zinc-200 focus:outline-none focus:border-blue-500 disabled:opacity-50"
          />
          <button
            type="button"
            onClick={() => onSave(envKey)}
            disabled={d?.saving || !d?.value.trim()}
            className="px-2 py-1 text-[10px] rounded bg-blue-600 hover:bg-blue-500 text-white disabled:opacity-40 disabled:cursor-not-allowed"
          >
            {d?.saving ? "…" : "Save"}
          </button>
        </>
      )}
      {d?.error && (
        <span className="text-[9px] text-red-400 basis-full pl-1">
          {d.error}
        </span>
      )}
    </li>
  );
}
/** Props for an any-of requirement group (configure any one member). */
interface AnyOfEnvGroupProps {
  // Member key names; configuring any single one satisfies the group.
  members: string[];
  // Names of globally configured secrets; drives the satisfied state.
  configuredKeys: Set<string>;
  // Per-key input drafts, owned by the parent modal.
  drafts: Record<string, DraftEntry>;
  // Update the draft value for one member as the user types.
  onChange: (key: string, value: string) => void;
  // Persist one member's draft as a global secret.
  onSave: (key: string) => void;
}
/**
 * Renders an OR group: the user only needs to configure ONE of the
 * members to satisfy the requirement. Once any member is configured
 * the group shows a green banner identifying the satisfying key; the
 * other inputs remain visible but muted so the user can still switch
 * providers if they want (uncommon but cheap to support).
 *
 * Fix: each member row's error span uses `basis-full` to claim its
 * own line, but flex containers default to nowrap, so without
 * `flex-wrap` the span never wrapped — it was squeezed inline,
 * crushing the input. `flex-wrap` lets `basis-full` break the line
 * as intended and is a no-op while no error is rendered.
 */
function AnyOfEnvGroup({
  members,
  configuredKeys,
  drafts,
  onChange,
  onSave,
}: AnyOfEnvGroupProps) {
  // First already-configured member, if any — names the green banner
  // and dims the remaining (now-optional) member rows.
  const satisfiedBy = members.find((m) => configuredKeys.has(m));
  return (
    <li className="rounded border border-zinc-800 bg-zinc-900/50 px-2.5 py-2">
      <div className="flex items-center justify-between mb-1.5">
        <span className="text-[10px] uppercase tracking-wide text-zinc-400">
          Configure any one
        </span>
        {satisfiedBy && (
          <span className="text-[10px] text-emerald-400">
            using <code className="font-mono">{satisfiedBy}</code>
          </span>
        )}
      </div>
      <ul className="space-y-1.5">
        {members.map((m) => {
          const isConfigured = configuredKeys.has(m);
          const d = drafts[m];
          const dimmed = !!satisfiedBy && !isConfigured;
          return (
            <li
              key={m}
              className={`flex flex-wrap items-center gap-2 rounded bg-zinc-900/70 border border-zinc-800 px-2 py-1 ${
                dimmed ? "opacity-50" : ""
              }`}
            >
              <code
                className={`text-[11px] font-mono flex-1 ${
                  isConfigured ? "text-zinc-500 line-through" : "text-zinc-200"
                }`}
              >
                {m}
              </code>
              {isConfigured ? (
                <span className="text-[10px] text-emerald-400"> set</span>
              ) : (
                <>
                  <input
                    type="password"
                    aria-label={`Value for ${m}`}
                    placeholder="paste value"
                    value={d?.value ?? ""}
                    onChange={(e) => onChange(m, e.target.value)}
                    onKeyDown={(e) => {
                      if (e.key === "Enter") {
                        e.preventDefault();
                        onSave(m);
                      }
                    }}
                    disabled={d?.saving}
                    className="flex-1 px-2 py-1 rounded bg-zinc-800 border border-zinc-700 text-[11px] text-zinc-200 focus:outline-none focus:border-blue-500 disabled:opacity-50"
                  />
                  <button
                    type="button"
                    onClick={() => onSave(m)}
                    disabled={d?.saving || !d?.value.trim()}
                    className="px-2 py-1 text-[10px] rounded bg-blue-600 hover:bg-blue-500 text-white disabled:opacity-40 disabled:cursor-not-allowed"
                  >
                    {d?.saving ? "…" : "Save"}
                  </button>
                </>
              )}
              {d?.error && (
                <span className="text-[9px] text-red-400 basis-full pl-1">
                  {d.error}
                </span>
              )}
            </li>
          );
        })}
      </ul>
    </li>
  );
}

View File

@ -65,6 +65,12 @@ export function ProvisioningTimeout({
// banner even if they stay in provisioning. Cleared when the
// workspace leaves provisioning (status changes).
const [dismissed, setDismissed] = useState<Set<string>>(new Set());
// Watch the live WS health. While it's not "connected", local node
// status reflects the last event we received before the drop —
// workspaces may have actually transitioned to online minutes ago.
// Suppress the banner until WS recovers + rehydrate confirms each
// workspace is genuinely still provisioning.
const wsStatus = useCanvasStore((s) => s.wsStatus);
// Subscribe to provisioning nodes — use shallow compare to avoid infinite re-render
// (filter+map creates new array reference on every store update).
@ -273,8 +279,11 @@ export function ProvisioningTimeout({
}, []);
const visibleTimedOut = useMemo(
() => timedOut.filter((e) => !dismissed.has(e.workspaceId)),
[timedOut, dismissed],
() =>
wsStatus === "connected"
? timedOut.filter((e) => !dismissed.has(e.workspaceId))
: [],
[timedOut, dismissed, wsStatus],
);
if (visibleTimedOut.length === 0) return null;

View File

@ -29,7 +29,7 @@ const TABS: { id: PanelTab; label: string; icon: string }[] = [
{ id: "chat", label: "Chat", icon: "◈" },
{ id: "activity", label: "Activity", icon: "⊙" },
{ id: "details", label: "Details", icon: "◉" },
{ id: "skills", label: "Skills", icon: "✦" },
{ id: "skills", label: "Plugins", icon: "✦" },
{ id: "terminal", label: "Terminal", icon: "▸" },
{ id: "config", label: "Config", icon: "⚙" },
{ id: "schedule", label: "Schedule", icon: "⏲" },
@ -280,7 +280,7 @@ export function SidePanel() {
className="flex-1 overflow-y-auto focus:outline-none"
>
{panelTab === "details" && <DetailsTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
{panelTab === "skills" && <SkillsTab key={selectedNodeId} data={node.data} />}
{panelTab === "skills" && <SkillsTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
{panelTab === "activity" && <ActivityTab key={selectedNodeId} workspaceId={selectedNodeId} />}
{panelTab === "chat" && <ChatTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
{panelTab === "terminal" && <TerminalTab key={selectedNodeId} workspaceId={selectedNodeId} />}

View File

@ -1,35 +1,48 @@
"use client";
import { useState, useEffect, useCallback, useRef } from "react";
import { flushSync } from "react-dom";
import { api } from "@/lib/api";
import { useCanvasStore } from "@/store/canvas";
import type { WorkspaceData } from "@/store/socket";
import { checkDeploySecrets, type PreflightResult, type ModelSpec } from "@/lib/deploy-preflight";
import { MissingKeysModal } from "./MissingKeysModal";
import { type Template } from "@/lib/deploy-preflight";
import { useTemplateDeploy } from "@/hooks/useTemplateDeploy";
import {
OrgImportPreflightModal,
type EnvRequirement,
} from "./OrgImportPreflightModal";
import { ConfirmDialog } from "./ConfirmDialog";
import { Spinner } from "./Spinner";
import { showToast } from "./Toaster";
import { TIER_CONFIG } from "@/lib/design-tokens";
import { listSecrets } from "@/lib/api/secrets";
interface Template {
id: string;
name: string;
description: string;
tier: number;
runtime?: string;
model: string;
models?: ModelSpec[];
/** AND-required env vars declared at runtime_config.required_env. */
required_env?: string[];
skills: string[];
skill_count: number;
}
// `Template` type and `resolveRuntime` helper now live in
// `@/lib/deploy-preflight` so EmptyState can import the same ones. Was
// redeclared here + a narrower redeclaration in EmptyState; the
// narrower one dropped `runtime`, `models`, `required_env`, which is
// exactly the data the preflight needs. See reviewer's "runtime
// fallback drift" note — single source of truth closes the drift.
export interface OrgTemplate {
dir: string;
name: string;
description: string;
workspaces: number;
/** Env vars that MUST be set as global secrets before the org can
* import. Server refuses the import with 412 if any are missing;
* the canvas preflights against /secrets/list to avoid the round
* trip. Aggregated from org-level + every workspace in the tree.
*
* Each entry is either a key name (strict) or an `{any_of: [...]}`
* group (any one of the listed members satisfies the requirement
* e.g. `ANTHROPIC_API_KEY` OR `CLAUDE_CODE_OAUTH_TOKEN`). */
required_env?: EnvRequirement[];
/** "Nice-to-have" tier. Import proceeds without them but features
* may degrade a channel's webhook posts get dropped, a fallback
* LLM isn't available, etc. Surfaced to the user as a non-blocking
* warning with an "add now" affordance. Same union shape as
* `required_env`. */
recommended_env?: EnvRequirement[];
}
/** Fetch the list of org templates from the platform. Returns [] on error
@ -91,6 +104,14 @@ export function OrgTemplatesSection() {
const [loading, setLoading] = useState(false);
const [importing, setImporting] = useState<string | null>(null);
const [error, setError] = useState<string | null>(null);
// Preflight modal state. `preflight` is non-null when the user
// clicked Import on an org with declared required/recommended envs
// and we're waiting for them to confirm; null otherwise (direct
// import path for orgs with zero env requirements).
const [preflight, setPreflight] = useState<{
org: OrgTemplate;
configuredKeys: Set<string>;
} | null>(null);
// Collapsed by default — org templates are multi-workspace imports
// that most new users don't reach for first. Keeping them
// expand-on-demand frees ~400 px of vertical space for the
@ -109,21 +130,55 @@ export function OrgTemplatesSection() {
loadOrgs();
}, [loadOrgs]);
const handleImport = async (org: OrgTemplate) => {
/** Fetch the set of global secret KEYS that are already configured.
* Used to strike through already-set entries in the preflight modal
* and to decide whether the import needs the modal at all. */
const loadConfiguredKeys = useCallback(async (): Promise<Set<string>> => {
try {
const secrets = await listSecrets("global");
return new Set(secrets.map((s) => s.name));
} catch {
// Secrets endpoint unreachable → assume nothing configured.
// The server will refuse the import with 412 and the user
// retries; safer than letting the import fly blind.
return new Set();
}
}, []);
/** Actually run the import. Split out so both the "no preflight
* needed" fast path and the "preflight modal approved" path can
* share the fetch + hydrate + toast sequence. */
const doImport = useCallback(async (org: OrgTemplate) => {
setImporting(org.dir);
setError(null);
try {
await importOrgTemplate(org.dir);
// Refresh canvas inline — the WebSocket may be offline, in which case
// WORKSPACE_PROVISIONING broadcasts never arrive and the user sees
// no change from clicking "Import org". A direct fetch guarantees
// the new workspaces land on canvas regardless of WS state.
try {
const workspaces = await api.get<WorkspaceData[]>("/workspaces");
useCanvasStore.getState().hydrate(workspaces);
} catch {
// Rehydrate failure is non-fatal; WS (if alive) or the next
// health-check cycle will eventually pick the new workspaces up.
// Hydrate is the safety net for the "WS is offline" case —
// without live events the canvas stays empty. But calling it
// immediately wipes the org-deploy animation (hydrate rebuilds
// the node array from scratch, dropping the spawn / shimmer
// classes and position tweens). So:
// 1. If the number of nodes on the canvas already matches
// (or exceeds) the template's workspace count, WS
// delivered everything — skip hydrate.
// 2. Otherwise, wait a short window to let any in-flight WS
// events land, then hydrate only if still behind.
const expectedCount = org.workspaces;
// Nodes transition through WORKSPACE_REMOVED which physically
// drops them from the store — there is no "removed" status in
// WorkspaceNodeData — so a simple length check is enough here.
const hasAll = () => useCanvasStore.getState().nodes.length >= expectedCount;
if (!hasAll()) {
await new Promise((r) => setTimeout(r, 1500));
}
if (!hasAll()) {
try {
const workspaces = await api.get<WorkspaceData[]>("/workspaces");
useCanvasStore.getState().hydrate(workspaces);
} catch {
// WS (if alive) or the next health-check cycle will
// eventually pick the new workspaces up.
}
}
showToast(`Imported "${org.name || org.dir}" (${org.workspaces} workspaces)`, "success");
} catch (e) {
@ -133,7 +188,45 @@ export function OrgTemplatesSection() {
} finally {
setImporting(null);
}
};
}, []);
/** Entry point for the Import button. Two paths:
*
* 1. No env declared by the template (required_env + recommended_env
* both empty) fire doImport directly. Matches the pre-preflight
* behaviour for existing templates.
*
* 2. Any env declared load the configured-keys set and open the
* preflight modal. doImport runs only when the user clicks
* Import inside the modal, which is gated to "required envs all
* configured" by the modal itself. */
const handleImport = useCallback(async (org: OrgTemplate) => {
const hasEnvDeclarations =
(org.required_env && org.required_env.length > 0) ||
(org.recommended_env && org.recommended_env.length > 0);
if (!hasEnvDeclarations) {
void doImport(org);
return;
}
// Flip the button to its "Importing…" state while the secrets
// lookup runs — on a tenant with 500+ global secrets the round
// trip can be > 200 ms and the user otherwise gets zero visual
// feedback after clicking. Cleared on modal close / error.
setImporting(org.dir);
try {
const configuredKeys = await loadConfiguredKeys();
setPreflight({ org, configuredKeys });
} finally {
setImporting(null);
}
}, [doImport, loadConfiguredKeys]);
/** Called by the preflight modal after a successful key save so the
* strike-through re-renders and canProceed recomputes. */
const refreshConfiguredKeys = useCallback(async () => {
const keys = await loadConfiguredKeys();
setPreflight((prev) => (prev ? { ...prev, configuredKeys: keys } : prev));
}, [loadConfiguredKeys]);
return (
<div className="space-y-2" data-testid="org-templates-section">
@ -222,6 +315,35 @@ export function OrgTemplatesSection() {
})}
</div>
)}
{preflight && (
<OrgImportPreflightModal
open
orgName={preflight.org.name || preflight.org.dir}
workspaceCount={preflight.org.workspaces}
requiredEnv={preflight.org.required_env ?? []}
recommendedEnv={preflight.org.recommended_env ?? []}
configuredKeys={preflight.configuredKeys}
onSecretSaved={refreshConfiguredKeys}
onProceed={() => {
const org = preflight.org;
// flushSync guarantees the modal unmounts BEFORE we kick
// off the import network call. Without it, React batches
// setPreflight(null) with the setImporting(...) from
// doImport's synchronous prefix, both commit at the end
// of this handler, AND the await import() POST may yield
// a microtask before React schedules the paint. Net
// effect: the modal backdrop sat over the canvas during
// the first wave of WORKSPACE_PROVISIONING WS events,
// hiding the spawn animation. Force the close to land
// first so the user sees the canvas reveal + agents
// popping into place.
flushSync(() => setPreflight(null));
void doImport(org);
}}
onCancel={() => setPreflight(null)}
/>
)}
</div>
);
}
@ -319,14 +441,6 @@ export function TemplatePalette() {
const [templates, setTemplates] = useState<Template[]>([]);
const [loading, setLoading] = useState(false);
const [creating, setCreating] = useState<string | null>(null);
const [error, setError] = useState<string | null>(null);
// Missing keys modal state
const [missingKeysInfo, setMissingKeysInfo] = useState<{
template: Template;
preflight: PreflightResult;
} | null>(null);
const loadTemplates = useCallback(async () => {
setLoading(true);
@ -344,65 +458,15 @@ export function TemplatePalette() {
if (open) loadTemplates();
}, [open, loadTemplates]);
/** Resolve runtime from template ID (e.g., "langgraph", "claude-code-default" → "claude-code") */
const resolveRuntime = (templateId: string): string => {
const runtimeMap: Record<string, string> = {
langgraph: "langgraph",
"claude-code-default": "claude-code",
openclaw: "openclaw",
deepagents: "deepagents",
crewai: "crewai",
autogen: "autogen",
};
return runtimeMap[templateId] ?? templateId.replace(/-default$/, "");
};
/** Actually execute the deploy API call */
const executeDeploy = useCallback(async (template: Template) => {
setCreating(template.id);
setError(null);
try {
await api.post("/workspaces", {
name: template.name,
template: template.id,
tier: template.tier,
canvas: {
x: Math.random() * 400 + 100,
y: Math.random() * 300 + 100,
},
});
setCreating(null);
} catch (e) {
setError(e instanceof Error ? e.message : "Failed to deploy");
setCreating(null);
}
}, []);
/** Pre-deploy check: validate secrets before deploying */
const handleDeploy = async (template: Template) => {
setCreating(template.id);
setError(null);
// Prefer the runtime the Go /templates endpoint returned verbatim —
// resolveRuntime() is a legacy id→runtime fallback for installs whose
// template summary predates the `runtime` field.
const runtime = template.runtime ?? resolveRuntime(template.id);
const preflight = await checkDeploySecrets({
runtime,
models: template.models,
required_env: template.required_env,
});
if (!preflight.ok) {
// Missing keys — show the modal instead of deploying
setMissingKeysInfo({ template, preflight });
setCreating(null);
return;
}
// All keys present — deploy directly
await executeDeploy(template);
};
// Preflight + POST + modal wiring moved into useTemplateDeploy so
// this component and EmptyState use one implementation. The sidebar
// uses the hook's default random canvas placement (no override) —
// an already-populated canvas shouldn't have new deploys stacking on
// a single fixed point. No post-deploy side effect either: the
// palette is operator-triggered, so auto-selecting would yank
// focus off whatever the user was already looking at.
const { deploy: handleDeploy, deploying: creating, error, modal } =
useTemplateDeploy();
return (
<>
@ -426,21 +490,9 @@ export function TemplatePalette() {
</svg>
</button>
{/* Missing Keys Modal */}
<MissingKeysModal
open={!!missingKeysInfo}
missingKeys={missingKeysInfo?.preflight.missingKeys ?? []}
providers={missingKeysInfo?.preflight.providers ?? []}
runtime={missingKeysInfo?.preflight.runtime ?? ""}
onKeysAdded={() => {
if (missingKeysInfo) {
const template = missingKeysInfo.template;
setMissingKeysInfo(null);
executeDeploy(template);
}
}}
onCancel={() => setMissingKeysInfo(null)}
/>
{/* Missing-keys modal rendered by the shared hook. Same
instance shape used by EmptyState. */}
{modal}
{/* Sidebar */}
{open && (
@ -483,7 +535,7 @@ export function TemplatePalette() {
<button
type="button"
key={t.id}
onClick={() => handleDeploy(t)}
onClick={() => void handleDeploy(t)}
disabled={isDeploying}
className="w-full text-left bg-zinc-800/40 hover:bg-zinc-800/70 border border-zinc-700/40 hover:border-zinc-600/50 rounded-xl p-3 transition-all disabled:opacity-50 disabled:cursor-not-allowed disabled:hover:bg-zinc-800/40 disabled:hover:border-zinc-700/40 group focus:outline-none focus-visible:ring-2 focus-visible:ring-blue-500/70"
>

View File

@ -6,6 +6,8 @@ import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
import { showToast } from "@/components/Toaster";
import { Tooltip } from "@/components/Tooltip";
import { STATUS_CONFIG, TIER_CONFIG } from "@/lib/design-tokens";
import { useOrgDeployState } from "@/components/canvas/useOrgDeployState";
import { OrgCancelButton } from "@/components/canvas/OrgCancelButton";
/** Descendant count for the "N sub" badge children are first-class nodes
* rendered as full cards inside this one via React Flow's native parentId,
@ -35,6 +37,10 @@ function EjectIcon(props: React.SVGProps<SVGSVGElement>) {
export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>) {
const statusCfg = STATUS_CONFIG[data.status] || STATUS_CONFIG.offline;
const tierCfg = TIER_CONFIG[data.tier] || { label: `T${data.tier}`, color: "text-zinc-500 bg-zinc-800" };
// Org-deploy context — four derived flags off one store subscription.
// Drives the shimmer while provisioning, the dimmed/non-draggable
// treatment on locked descendants, and the Cancel pill on the root.
const deploy = useOrgDeployState(id);
const selectedNodeId = useCanvasStore((s) => s.selectedNodeId);
const selectNode = useCanvasStore((s) => s.selectNode);
const openContextMenu = useCanvasStore((s) => s.openContextMenu);
@ -138,8 +144,21 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
}
backdrop-blur-sm
focus:outline-none focus-visible:ring-2 focus-visible:ring-blue-500/70 focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-950
${deploy.isActivelyProvisioning ? "mol-deploy-shimmer" : ""}
${deploy.isLockedChild ? "mol-deploy-locked" : ""}
`}
>
{/* Cancel-deployment pill rendered on the root of a deploying
org only. Positioned absolute inside the card so it moves
with drag; class="nodrag" on the button stops React Flow
from treating clicks as a drag start. */}
{deploy.isDeployingRoot && (
<OrgCancelButton
rootId={id}
rootName={data.name}
workspaceCount={deploy.descendantProvisioningCount}
/>
)}
{/* Status gradient bar at top */}
<div className={`absolute inset-x-0 top-0 h-8 bg-gradient-to-b ${statusCfg.bar} pointer-events-none`} />

View File

@ -175,9 +175,28 @@ describe("buildA2AEdges — edge properties", () => {
expect((edge.style as React.CSSProperties).pointerEvents).toBe("none");
});
it("sets pointerEvents: 'none' on labelStyle", () => {
it("tags the edge as type=a2a so React Flow renders the custom A2AEdge component", () => {
// The custom edge portals labels above the node layer and makes
// them clickable. Without type=a2a, RF falls back to the default
// edge whose label sits in the SVG group (hidden under nodes,
// pointerEvents:none). Regression guard for the hidden-label /
// unclickable-label bug observed 2026-04-25.
const [edge] = buildA2AEdges([makeRow()], NOW);
expect((edge.labelStyle as React.CSSProperties).pointerEvents).toBe("none");
expect(edge.type).toBe("a2a");
});
it("populates edge.data with the fields the custom edge component reads", () => {
// A2AEdge reads count, lastAt, isHot, label from edge.data so the
// shape upstream must keep emitting them. A future buildA2AEdges
// refactor that drops any of these silently breaks the rendered
// pill (label disappears, hot/warm color swap fails, click handler
// can still fire but the label text vanishes).
const [edge] = buildA2AEdges([makeRow()], NOW);
const data = edge.data as Record<string, unknown>;
expect(data.count).toBe(1);
expect(typeof data.lastAt).toBe("number");
expect(typeof data.isHot).toBe("boolean");
expect(data.label).toMatch(/^1 call ·/);
});
it("label uses singular 'call' for count === 1", () => {

View File

@ -72,6 +72,7 @@ const mockStoreState = {
selectedNodeIds: new Set<string>(),
clearSelection: vi.fn(),
toggleNodeSelection: vi.fn(),
deletingIds: new Set<string>(),
};
vi.mock("@/store/canvas", () => ({

View File

@ -16,7 +16,9 @@ afterEach(() => {
// ── Shared fitView spy — must be set up before vi.mock hoisting ──────────────
const mockFitView = vi.fn();
const mockFitBounds = vi.fn();
const mockGetIntersectingNodes = vi.fn(() => []);
const mockGetIntersectingNodes = vi.fn(
(): Array<{ id: string; position: { x: number; y: number } }> => [],
);
vi.mock("@xyflow/react", () => {
const ReactFlow = ({
@ -83,6 +85,12 @@ const mockStoreState = {
selectedNodeIds: new Set<string>(),
clearSelection: vi.fn(),
toggleNodeSelection: vi.fn(),
// Cascade-delete / deploy animation state (added in the multilevel-
// layout-UX bundle). Canvas.tsx reads deletingIds.size to decide
// whether to apply the "locked during delete" class on each node;
// an empty Set mirrors the idle canvas and doesn't interact with
// any pan/fit behaviour under test here.
deletingIds: new Set<string>(),
};
vi.mock("@/store/canvas", () => ({

View File

@ -0,0 +1,225 @@
// @vitest-environment jsdom
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, screen, fireEvent, cleanup, waitFor } from "@testing-library/react";
// Regression tests for the OrgImportPreflightModal's save path and
// any-of group rendering. Guards two specific bugs caught in the
// UX A/B Lab rollout (2026-04-24):
//
// 1. saveOne early-returned because it tried to read a local
// `startValue` reassigned inside a functional setDrafts
// updater. React did not always evaluate the updater
// synchronously, so the gate read "" and bailed while
// `saving:true` committed at next render, wedging the
// button on "…" without ever calling createSecret.
//
// 2. Double-click / Enter-spam could race past the disabled-
// button UI gate, firing createSecret twice. The production
// endpoint is idempotent so no data hazard, but the extra
// PUT is wasteful and harder to reason about.
// Hoisted mock for the secrets API. vi.mock factories are hoisted above
// imports by vitest, so the mock fn must be declared before the factory
// closure that references it.
const createSecretMock = vi.fn().mockResolvedValue(undefined);
vi.mock("@/lib/api/secrets", () => ({
  createSecret: (...args: unknown[]) => createSecretMock(...args),
}));
import { OrgImportPreflightModal } from "../OrgImportPreflightModal";
beforeEach(() => {
  // Reset call history AND restore the default resolved value — one test
  // below swaps in a pending-promise implementation via
  // mockImplementationOnce and must not leak into its neighbors.
  createSecretMock.mockClear();
  createSecretMock.mockResolvedValue(undefined);
});
afterEach(() => {
  cleanup();
});
// See file header: pins the saveOne race fixes from the UX A/B Lab rollout.
describe("OrgImportPreflightModal — saveOne", () => {
  it("calls createSecret exactly once when Save is clicked on an any-of member", async () => {
    render(
      <OrgImportPreflightModal
        open
        orgName="UX A/B Lab"
        workspaceCount={7}
        requiredEnv={[{ any_of: ["ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"] }]}
        recommendedEnv={[]}
        configuredKeys={new Set()}
        onSecretSaved={() => {}}
        onProceed={() => {}}
        onCancel={() => {}}
      />,
    );
    // Both any-of members render their own input + Save.
    const input = screen.getByLabelText(/Value for ANTHROPIC_API_KEY/i);
    fireEvent.change(input, { target: { value: "test-secret-value" } });
    // The Save button adjacent to the changed input.
    const saveButtons = screen
      .getAllByRole("button")
      .filter((b) => b.textContent === "Save");
    // Two saves on screen (one per any-of member). First is ANTHROPIC.
    fireEvent.click(saveButtons[0]);
    await waitFor(() => {
      expect(createSecretMock).toHaveBeenCalledTimes(1);
    });
    // NOTE(review): "global" scope is what the component passes today —
    // assert the full argument tuple so a scope regression is caught too.
    expect(createSecretMock).toHaveBeenCalledWith(
      "global",
      "ANTHROPIC_API_KEY",
      "test-secret-value",
    );
  });
  it("synchronous double-click on Save fires createSecret exactly once", async () => {
    // Pause the first save so we can fire a second click while the
    // first is still mid-await. The two clicks happen in the SAME
    // tick — fireEvent runs synchronously through React's event
    // system — so any guard that depends on a committed setState
    // (e.g. `disabled={drafts[key].saving}` or a closure read of
    // `drafts[key].saving`) loses the race: the second click sees
    // saving=false because React hasn't committed yet. The fix is
    // a useRef-based gate that flips synchronously before any await.
    let resolveCreate!: () => void;
    createSecretMock.mockImplementationOnce(
      () => new Promise<void>((resolve) => {
        resolveCreate = resolve;
      }),
    );
    render(
      <OrgImportPreflightModal
        open
        orgName="UX A/B Lab"
        workspaceCount={7}
        requiredEnv={[{ any_of: ["ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"] }]}
        recommendedEnv={[]}
        configuredKeys={new Set()}
        onSecretSaved={() => {}}
        onProceed={() => {}}
        onCancel={() => {}}
      />,
    );
    const input = screen.getByLabelText(/Value for ANTHROPIC_API_KEY/i);
    fireEvent.change(input, { target: { value: "test-secret-value" } });
    const saveButtons = screen
      .getAllByRole("button")
      .filter((b) => b.textContent === "Save");
    // Pull the React-bound onClick once so both invocations close
    // over the SAME callback — simulates a double-fire that happens
    // before React reconciles between events. Without this, RTL
    // flushes act() between fireEvent calls and the second click
    // sees the post-commit state.
    const saveBtn = saveButtons[0] as HTMLButtonElement;
    saveBtn.click();
    saveBtn.click();
    // Give React a tick to process any queued state updates.
    await waitFor(() => {
      expect(createSecretMock).toHaveBeenCalledTimes(1);
    });
    // Release the paused save and re-assert — a late second call would
    // only surface after the first promise settles.
    resolveCreate();
    await waitFor(() => {
      // Post-save count must remain at exactly one.
      expect(createSecretMock).toHaveBeenCalledTimes(1);
    });
  });
  it("does not call createSecret when value is empty", async () => {
    render(
      <OrgImportPreflightModal
        open
        orgName="UX A/B Lab"
        workspaceCount={7}
        requiredEnv={[{ any_of: ["ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"] }]}
        recommendedEnv={[]}
        configuredKeys={new Set()}
        onSecretSaved={() => {}}
        onProceed={() => {}}
        onCancel={() => {}}
      />,
    );
    // Button is disabled when value is empty — clicking a disabled
    // button still dispatches onClick in RTL (since fireEvent
    // bypasses the disabled attribute), so this asserts the code-
    // level gate catches it, not just the UI.
    const saveButtons = screen
      .getAllByRole("button")
      .filter((b) => b.textContent === "Save");
    fireEvent.click(saveButtons[0]);
    // Small async wait to let any state updates settle.
    await new Promise((r) => setTimeout(r, 50));
    expect(createSecretMock).not.toHaveBeenCalled();
  });
});
// Rendering contract for `requiredEnv` any-of groups: each member gets
// its own input row, and ONE configured member satisfies the group.
describe("OrgImportPreflightModal — any-of rendering", () => {
  it("renders each any-of member as a separate input row", () => {
    render(
      <OrgImportPreflightModal
        open
        orgName="UX A/B Lab"
        workspaceCount={7}
        requiredEnv={[{ any_of: ["ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"] }]}
        recommendedEnv={[]}
        configuredKeys={new Set()}
        onSecretSaved={() => {}}
        onProceed={() => {}}
        onCancel={() => {}}
      />,
    );
    expect(screen.getByText("Configure any one")).toBeTruthy();
    expect(screen.getByLabelText(/Value for ANTHROPIC_API_KEY/i)).toBeTruthy();
    expect(screen.getByLabelText(/Value for CLAUDE_CODE_OAUTH_TOKEN/i)).toBeTruthy();
  });
  it("shows satisfied indicator when any member is configured, and enables Import", () => {
    render(
      <OrgImportPreflightModal
        open
        orgName="UX A/B Lab"
        workspaceCount={7}
        requiredEnv={[{ any_of: ["ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"] }]}
        recommendedEnv={[]}
        configuredKeys={new Set(["CLAUDE_CODE_OAUTH_TOKEN"])}
        onSecretSaved={() => {}}
        onProceed={() => {}}
        onCancel={() => {}}
      />,
    );
    // "✓ using CLAUDE_CODE_OAUTH_TOKEN" banner renders. Name appears
    // twice (banner + member row) so use getAllByText.
    expect(screen.getByText(/using/i)).toBeTruthy();
    expect(screen.getAllByText("CLAUDE_CODE_OAUTH_TOKEN").length).toBeGreaterThanOrEqual(1);
    const importBtn = screen.getByRole("button", { name: /^Import$/ });
    expect(importBtn.hasAttribute("disabled")).toBe(false);
  });
  it("keeps Import disabled when no any-of member is configured", () => {
    render(
      <OrgImportPreflightModal
        open
        orgName="UX A/B Lab"
        workspaceCount={7}
        requiredEnv={[{ any_of: ["ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"] }]}
        recommendedEnv={[]}
        configuredKeys={new Set()}
        onSecretSaved={() => {}}
        onProceed={() => {}}
        onCancel={() => {}}
      />,
    );
    const importBtn = screen.getByRole("button", { name: /^Import$/ });
    expect(importBtn.hasAttribute("disabled")).toBe(true);
  });
});

View File

@ -0,0 +1,143 @@
// @vitest-environment jsdom
//
// Behavioral coverage for the install flow. Two regressions to pin
// down:
//
// 1. The install POST URL has to include the workspace id. A pre-fix
// bug routed it to /workspaces/undefined/plugins because the
// component read `data.id`, but `WorkspaceNodeData` has no `id`
// field — its `extends Record<string, unknown>` index signature
// hid the bad access from TS. The component now takes
// `workspaceId` as an explicit prop; this test asserts the URL.
//
// 2. The optimistic install update has to flip the registry row to
// "Installed" without waiting for the 15s reload timer (the
// PLUGIN_RELOAD_DELAY_MS gap). This test asserts the row's "Install"
// button is replaced by the green "Installed" tag synchronously
// after the POST resolves.
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
// Per-method mocks for the shared api client. get/post are tracked
// individually (the tests assert on call args); put/del/patch are inert.
const mockApiGet = vi.fn();
const mockApiPost = vi.fn();
vi.mock("@/lib/api", () => ({
  api: {
    get: (...args: unknown[]) => mockApiGet(...args),
    post: (...args: unknown[]) => mockApiPost(...args),
    put: vi.fn().mockResolvedValue({}),
    del: vi.fn().mockResolvedValue({}),
    patch: vi.fn().mockResolvedValue({}),
  },
}));
// Minimal zustand-style store stub: callable as a selector hook AND
// exposing a static getState, which is how the component consumes it.
vi.mock("@/store/canvas", () => ({
  useCanvasStore: Object.assign(
    vi.fn((selector: (s: Record<string, unknown>) => unknown) =>
      selector({ setPanelTab: vi.fn() } as Record<string, unknown>),
    ),
    { getState: () => ({ setPanelTab: vi.fn() }) },
  ),
  summarizeWorkspaceCapabilities: vi.fn(() => ({ skills: [], tools: [] })),
}));
vi.mock("../Toaster", () => ({ showToast: vi.fn() }));
import { SkillsTab } from "../tabs/SkillsTab";
// Minimal WorkspaceNodeData-shaped fixture for SkillsTab renders. Only
// the fields the component reads matter; the rest satisfy the shape at
// the call sites (which cast with `as never`).
function makeData() {
  const fixture = {
    name: "Test WS",
    status: "online",
    tier: 1,
    agentCard: null,
    activeTasks: 0,
    collapsed: false,
    role: "agent",
    lastErrorRate: 0,
    lastSampleError: "",
    url: "http://localhost:9000",
    parentId: null,
    currentTask: "",
    runtime: "langgraph",
    needsRestart: false,
    budgetLimit: null,
  };
  return fixture;
}
// Single-entry registry fixture — one plugin is enough to exercise the
// row-level Install flow.
const REGISTRY = [
  {
    name: "browser-automation",
    version: "1.1.0",
    description: "Browser automation + testing",
    author: "molecule",
    tags: ["browser", "playwright"],
    skills: [],
    runtimes: ["claude-code"],
  },
];
beforeEach(() => {
  // Routes match the component's loadInstalled / loadRegistry /
  // loadSourceSchemes calls. Routing by path (rather than call order)
  // keeps the fixtures stable if the component reorders its fetches.
  // The schemes endpoint resolves a small fixed scheme list so the
  // Install-from-source input doesn't blow up.
  mockApiGet.mockReset();
  mockApiPost.mockReset();
  mockApiGet.mockImplementation((path: string) => {
    if (path.endsWith("/plugins") && path.startsWith("/workspaces/")) {
      return Promise.resolve([]); // installed
    }
    if (path === "/plugins") {
      return Promise.resolve(REGISTRY); // registry
    }
    if (path === "/plugins/sources") {
      return Promise.resolve({ schemes: ["github://", "local://"] });
    }
    return Promise.resolve(null);
  });
  mockApiPost.mockResolvedValue({ status: "installed", plugin: "browser-automation" });
});
afterEach(() => {
  cleanup();
  vi.clearAllMocks();
});
// Locates the Install button on the registry row for browser-automation.
// A plain `findByRole("button", { name: /install/ })` would throw on
// multiple matches because the custom-source input renders its own
// "Install" button — so scope the lookup to the row that contains the
// plugin-name text instead.
async function findRowInstallButton() {
  const nameNode = await screen.findByText("browser-automation");
  const row = nameNode.closest("div.flex.items-center.justify-between") as HTMLElement;
  if (!row) throw new Error("could not locate row container for browser-automation");
  for (const btn of Array.from(row.querySelectorAll("button"))) {
    if (btn.textContent?.trim() === "Install") return btn;
  }
  throw new Error("row has no Install button (already installed?)");
}
// See file header: pins the undefined-workspace-id URL bug and the
// optimistic "Installed" flip.
describe("SkillsTab install flow", () => {
  it("POSTs to /workspaces/<workspaceId>/plugins (no `undefined` in URL)", async () => {
    render(<SkillsTab workspaceId="ws-abc-123" data={makeData() as never} />);
    fireEvent.click(await findRowInstallButton());
    await waitFor(() => expect(mockApiPost).toHaveBeenCalled());
    // Full-tuple assertion: both the URL (regression #1) and the
    // local:// source payload must be exact.
    expect(mockApiPost).toHaveBeenCalledWith(
      "/workspaces/ws-abc-123/plugins",
      { source: "local://browser-automation" },
    );
  });
  it("flips the registry row to 'Installed' synchronously after POST resolves (no 15s wait)", async () => {
    render(<SkillsTab workspaceId="ws-abc-123" data={makeData() as never} />);
    fireEvent.click(await findRowInstallButton());
    // The "Installed" green tag must appear without advancing the
    // reload timer — the optimistic update is the entire point of
    // this fix. If this test ever regresses to needing fake timers
    // + advanceTimersByTime, the optimistic path is broken.
    const installedTag = await screen.findByText(/^Installed$/i);
    expect(installedTag).toBeDefined();
  });
});

View File

@ -123,7 +123,7 @@ describe("SkillsTab — aria-label on bare source input (WCAG 1.3.1)", () => {
});
it('install source input has aria-label="Install from source URL"', async () => {
render(<SkillsTab data={makeSkillsData() as never} />);
render(<SkillsTab workspaceId="ws-test-id" data={makeSkillsData() as never} />);
// The source input is inside the registry section (showRegistry=false initially).
// Click the "+ Install Plugin" button to reveal it.
@ -138,7 +138,7 @@ describe("SkillsTab — aria-label on bare source input (WCAG 1.3.1)", () => {
});
it("install source input is a text input (not hidden)", async () => {
render(<SkillsTab data={makeSkillsData() as never} />);
render(<SkillsTab workspaceId="ws-test-id" data={makeSkillsData() as never} />);
const installBtn = screen.getByRole("button", { name: /install plugin/i });
fireEvent.click(installBtn);

View File

@ -0,0 +1,133 @@
"use client";
import { memo } from "react";
import {
BaseEdge,
EdgeLabelRenderer,
getBezierPath,
type EdgeProps,
} from "@xyflow/react";
import { useCanvasStore } from "@/store/canvas";
/**
* Custom edge for the A2A topology overlay. Solves two problems with the
* default React Flow edge label rendering:
*
* 1. **Z-order.** The default `label` prop renders inside the edge's
* SVG group, which always sits below node DOM in React Flow. When
* a label happened to land underneath a workspace card, it was
* hidden. EdgeLabelRenderer mounts label content in a separate
* portal layer that we can pin above nodes via z-index.
*
* 2. **Clickability.** Default labels inherit `pointerEvents: none`
* from the SVG path so the user can drag through them. The
* portaled label is a regular HTML element with its own pointer
 * events — we set `pointerEvents: all` only on the label pill so
* drags on the edge line still pass through to the canvas.
*
* On click: selects the source workspace and switches its side panel
* to Activity, where the user can inspect the underlying delegations.
*/
/** Shape of `edge.data` emitted by buildA2AEdges for type="a2a" edges. */
interface A2AEdgeData {
  /** Number of delegations aggregated onto this edge. */
  count: number;
  /** Timestamp of the most recent delegation. NOTE(review): assumed to
   * be epoch ms — upstream tests only assert `typeof lastAt === "number"`;
   * confirm units against buildA2AEdges. */
  lastAt: number;
  /** Hot vs warm visual treatment (violet vs blue accents below). */
  isHot: boolean;
  /** Pre-formatted "5 calls · 2m ago" built upstream by buildA2AEdges
   * so the same string renders here and in any future tooltip layer. */
  label: string;
}
/**
 * Renders one A2A topology edge: the bezier stroke plus the clickable,
 * portal-mounted label pill (see module docblock for why React Flow's
 * default label rendering is insufficient here).
 */
function A2AEdgeImpl({
  id,
  source,
  sourceX,
  sourceY,
  targetX,
  targetY,
  sourcePosition,
  targetPosition,
  data,
  style = {},
}: EdgeProps) {
  // Bezier path for the stroke; (labelX, labelY) is RF's midpoint,
  // where the pill is anchored.
  const [edgePath, labelX, labelY] = getBezierPath({
    sourceX,
    sourceY,
    sourcePosition,
    targetX,
    targetY,
    targetPosition,
  });
  const selectNode = useCanvasStore((s) => s.selectNode);
  const setPanelTab = useCanvasStore((s) => s.setPanelTab);
  // `data` is loosely typed on EdgeProps; default every field so a
  // malformed edge degrades to "no label" instead of throwing.
  const edgeData = (data ?? {}) as Partial<A2AEdgeData>;
  const labelText = edgeData.label ?? "";
  const isHot = edgeData.isHot ?? false;
  const count = edgeData.count ?? 0;
  const handleClick = (e: React.MouseEvent) => {
    e.stopPropagation();
    // Select the source (the agent that initiated the delegations).
    // The user's mental model when clicking the edge is "show me the
    // calls FROM here" — that's the source's activity feed.
    //
    // Preserve the current tab when the user re-clicks the same edge
    // (or another edge whose source is already selected). Yanking
    // them back to Activity every click would surprise — they may
    // have intentionally switched to Chat / Memory while looking at
    // this peer. The first click that lands a *different* selection
    // still routes them to Activity, which is the discovery affordance.
    const alreadySelected =
      useCanvasStore.getState().selectedNodeId === source;
    selectNode(source);
    if (!alreadySelected) {
      setPanelTab("activity");
    }
  };
  // The edge stroke color matches what buildA2AEdges sets on the SVG
  // path style. Mirror it on the badge border so the visual identity
  // (hot=violet vs warm=blue) carries to the clickable label.
  const accent = isHot ? "border-violet-500/60" : "border-blue-500/60";
  const accentText = isHot ? "text-violet-200" : "text-blue-200";
  // NOTE(review): label is "N calls · <relative time>" (see the
  // `/^1 call ·/` upstream test), so this reads e.g. "5 delegations
  // from 2m ago" — slightly odd phrasing; confirm intent before
  // changing the user-facing string.
  const ariaLabel = `${count} delegation${count === 1 ? "" : "s"} from ${
    edgeData.label?.split(" · ")[1] ?? "recent"
  }. Click to inspect.`;
  return (
    <>
      <BaseEdge id={id} path={edgePath} style={style} markerEnd="url(#a2a-arrow)" />
      {/* An empty labelText skips the portal entirely — "" is falsy,
          so React renders nothing. */}
      {labelText && (
        <EdgeLabelRenderer>
          <div
            // The label sits in a portal at the canvas root. position:
            // absolute + the (labelX, labelY) translate places it at
            // the edge midpoint. zIndex 5 wins against React Flow's
            // node layer (default z=0) without fighting the controls
            // strip (z=10).
            style={{
              position: "absolute",
              transform: `translate(-50%, -50%) translate(${labelX}px, ${labelY}px)`,
              pointerEvents: "all",
              zIndex: 5,
            }}
            className="nodrag nopan"
          >
            <button
              type="button"
              onClick={handleClick}
              aria-label={ariaLabel}
              title="Open source workspace's activity feed"
              className={`px-2 py-0.5 rounded-full bg-zinc-900/95 border ${accent} ${accentText} text-[10px] font-medium shadow-md shadow-black/40 backdrop-blur-sm hover:bg-zinc-800 hover:border-opacity-100 transition-colors cursor-pointer`}
            >
              {labelText}
            </button>
          </div>
        </EdgeLabelRenderer>
      )}
    </>
  );
}
export const A2AEdge = memo(A2AEdgeImpl);

View File

@ -0,0 +1,165 @@
"use client";
import { useState } from "react";
import { api } from "@/lib/api";
import { useCanvasStore } from "@/store/canvas";
import { showToast } from "@/components/Toaster";
interface Props {
  /** Root workspace of the org being deployed. The cancel action
   * cascades delete through workspace-server's existing recursive
   * delete handler, so we only need the root id. */
  rootId: string;
  /** Display name rendered in the success toast and the aria-label. */
  rootName: string;
  /** Count rendered in the pill label; updated live as children
   * come online (the useOrgDeployState hook recomputes on every
   * status change). */
  workspaceCount: number;
}
/**
* Cancel-deployment pill attached to the root of a deploying org.
 * One click opens a confirm dialog; confirming issues DELETE
 * /workspaces/:rootId?confirm=true,
* which cascades through every descendant server-side.
*
* Rendered inside the root's WorkspaceNode card via an absolute-
* positioned overlay so it sits visually ON the card and moves with
* drag. `className="nodrag"` stops React Flow from interpreting
* clicks here as the start of a drag gesture.
*
* Deliberately uses only `.mol-deploy-cancel*` classes for styling
* every color / easing comes from theme-tokens.css, so a future
* light-theme (or tenant-branded theme) inherits automatically.
*/
export function OrgCancelButton({ rootId, rootName, workspaceCount }: Props) {
  // Two-step interaction: idle pill → inline confirm row. `submitting`
  // disables both confirm buttons while the DELETE is in flight.
  const [confirming, setConfirming] = useState(false);
  const [submitting, setSubmitting] = useState(false);
  const handleCancel = async () => {
    setSubmitting(true);
    // Populate deletingIds with the subtree so every descendant
    // (and the root) locks into the dim + non-draggable state for
    // the duration of the network round-trip + server cascade —
    // same treatment the regular delete gives. Otherwise the org
    // looks interactive for the several seconds between click and
    // the first WORKSPACE_REMOVED event.
    const preState = useCanvasStore.getState();
    const subtreeIds = new Set<string>();
    const walkStack = [rootId];
    // Iterative DFS over parentId links — the store holds a flat node
    // list, not a tree structure.
    while (walkStack.length) {
      const nid = walkStack.pop()!;
      subtreeIds.add(nid);
      for (const n of preState.nodes) {
        if (n.data.parentId === nid) walkStack.push(n.id);
      }
    }
    preState.beginDelete(subtreeIds);
    try {
      await api.del<{ status: string }>(
        `/workspaces/${rootId}?confirm=true`,
      );
      showToast(`Cancelled deployment of "${rootName}"`, "success");
      // Optimistic local removal — workspace-server broadcasts
      // WORKSPACE_REMOVED per node but the WS may lag; strip the
      // subtree now so the user sees immediate feedback. Re-read
      // the store AFTER the await: children may have landed (or
      // already been removed by WS events) during the network
      // round-trip. If the WS_REMOVED handler already dropped the
      // root during the network call, bail out — the subtree walk
      // would miss any now-orphaned descendants (handleCanvasEvent
      // reparents children of a removed node upward, so they no
      // longer share the original root's id as parentId).
      const postDeleteState = useCanvasStore.getState();
      if (!postDeleteState.nodes.some((n) => n.id === rootId)) {
        return;
      }
      // Re-walk the subtree against the post-await snapshot — the
      // pre-await `subtreeIds` may be stale by now.
      const subtree = new Set<string>();
      const stack = [rootId];
      while (stack.length) {
        const id = stack.pop()!;
        subtree.add(id);
        for (const n of postDeleteState.nodes) {
          if (n.data.parentId === id) stack.push(n.id);
        }
      }
      useCanvasStore.setState({
        nodes: postDeleteState.nodes.filter((n) => !subtree.has(n.id)),
        edges: postDeleteState.edges.filter(
          (e) => !subtree.has(e.source) && !subtree.has(e.target),
        ),
      });
    } catch (e) {
      // Undo the lock so the user can try again / interact with the
      // still-deploying subtree.
      useCanvasStore.getState().endDelete(subtreeIds);
      showToast(
        e instanceof Error ? `Cancel failed: ${e.message}` : "Cancel failed",
        "error",
      );
    } finally {
      // Unconditional unlock covering the success path (and the early
      // return above). On the error path this is the SECOND
      // endDelete(subtreeIds) call — the catch already ran one.
      // NOTE(review): assumes endDelete is idempotent set-removal;
      // confirm in @/store/canvas — a counted lock would underflow here.
      useCanvasStore.getState().endDelete(subtreeIds);
      setSubmitting(false);
      setConfirming(false);
    }
  };
  if (confirming) {
    return (
      <div
        className="nodrag absolute -top-10 right-0 z-20 flex items-center gap-1.5 rounded-lg bg-zinc-900/95 px-2 py-1 shadow-lg border border-red-800/60"
        onClick={(e) => e.stopPropagation()}
      >
        <span className="text-[10px] text-zinc-300">
          Delete {workspaceCount} workspace{workspaceCount === 1 ? "" : "s"}?
        </span>
        <button
          type="button"
          onClick={handleCancel}
          disabled={submitting}
          className="mol-deploy-cancel px-2 py-0.5 rounded text-[10px] font-semibold"
        >
          {submitting ? "Deleting…" : "Yes"}
        </button>
        <button
          type="button"
          onClick={() => setConfirming(false)}
          disabled={submitting}
          className="px-2 py-0.5 rounded bg-zinc-700/80 hover:bg-zinc-600 text-[10px] text-zinc-200"
        >
          No
        </button>
      </div>
    );
  }
  return (
    <button
      type="button"
      onClick={(e) => {
        // Stop the click from bubbling to React Flow (selects the
        // node) — the Cancel pill is a UI surface, not a node
        // activation.
        e.stopPropagation();
        setConfirming(true);
      }}
      className="nodrag mol-deploy-cancel mol-deploy-cancel-pulse absolute -top-7 right-1 z-20 flex items-center gap-1 rounded-full px-2.5 py-0.5 text-[10px] font-semibold shadow-md"
      aria-label={`Cancel deployment of ${rootName}`}
    >
      {/* Inline × glyph so it inherits currentColor from the pill. */}
      <svg width="10" height="10" viewBox="0 0 16 16" aria-hidden="true">
        <path
          d="M4 4l8 8M12 4l-8 8"
          stroke="currentColor"
          strokeWidth="2"
          strokeLinecap="round"
        />
      </svg>
      <span>Cancel ({workspaceCount})</span>
    </button>
  );
}

View File

@ -0,0 +1,53 @@
import { describe, it, expect } from "vitest";
import { shouldFitGrowing } from "../useCanvasViewport";
// Tests cover the auto-fit gate in isolation. The hook itself is
// effects + refs + React Flow handles, awkward to exercise directly —
// extracting the pure decision into shouldFitGrowing(...) lets us
// pin down the regression-prone logic with unit tests instead.
// Unit coverage for the extracted auto-fit gate. The hook hosting it is
// all effects + refs + React Flow handles — awkward to drive directly —
// so the regression-prone decision logic is pinned here in isolation.
describe("shouldFitGrowing", () => {
  it("fits the very first time (no prior snapshot)", () => {
    expect(shouldFitGrowing(["a"], undefined, null, 0)).toBe(true);
  });
  it("fits when the prior snapshot is empty", () => {
    expect(shouldFitGrowing(["a", "b"], new Set(), null, 0)).toBe(true);
  });
  it("fits when a brand-new id has been added since the last fit", () => {
    expect(
      shouldFitGrowing(["root", "a", "b", "c"], new Set(["root", "a", "b"]), null, 0),
    ).toBe(true);
  });
  it("respects user pan when the subtree hasn't grown", () => {
    // Same membership — e.g. a status update on an existing node.
    expect(
      shouldFitGrowing(["root", "a", "b"], new Set(["root", "a", "b"]), 5000, 1000),
    ).toBe(false);
  });
  it("fits when the subtree hasn't grown but the user never panned", () => {
    expect(
      shouldFitGrowing(["root", "a", "b"], new Set(["root", "a", "b"]), null, 1000),
    ).toBe(true);
  });
  it("fits when the subtree hasn't grown and the user panned BEFORE the last fit", () => {
    expect(
      shouldFitGrowing(["root", "a", "b"], new Set(["root", "a", "b"]), 500, 1000),
    ).toBe(true);
  });
  it("forces fit on delete-then-add even when the count is unchanged", () => {
    // Subtree was [root, a, b, c, d]; then `d` was removed and a
    // sibling `e` arrived. Same length, different membership — a
    // length-only check would skip the fit and leave `e` off-screen.
    expect(
      shouldFitGrowing(
        ["root", "a", "b", "c", "e"],
        new Set(["root", "a", "b", "c", "d"]),
        5000,
        1000,
      ),
    ).toBe(true);
  });
  it("does NOT fit on shrink-only when the user has panned (deletion alone shouldn't override exploration)", () => {
    expect(
      shouldFitGrowing(["root", "a", "b"], new Set(["root", "a", "b", "c"]), 5000, 1000),
    ).toBe(false);
  });
});

View File

@ -3,11 +3,43 @@
import { useCallback, useEffect, useRef } from "react";
import { useReactFlow } from "@xyflow/react";
import { useCanvasStore } from "@/store/canvas";
import { appendClass, removeClass } from "@/store/classNames";
import {
CHILD_DEFAULT_HEIGHT,
CHILD_DEFAULT_WIDTH,
} from "@/store/canvas-topology";
/**
* Decide whether the deploy-time auto-fit should run. Pure function so
 * the gate logic is unit-testable in isolation — the surrounding
* useEffect tangle of refs, timers, and React Flow handles is awkward
* to exercise directly.
*
* Returns true when the auto-fit SHOULD fire:
* - the subtree contains an id that wasn't in the previous snapshot
 * (a new node arrived — the user has lost context; force the fit
* through regardless of any user-pan in between), OR
* - the user has not panned since the last successful fit (so the
* auto-fit isn't fighting their override).
*
 * `prevSubtreeIds === undefined` means no fit has ever run for this
 * root — treat every id as "new" and fit. `userPannedAt === null`
 * means the user has never panned at all in this session — fit.
*/
export function shouldFitGrowing(
  currentSubtreeIds: readonly string[],
  prevSubtreeIds: ReadonlySet<string> | undefined,
  userPannedAt: number | null,
  lastAutoFitAt: number,
): boolean {
  // First fit for this root: no snapshot yet (or an empty one).
  if (prevSubtreeIds === undefined || prevSubtreeIds.size === 0) return true;
  // A node the previous fit never saw has arrived — force the fit so
  // the arrival comes on-screen, even over an intervening user pan.
  const snapshot = prevSubtreeIds;
  const grewSinceLastFit = currentSubtreeIds.some((id) => !snapshot.has(id));
  if (grewSinceLastFit) return true;
  // No growth: defer to the user's viewport unless they never panned,
  // or their last pan predates (or ties) the last auto-fit.
  if (userPannedAt === null) return true;
  return userPannedAt <= lastAutoFitAt;
}
/**
* Wires the two canvas-wide CustomEvent listeners and the viewport
* save/restore bookkeeping so Canvas.tsx doesn't have to.
@ -25,17 +57,79 @@ export function useCanvasViewport() {
const saveViewport = useCanvasStore((s) => s.saveViewport);
const saveTimerRef = useRef<ReturnType<typeof setTimeout>>(undefined);
const panTimerRef = useRef<ReturnType<typeof setTimeout>>(undefined);
const autoFitTimerRef = useRef<ReturnType<typeof setTimeout>>(undefined);
// Two distinct fit timers — DO NOT collapse to one.
// - settleFitTimerRef: 1200ms one-shot run by the
// "transition from any-provisioning to none" effect (the deploy
// just finished — settle on the whole org once).
// - trackingFitTimerRef: 500ms debounced by the per-arrival
// molecule:fit-deploying-org event handler (track the org's
// bounds as children land during the deploy).
// They MUST NOT share a ref: the two effects fire interleaved
// (every WS event during a deploy resets the tracking timer; the
// settle timer arms the moment provisioning hits zero), and a
// shared ref made each effect silently clearTimeout the other's
// pending fit. Today's behavior happened to land in the right
// order out of luck; splitting the refs makes ordering independent
// of fire sequence.
const settleFitTimerRef = useRef<ReturnType<typeof setTimeout>>(undefined);
const trackingFitTimerRef = useRef<ReturnType<typeof setTimeout>>(undefined);
// Tracks whether any workspace was provisioning on the previous
// render so we can detect the boundary when the last one finishes
// and auto-fit the viewport around the whole tree.
const hadProvisioningRef = useRef(false);
// Respect-user-pan gate for the deploy-time auto-fit. Earlier
// revisions tried to detect user pans via `onMoveEnd`, but React
// Flow v12 fires that callback with a truthy event at the END of
// a programmatic fitView animation — so the first auto-fit we
// triggered would immediately look like a user pan and block
// every subsequent fit for the rest of the deploy, leaving the
// viewport stuck wherever the first fit landed. Now we stamp
// this ref ONLY on wheel / pointerdown / touchstart on the
// React Flow pane itself (see the effect below), which are
// unambiguous user-gesture signals.
const userPannedAtRef = useRef<number | null>(null);
const lastAutoFitAtRef = useRef(0);
useEffect(() => {
return () => {
clearTimeout(saveTimerRef.current);
clearTimeout(panTimerRef.current);
clearTimeout(autoFitTimerRef.current);
clearTimeout(settleFitTimerRef.current);
clearTimeout(trackingFitTimerRef.current);
};
}, []);
// User-gesture listeners for the respect-user-pan gate. Listens on
// `document` with capture phase and filters to events whose target
// lies inside the React Flow pane — this avoids a mount-order race
// (`.react-flow__pane` may not exist when the hook first runs if
// RF is behind a Suspense boundary) AND keeps clicks on the
// toolbar / modals / side panel from stamping user-pan-intent.
// Capture phase runs before target-phase `stopPropagation` so a
// handler elsewhere can't swallow the signal.
//
// Wheel only — NOT pointerdown. A pointerdown on the pane fires for
// ordinary clicks (deselect, click-near-a-card, modal-close-bubble)
// as well as the start of a drag-pan. Treating every pointerdown as
// "user wants to override auto-fit" meant a single accidental click
// before/during an org import locked out every subsequent fit, so
// the viewport stuck at whatever the first fit landed on while
// children kept materialising off-screen. Wheel is the canonical
// unambiguous gesture: scroll-to-pan and pinch-zoom both surface as
// wheel events. Drag-pans without an accompanying wheel are rare
// enough that letting them be overridden by a follow-up auto-fit is
// the right tradeoff.
useEffect(() => {
if (typeof window === "undefined") return;
const stamp = (e: Event) => {
const target = e.target as HTMLElement | null;
if (!target?.closest?.(".react-flow__pane")) return;
userPannedAtRef.current = Date.now();
};
const opts: AddEventListenerOptions = { passive: true, capture: true };
document.addEventListener("wheel", stamp, opts);
return () => {
document.removeEventListener("wheel", stamp, opts);
};
}, []);
@ -55,20 +149,64 @@ export function useCanvasViewport() {
hadProvisioningRef.current = hasProvisioning;
if (wasProvisioning && !hasProvisioning && nodeCount > 0) {
clearTimeout(autoFitTimerRef.current);
// Root-complete moment — every root that has children just
// finished deploying. Pop + glow once (mol-deploy-root-complete)
// then auto-fit the viewport around the whole org. Leaf-only
// roots (single workspaces with no children) are skipped so the
// effect reads as "your org landed" not "random card flickered".
const state = useCanvasStore.getState();
const rootsWithChildren = new Set<string>();
for (const n of state.nodes) {
if (n.data.parentId) continue;
if (state.nodes.some((c) => c.data.parentId === n.id)) {
rootsWithChildren.add(n.id);
}
}
if (rootsWithChildren.size > 0) {
useCanvasStore.setState({
nodes: state.nodes.map((n) =>
rootsWithChildren.has(n.id)
? { ...n, className: appendClass(n.className, "mol-deploy-root-complete") }
: n,
),
});
// Strip the one-shot class after the keyframe ends so a later
// deploy on the same node can fire it again.
window.setTimeout(() => {
const s = useCanvasStore.getState();
useCanvasStore.setState({
nodes: s.nodes.map((n) =>
rootsWithChildren.has(n.id)
? { ...n, className: removeClass(n.className, "mol-deploy-root-complete") }
: n,
),
});
}, 800);
}
clearTimeout(settleFitTimerRef.current);
// 1200ms settle delay: lets React Flow's DOM measurement pass
// resize newly-online parents before we compute bounds.
// Measuring too early gives us the pre-render skeleton bbox and
// fitView zooms to that smaller-than-real rectangle.
autoFitTimerRef.current = setTimeout(() => {
settleFitTimerRef.current = setTimeout(() => {
fitView({
// Deliberately SLOWER than the in-flight tracking fits
// (400ms). The asymmetry reads as "settling" on the
// finished org rather than "tracking" another arrival,
// which is the intended UX for the "deploy done" moment.
// Don't normalize these two durations to the same value.
duration: 1200,
padding: 0.25,
// Match the deploy-time fit padding (0.45) so end-state
// and in-flight state use the same framing — otherwise
// the final zoom-out "jumps" relative to the intermediate
// fits and looks like a mis-layout.
padding: 0.45,
// Cap zoom-in: a small tree (2-3 nodes) would otherwise end
// up at the 2x maxZoom, visually implying "something is
// wrong". 0.8 reads like "here's your whole org" even when
// the tree is small.
maxZoom: 0.8,
// wrong". 0.65 reads like "here's your whole org" even when
// the tree is small — matches deploy-time cap.
maxZoom: 0.65,
// Cap zoom-out: fitView would fall back to the component's
// minZoom=0.1 on a sparse/outlier layout, leaving the user
// staring at a postage-stamp canvas. 0.25 is the floor.
@ -92,6 +230,115 @@ export function useCanvasViewport() {
return () => window.removeEventListener("molecule:pan-to-node", handler);
}, [fitView]);
// Track the whole deploying org after each child arrival, but
// DEBOUNCED: running fitView per event with a 600ms animation made
// rapid sibling arrivals (server paces ~2s apart; HMR bursts land
// faster) lurch the viewport continuously, which users read as
// "parent flashing around". Instead we wait for arrivals to go quiet
// for 500ms and run exactly one fit, driven by the rootId captured
// on the most recent event. Respect-user-pan still short-circuits:
// once the user moves after our last auto-fit, we never fit again
// this deploy.
const pendingFitRootRef = useRef<string | null>(null);
// Per-root snapshot of subtree membership at the moment of the last
// successful auto-fit. Growth is "any current subtree id missing
// from the snapshot". An id-set (not a count) survives the
// delete-then-add case — subtree of 6 → delete one → 5 → a different
// child arrives → 6 again — where a length-only comparison would
// report "no growth" and skip fitting the brand-new off-screen node.
//
// Keyed by root id and never pruned. Acceptable while org roots are
// UUIDs (no collisions on retry / template re-import), sessions are
// per-tab, and entries are tiny; revisit if long-lived sessions ever
// import hundreds of orgs.
const lastFitSubtreeIdsRef = useRef<Map<string, Set<string>>>(new Map());

useEffect(() => {
  const flushPendingFit = () => {
    const requestedRoot = pendingFitRootRef.current;
    pendingFitRootRef.current = null;
    if (!requestedRoot) return;
    const state = useCanvasStore.getState();
    // The event's rootId is the landed child's direct parent, which
    // may itself be nested — climb to the true top-level root.
    let fitRootId = requestedRoot;
    let walker = state.nodes.find((n) => n.id === fitRootId);
    while (walker?.data.parentId) {
      const parent = state.nodes.find((n) => n.id === walker!.data.parentId);
      if (!parent) break;
      walker = parent;
      fitRootId = parent.id;
    }
    // Collect every node in the subtree under the resolved root.
    const subtreeIds: string[] = [];
    const pending = [fitRootId];
    while (pending.length > 0) {
      const currentId = pending.pop()!;
      subtreeIds.push(currentId);
      for (const n of state.nodes) {
        if (n.data.parentId === currentId) pending.push(n.id);
      }
    }
    if (subtreeIds.length === 0) return;
    // Growth gate: an id absent from the last-fit snapshot forces the
    // fit regardless of the user-pan timestamp — the new arrival is
    // off-screen and the deploy is the primary thing being watched.
    // Without growth, defer to the user-pan respect gate so
    // post-deploy exploration isn't yanked back.
    const snapshot = lastFitSubtreeIdsRef.current.get(fitRootId);
    if (
      !shouldFitGrowing(
        subtreeIds,
        snapshot,
        userPannedAtRef.current,
        lastAutoFitAtRef.current,
      )
    ) {
      return;
    }
    fitView({
      nodes: subtreeIds.map((id) => ({ id })),
      // Short animation — against the ~2s server pacing, 400ms reads
      // as "smoothly tracked"; the earlier 600ms started to overlap
      // when deploys were re-triggered.
      duration: 400,
      // Generous padding: React Flow padding is a fraction of
      // viewport dims, so 0.45 ≈ ~430px of margin on a 960-wide
      // canvas — clearance for the Communications panel (~300px),
      // Legend, and "New Workspace" button (~280px).
      padding: 0.45,
      // Lower maxZoom so small orgs (2-3 cards) still show the parent
      // frame + children with the padded margins instead of being
      // dragged to fitView's fill-the-viewport default.
      maxZoom: 0.65,
      minZoom: 0.25,
    });
    lastAutoFitAtRef.current = Date.now();
    lastFitSubtreeIdsRef.current.set(fitRootId, new Set(subtreeIds));
  };
  const onFitRequested = (e: Event) => {
    const { rootId } = (e as CustomEvent<{ rootId: string }>).detail;
    // Most-recent root wins: back-to-back imports of two different
    // orgs within the 500ms window (rare — two Import clicks) settle
    // on the later one rather than ping-ponging. If that becomes a
    // real pattern, flush the pending fit synchronously when rootId
    // changes instead of resetting the timer.
    pendingFitRootRef.current = rootId;
    clearTimeout(trackingFitTimerRef.current);
    trackingFitTimerRef.current = setTimeout(flushPendingFit, 500);
  };
  window.addEventListener("molecule:fit-deploying-org", onFitRequested);
  return () => window.removeEventListener("molecule:fit-deploying-org", onFitRequested);
}, [fitView]);
// Zoom to a team: fit the parent + its direct children in view.
useEffect(() => {
const handler = (e: Event) => {
@ -129,6 +376,11 @@ export function useCanvasViewport() {
const onMoveEnd = useCallback(
(_event: unknown, vp: { x: number; y: number; zoom: number }) => {
// User-pan detection moved to the wheel/pointerdown listener
// above — onMoveEnd fires for programmatic fitView too, which
// made this callback an unreliable source for user-intent
// tracking. This now only handles the debounced viewport
// save so a reload lands the user back where they were.
clearTimeout(saveTimerRef.current);
saveTimerRef.current = setTimeout(() => {
saveViewport(vp.x, vp.y, vp.zoom);

View File

@ -113,6 +113,18 @@ export function useDragHandlers(): DragHandlers {
const onNodeDragStart: OnNodeDrag<WorkspaceNode> = useCallback(
(event, node) => {
// Belt-and-braces drag-lock: the primary mechanism is the
// `draggable: false` projection in Canvas.tsx — React Flow
// won't invoke this callback for locked nodes. But a future
// change to the projection that forgets a locked subtree
// would silently allow dragging, and locked drags mid-deploy
// corrupt the spawn animation. Fall through to a state-based
// check here so the invariant stays enforced in both places.
if (node.draggable === false) {
dragStartStateRef.current = null;
return;
}
dragModifiersRef.current = {
alt: event.altKey,
meta: event.metaKey || event.ctrlKey,

View File

@ -0,0 +1,152 @@
"use client";
import { useMemo } from "react";
import { useCanvasStore } from "@/store/canvas";
/**
 * Org-deploy state for a single workspace node. Computed from the
 * current canvas store snapshot — no per-org status field on the
 * backend is required (a root "is deploying" iff any node in its
 * subtree still reports status === "provisioning").
 *
 * Performance note: the first version of this hook walked the entire
 * nodes array per node render — O(n²) for a 50-node org. The current
 * implementation computes ONE map of derived state for the whole
 * canvas per nodes-array change, then each call site looks up its
 * own id. The map is built inside useMemo against a cheap projection
 * (id + parentId + status tuples), so the walk runs once per
 * nodes-array change rather than once per rendered node.
 */
/** Derived deploy state for one node, looked up from the store-wide
 * map built by useDeployMap. Purely computed from node status +
 * parent links — nothing here is persisted. */
export interface OrgDeployState {
  // This node itself reports status === "provisioning".
  isActivelyProvisioning: boolean;
  // This node is a tree root AND some node in its subtree (possibly
  // itself) is still provisioning.
  isDeployingRoot: boolean;
  // Interaction lock: the node is being deleted, or it is a non-root
  // member of a subtree whose deploy is still in flight.
  isLockedChild: boolean;
  // Count of provisioning nodes in this root's subtree (self
  // included). Always 0 for non-root nodes — only roots display it.
  descendantProvisioningCount: number;
}

// Inert fallback returned for ids not present in the derived map.
const EMPTY: OrgDeployState = {
  isActivelyProvisioning: false,
  isDeployingRoot: false,
  isLockedChild: false,
  descendantProvisioningCount: 0,
};

/** Minimal projection of a canvas node used by the deploy-state
 * walk — keeps buildDeployMap decoupled from the full React Flow
 * node shape (position, selection, className, etc.). */
interface NodeProjection {
  id: string;
  // Falsy for tree roots; otherwise the parent node's id.
  parentId: string | null;
  status: string;
}
/** Build the per-node deploy-state map for one canvas snapshot.
 *
 * @param projections minimal (id, parentId, status) tuples for every node
 * @param deletingIds ids currently being deleted — locked like deploying children
 * @returns map from node id to its derived OrgDeployState
 */
function buildDeployMap(
  projections: NodeProjection[],
  deletingIds: ReadonlySet<string>,
): Map<string, OrgDeployState> {
  // Index by id and group child ids under their parent so both the
  // upward (root) and downward (subtree) walks cost O(1) per hop.
  const nodesById = new Map<string, NodeProjection>();
  const childIdsByParent = new Map<string, string[]>();
  for (const node of projections) {
    nodesById.set(node.id, node);
    if (!node.parentId) continue;
    const siblings = childIdsByParent.get(node.parentId);
    if (siblings) {
      siblings.push(node.id);
    } else {
      childIdsByParent.set(node.parentId, [node.id]);
    }
  }

  // Memoised upward walk: each queried id caches its resolved root,
  // and hitting an already-resolved ancestor short-circuits the climb.
  const rootCache = new Map<string, string>();
  const resolveRoot = (startId: string): string => {
    const cached = rootCache.get(startId);
    if (cached) return cached;
    let current: NodeProjection | undefined = nodesById.get(startId);
    let resolved = startId;
    while (current && current.parentId) {
      const parent = nodesById.get(current.parentId);
      if (!parent) break;
      current = parent;
      resolved = parent.id;
      const known = rootCache.get(resolved);
      if (known) {
        resolved = known;
        break;
      }
    }
    rootCache.set(startId, resolved);
    return resolved;
  };

  // Count provisioning nodes in a subtree (the subtree root included)
  // with an explicit-stack DFS so deep trees can't overflow the call
  // stack.
  const tallyProvisioning = (subtreeRootId: string): number => {
    let total = 0;
    const pending: string[] = [subtreeRootId];
    while (pending.length > 0) {
      const currentId = pending.pop()!;
      const current = nodesById.get(currentId);
      if (!current) continue;
      if (current.status === "provisioning") total += 1;
      const childIds = childIdsByParent.get(currentId);
      if (childIds) pending.push(...childIds);
    }
    return total;
  };

  // One subtree tally per root, cached, so every descendant resolves
  // its state in O(1).
  const tallyByRoot = new Map<string, number>();
  const states = new Map<string, OrgDeployState>();
  for (const node of projections) {
    const rootId = resolveRoot(node.id);
    let tally = tallyByRoot.get(rootId);
    if (tally === undefined) {
      tally = tallyProvisioning(rootId);
      tallyByRoot.set(rootId, tally);
    }
    const deployInFlight = tally > 0;
    const isRoot = node.id === rootId;
    // Deleting nodes get the same visual + interaction lock as a
    // deploying child — "the system owns this node right now, don't
    // touch it" is the shared semantic; the user doesn't need to
    // know whether it's coming up or going down.
    const beingDeleted = deletingIds.has(node.id);
    states.set(node.id, {
      isActivelyProvisioning: node.status === "provisioning",
      isDeployingRoot: isRoot && deployInFlight,
      isLockedChild: beingDeleted || (!isRoot && deployInFlight),
      // Only roots surface the count; children always report 0.
      descendantProvisioningCount: isRoot ? tally : 0,
    });
  }
  return states;
}
/** Store-wide derived map. Recomputed whenever the `nodes` array
 * reference changes — which is on every store mutation that touches
 * nodes, including pure position tweens. The map build is O(n) so
 * a 50-node canvas costs ~50μs per tween frame; that's cheap enough
 * to not need a projection layer. (An earlier attempt to narrow the
 * subscription via `useShallow((s) => s.nodes.map(...))` triggered
 * React 18's "getSnapshot should be cached" loop because the
 * projection creates fresh object references each call — shallow
 * equality always sees "changed", which re-renders, which re-runs
 * the selector, ad infinitum.) */
function useDeployMap(): Map<string, OrgDeployState> {
  const nodes = useCanvasStore((s) => s.nodes);
  const deletingIds = useCanvasStore((s) => s.deletingIds);
  return useMemo(
    () =>
      // Project down to the three fields the walk needs before
      // handing off to the pure map builder.
      buildDeployMap(
        nodes.map(({ id, data }) => ({
          id,
          parentId: data.parentId,
          status: data.status,
        })),
        deletingIds,
      ),
    [nodes, deletingIds],
  );
}
export function useOrgDeployState(nodeId: string): OrgDeployState {
  // Unknown ids (node not yet in the store) resolve to the inert
  // all-false EMPTY state rather than undefined.
  const entry = useDeployMap().get(nodeId);
  return entry === undefined ? EMPTY : entry;
}

View File

@ -5,6 +5,7 @@ import { api } from "@/lib/api";
import { ConversationTraceModal } from "@/components/ConversationTraceModal";
import { type ActivityEntry } from "@/types/activity";
import { useWorkspaceName } from "@/hooks/useWorkspaceName";
import { inferA2AErrorHint } from "./chat/a2aErrorHint";
interface Props {
workspaceId: string;
@ -286,6 +287,26 @@ function ActivityRow({
);
}
// Sentinel prefix that marks a failed A2A delivery in a response
// body; MessagePreview matches it verbatim before falling through
// to the generic text render.
const A2A_ERROR_PREFIX = "[A2A_ERROR]";

/** Render a [A2A_ERROR]-prefixed response as a structured error block
 * with a stripped detail line + a cause hint. The previous raw render
 * ("[A2A_ERROR] " literal in the response area) gave the user no
 * signal to act on.
 *
 * @param label heading label, rendered as "{label} delivery failed"
 * @param raw   full response text, still carrying the prefix */
function A2AErrorPreview({ label, raw }: { label: string; raw: string }) {
  // Strip the prefix; fall back to a placeholder when the marker
  // arrived with nothing after it.
  const detail = raw.slice(A2A_ERROR_PREFIX.length).trim() || "(no detail provided)";
  // Cause hint derived from the detail text (see ./chat/a2aErrorHint).
  const hint = inferA2AErrorHint(detail);
  return (
    <div>
      <div className="text-[8px] text-red-400/80 uppercase tracking-wider mb-1">{label} delivery failed</div>
      <div className="text-[10px] text-red-300 bg-red-950/30 border border-red-800/40 rounded p-2 space-y-1.5">
        <div className="font-mono whitespace-pre-wrap break-words max-h-32 overflow-y-auto">{detail}</div>
        <div className="text-[9px] text-red-300/70 leading-relaxed border-t border-red-800/30 pt-1.5">{hint}</div>
      </div>
    </div>
  );
}
/** Extract human-readable text from A2A request/response JSON */
function MessagePreview({ label, body }: { label: string; body: Record<string, unknown> }) {
// Try to extract text from A2A message parts
@ -295,6 +316,14 @@ function MessagePreview({ label, body }: { label: string; body: Record<string, u
if (body.task && typeof body.task === "string") { text = body.task; }
if (!text && body.result && typeof body.result === "string") { text = body.result; }
if (text) {
// [A2A_ERROR]-prefixed responses get the structured error
// treatment. Bare text fallthrough renders a bland gray block
// — fine for normal replies, terrible for "[A2A_ERROR] " with
// no further context. Detect at the top of the rendering path
// so it short-circuits before the generic preview kicks in.
if (text.trimStart().startsWith(A2A_ERROR_PREFIX)) {
return <A2AErrorPreview label={label} raw={text.trimStart()} />;
}
return (
<div>
<div className="text-[8px] text-zinc-500 uppercase tracking-wider mb-1">{label}</div>

View File

@ -7,9 +7,12 @@ import { api } from "@/lib/api";
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
import { WS_URL } from "@/store/socket";
import { closeWebSocketGracefully } from "@/lib/ws-close";
import { type ChatMessage, createMessage, appendMessageDeduped } from "./chat/types";
import { extractResponseText, extractRequestText } from "./chat/message-parser";
import { type ChatMessage, type ChatAttachment, createMessage, appendMessageDeduped } from "./chat/types";
import { uploadChatFiles, downloadChatFile } from "./chat/uploads";
import { AttachmentChip, PendingAttachmentPill } from "./chat/AttachmentViews";
import { extractResponseText, extractRequestText, extractFilesFromTask } from "./chat/message-parser";
import { AgentCommsPanel } from "./chat/AgentCommsPanel";
import { appendActivityLine } from "./chat/activityLog";
import { runtimeDisplayName } from "@/lib/runtime-names";
import { ConfirmDialog } from "@/components/ConfirmDialog";
@ -21,10 +24,18 @@ interface Props {
type ChatSubTab = "my-chat" | "agent-comms";
// A2A response shape (subset). The full schema is in @a2a-js/sdk but we only
// need parts/artifacts text extraction for the synchronous fallback path.
// need parts/artifacts text + file extraction for the synchronous fallback.
/** File reference inside an A2A message part. All fields are
 * optional in the wire shape; the payload travels either by `uri`
 * (staged file, e.g. via /chat/uploads) or inline via `bytes`. */
interface A2AFileRef {
  name?: string;
  mimeType?: string;
  // Location of a staged/uploaded file.
  uri?: string;
  // Inline payload alternative to `uri` — encoding presumably
  // base64 per the A2A spec; confirm against the SDK types.
  bytes?: string;
  size?: number;
}
/** One part of an A2A message. `kind` discriminates; `text` is
 * populated for text parts and `file` for file parts, so both
 * payload fields must be optional. (Merge residue had left the
 * pre-merge required `text: string;` alongside the optional
 * `text?: string;` — a duplicate member that also broke file-only
 * parts; the required form is removed.) */
interface A2APart {
  kind: string;
  text?: string;
  file?: A2AFileRef;
}
interface A2AResponse {
result?: {
@ -33,25 +44,81 @@ interface A2AResponse {
};
}
/** Detect activity-log rows that the workspace's own runtime fired
* against itself but were misclassified as canvas-source. The proper
* fix is the X-Workspace-ID header from `self_source_headers()` in
* workspace/platform_auth.py, which makes the platform record
* source_id = workspace_id. But three failure modes still leak a
* self-message into "My Chat":
*
* 1. Historical rows already in the DB with source_id=NULL.
* 2. Workspace containers running pre-fix heartbeat.py / main.py
* (the fix only takes effect after an image rebuild + redeploy).
* 3. Future internal triggers added without the helper.
*
* This client-side filter recognises the heartbeat trigger by its
* exact prefix — the heartbeat assembles
*
* "Delegation results are ready. Review them and take appropriate
* action:\n" + summary_lines + report_instruction
*
* in workspace/heartbeat.py. The prefix is template-fixed so a
* string match is reliable. If the heartbeat copy ever changes,
* update this constant in the same commit.
*
* This is a backstop, not the primary defence — the X-Workspace-ID
* header is. Filtering content is fragile to copy edits, so keep
* the list narrow. */
// Template-fixed prefixes of agent-to-self trigger messages that
// must never surface in "My Chat" (rationale in the comment above).
// If the heartbeat copy ever changes, update this list in the same
// commit.
const INTERNAL_SELF_MESSAGE_PREFIXES = [
  "Delegation results are ready. Review them and take appropriate action",
];

/** True when `text` begins with a recognised internal self-message
 * prefix. */
function isInternalSelfMessage(text: string): boolean {
  for (const prefix of INTERNAL_SELF_MESSAGE_PREFIXES) {
    if (text.startsWith(prefix)) return true;
  }
  return false;
}
// extractReplyText pulls the agent's text reply out of an A2A response.
// Mirrors the Go-side extractReplyText in workspace-server/internal/channels/manager.go.
// Concatenates ALL text parts (joined with "\n") rather than returning
// just the first. Claude Code and other runtimes commonly emit multi-
// part text replies for long content (markdown tables, code blocks),
// and the prior "first part wins" implementation silently truncated
// the rest — observed on a 15k-char Wave 1 brief that rendered only
// the table header. Mirrors extractTextsFromParts in message-parser.ts.
//
// Server-side counterpart in workspace-server/internal/channels/
// manager.go has the same single-part bug; fix that too if/when a
// channel-delivered reply (Slack, Lark, etc.) gets truncated.
/** Pull the agent's text reply out of an A2A response.
 *
 * Joins ALL text parts with "\n" — runtimes commonly emit multi-part
 * text replies for long content (markdown tables, code blocks), and
 * a "first part wins" read silently truncates them.
 *
 * NOTE: the merge had left the pre-refactor early-return loops in
 * place ahead of this logic, which both reintroduced the truncation
 * ("return p.text" on the first text part) and made the concatenating
 * path unreachable dead code; those stale lines are removed here.
 *
 * @param resp parsed A2A response (subset shape)
 * @returns concatenated text from result.parts and all artifact
 *          parts, or "" when none exist */
function extractReplyText(resp: A2AResponse): string {
  const collect = (parts: A2APart[] | undefined): string => {
    if (!parts) return "";
    return parts
      .filter((p) => p.kind === "text")
      .map((p) => p.text ?? "")
      .filter(Boolean)
      .join("\n");
  };
  const result = resp?.result;
  const collected: string[] = [];
  const fromParts = collect(result?.parts);
  if (fromParts) collected.push(fromParts);
  // Walk artifacts even if parts had text — some producers (Hermes
  // tool calls) emit a summary in parts AND details in artifacts.
  // Returning early on parts dropped the artifact body silently.
  if (result?.artifacts) {
    for (const a of result.artifacts) {
      const t = collect(a.parts);
      if (t) collected.push(t);
    }
  }
  return collected.join("\n");
}
// Agent-returned files live on the same response shape as text —
// delegated to extractFilesFromTask in message-parser.ts, which also
// walks status.message.parts (that ChatTab's legacy text extractor
// doesn't). Single source of truth for file-part parsing across
// live chat, activity log replay, and any future consumers.
/**
* Load chat history from the activity_logs database via the platform API.
* Uses source=canvas to only get user-initiated messages (not agent-to-agent).
@ -71,16 +138,23 @@ async function loadMessagesFromDB(workspaceId: string): Promise<{ messages: Chat
for (const a of [...activities].reverse()) {
// Extract user message from request_body
const userText = extractRequestText(a.request_body);
if (userText) {
if (userText && !isInternalSelfMessage(userText)) {
messages.push(createMessage("user", userText));
}
// Extract agent response
// Extract agent response — text AND any file attachments so a
// chat reload surfaces historical download chips, not just plain
// text. `result` is nested on successful A2A responses; some
// older rows stored the raw `result` payload at the top level,
// so fall back to the body itself when `.result` is absent.
if (a.response_body) {
const text = extractResponseText(a.response_body);
if (text) {
const attachments = extractFilesFromTask(
(a.response_body.result ?? a.response_body) as Record<string, unknown>,
);
if (text || attachments.length > 0) {
const role = a.status === "error" || text.toLowerCase().startsWith("agent error") ? "system" : "agent";
messages.push({ ...createMessage(role, text), timestamp: a.created_at });
messages.push({ ...createMessage(role, text, attachments), timestamp: a.created_at });
}
}
}
@ -178,7 +252,16 @@ export function ChatTab({ workspaceId, data }: Props) {
function MyChatPanel({ workspaceId, data }: Props) {
const [messages, setMessages] = useState<ChatMessage[]>([]);
const [input, setInput] = useState("");
const [sending, setSending] = useState(!!data.currentTask);
// `sending` is strictly the "this tab kicked off a send and hasn't
// seen the reply yet" signal. Previously this was initialized from
// data.currentTask to pick up in-flight agent work on mount, but
// that conflated agent-busy (workspace heartbeat) with user-
// in-flight (local send): when the WS dropped a TASK_COMPLETE event,
// currentTask lingered, the component re-mounted with sending=true,
// and the Send button stayed disabled forever even though nothing
// local was in flight. For the "agent is busy, show spinner" UX,
// use data.currentTask directly in the render path.
const [sending, setSending] = useState(false);
const [thinkingElapsed, setThinkingElapsed] = useState(0);
const [activityLog, setActivityLog] = useState<string[]>([]);
const [loading, setLoading] = useState(true);
@ -189,6 +272,17 @@ function MyChatPanel({ workspaceId, data }: Props) {
const [error, setError] = useState<string | null>(null);
const [confirmRestart, setConfirmRestart] = useState(false);
const bottomRef = useRef<HTMLDivElement>(null);
// Files the user has picked but not yet sent. Cleared on send
// (upload success) or by the × on each pill.
const [pendingFiles, setPendingFiles] = useState<File[]>([]);
const [uploading, setUploading] = useState(false);
const fileInputRef = useRef<HTMLInputElement>(null);
// Guard against a double-click during the upload phase: React
// state updates from the click that started the upload haven't
// flushed yet, so the disabled-button logic sees `uploading=false`
// from the closure and lets a second `sendMessage` enter. A ref
// observes the latest value synchronously.
const sendInFlightRef = useRef(false);
// Load chat history from database on mount
useEffect(() => {
@ -231,8 +325,10 @@ function MyChatPanel({ workspaceId, data }: Props) {
// Dedupe in case the agent proactively pushed the same text the
// HTTP /a2a response already delivered (observed with the Hermes
// runtime, which emits both a reply body and a send_message_to_user
// push for the same content).
setMessages((prev) => appendMessageDeduped(prev, createMessage("agent", m.content)));
// push for the same content). Attachments ride along with the
// message so files returned by the A2A_RESPONSE WS path render
// their download chips.
setMessages((prev) => appendMessageDeduped(prev, createMessage("agent", m.content, m.attachments)));
}
if (sendingFromAPIRef.current && msgs.length > 0) {
setSending(false);
@ -277,12 +373,21 @@ function MyChatPanel({ workspaceId, data }: Props) {
try {
const msg = JSON.parse(event.data);
if (msg.event === "ACTIVITY_LOGGED") {
// Filter to events for THIS workspace. The platform's
// BroadcastOnly fires to every connected client, and
// without this guard a sibling workspace's a2a_send would
// surface as "→ Delegating to X..." inside the wrong
// chat panel. (workspace_id on the WS envelope is the
// workspace whose activity_log row we just wrote.)
if (msg.workspace_id !== workspaceId) return;
const p = msg.payload || {};
const type = p.activity_type as string;
const method = (p.method as string) || "";
const status = (p.status as string) || "";
const targetId = (p.target_id as string) || "";
const durationMs = p.duration_ms as number | undefined;
const summary = (p.summary as string) || "";
let line = "";
if (type === "a2a_receive" && method === "message/send") {
@ -313,17 +418,23 @@ function MyChatPanel({ workspaceId, data }: Props) {
const targetName = resolveWorkspaceName(targetId);
line = `→ Delegating to ${targetName}...`;
} else if (type === "task_update") {
const summary = (p.summary as string) || "";
if (summary) line = `${summary}`;
} else if (type === "agent_log") {
// Per-tool-use telemetry from claude_sdk_executor's
// _report_tool_use. The summary already carries an icon
// + human-readable args (📄 Read /path, ⚡ Bash: …)
// so we render it verbatim. No icon prefix here — the
// emoji at the start of summary is the visual marker.
if (summary) line = summary;
}
if (line) {
setActivityLog((prev) => [...prev.slice(-8), line]);
setActivityLog((prev) => appendActivityLine(prev, line));
}
} else if (msg.event === "TASK_UPDATED" && msg.workspace_id === workspaceId) {
const task = (msg.payload?.current_task as string) || "";
if (task) {
setActivityLog((prev) => [...prev.slice(-8), `${task}`]);
setActivityLog((prev) => appendActivityLine(prev, `${task}`));
}
}
// A2A_RESPONSE is already consumed by the store and its text is
@ -339,10 +450,35 @@ function MyChatPanel({ workspaceId, data }: Props) {
const sendMessage = async () => {
const text = input.trim();
if (!text || !agentReachable || sending) return;
const filesToSend = pendingFiles;
// Allow sending if EITHER text OR attachments are present — a user
// can drop a file with no text and the agent still receives it.
if ((!text && filesToSend.length === 0) || !agentReachable || sending || uploading) return;
// Synchronous re-entry guard — see sendInFlightRef comment.
if (sendInFlightRef.current) return;
sendInFlightRef.current = true;
// Upload attachments first so we can include URIs in the A2A
// message parts. Sequential-before-send: a message with references
// to files not yet staged would fail agent-side; staging happens
// synchronously via /chat/uploads before message/send dispatch.
let uploaded: ChatAttachment[] = [];
if (filesToSend.length > 0) {
setUploading(true);
try {
uploaded = await uploadChatFiles(workspaceId, filesToSend);
} catch (e) {
setUploading(false);
sendInFlightRef.current = false;
setError(e instanceof Error ? `Upload failed: ${e.message}` : "Upload failed");
return;
}
setUploading(false);
}
setInput("");
setMessages((prev) => [...prev, createMessage("user", text)]);
setPendingFiles([]);
setMessages((prev) => [...prev, createMessage("user", text, uploaded)]);
setSending(true);
sendingFromAPIRef.current = true;
setError(null);
@ -356,40 +492,228 @@ function MyChatPanel({ workspaceId, data }: Props) {
parts: [{ kind: "text", text: m.content }],
}));
// A2A parts: text part (if any) + file parts (per attachment). The
// agent sees both in a single turn, matching the A2A spec shape.
const parts: A2APart[] = [];
if (text) parts.push({ kind: "text", text });
for (const att of uploaded) {
parts.push({
kind: "file",
file: {
name: att.name,
mimeType: att.mimeType,
uri: att.uri,
size: att.size,
},
});
}
// A2A calls can legitimately take minutes — LLM latency +
// multi-turn tool use is common on slower providers (Hermes+minimax,
// Claude Code invoking bash/file tools, etc.). The 15s default
// would silently abort the fetch here, leaving the server to
// complete the reply and the user staring at
// "agent may be unreachable". Match the upload timeout (60s × 2)
// for the happy-path ceiling; anything longer is genuinely stuck.
api.post<A2AResponse>(`/workspaces/${workspaceId}/a2a`, {
method: "message/send",
params: {
message: {
role: "user",
messageId: crypto.randomUUID(),
parts: [{ kind: "text", text }],
parts,
},
metadata: { history },
},
})
}, { timeoutMs: 120_000 })
.then((resp) => {
// Skip if the WS A2A_RESPONSE event already handled this response.
// Both paths (WS + HTTP) check sendingFromAPIRef — whichever clears
// it first wins, the other becomes a no-op (no duplicate messages).
if (!sendingFromAPIRef.current) return;
const replyText = extractReplyText(resp);
if (replyText) {
setMessages((prev) => appendMessageDeduped(prev, createMessage("agent", replyText)));
const replyFiles = extractFilesFromTask((resp?.result ?? {}) as Record<string, unknown>);
if (replyText || replyFiles.length > 0) {
setMessages((prev) =>
appendMessageDeduped(prev, createMessage("agent", replyText, replyFiles)),
);
}
setSending(false);
sendingFromAPIRef.current = false;
sendInFlightRef.current = false;
})
.catch(() => {
// Same dedup guard as .then(): if a WS path (pendingAgentMsgs
// or ACTIVITY_LOGGED a2a_receive ok) already delivered the
// reply, sendingFromAPIRef is already false and there's
// nothing to roll back. Surfacing "Failed to send" here would
// contradict the agent reply the user is currently reading —
// exactly the false-positive observed when the HTTP request
// hung up (proxy idle / 502) after WS already won.
if (!sendingFromAPIRef.current) {
sendInFlightRef.current = false;
return;
}
setSending(false);
sendingFromAPIRef.current = false;
sendInFlightRef.current = false;
setError("Failed to send message — agent may be unreachable");
});
};
const onFilesPicked = (fileList: FileList | null) => {
  if (!fileList) return;
  const incoming = Array.from(fileList);
  // Dedup by name+size against what's already queued — picking the
  // same file twice must not append a second copy.
  setPendingFiles((current) => {
    const queued = new Set(current.map((f) => `${f.name}:${f.size}`));
    const additions = incoming.filter((f) => !queued.has(`${f.name}:${f.size}`));
    return [...current, ...additions];
  });
  // Clear the native input so a subsequent pick of the same file
  // still fires a change event.
  if (fileInputRef.current) fileInputRef.current.value = "";
};

const removePendingFile = (index: number) =>
  setPendingFiles((current) => current.filter((_, position) => position !== index));
// Monotonic counter so two paste events within the same wall-clock
// second still produce distinct filenames. Without this, on
// Firefox (where pasted images have an empty `file.name`), two
// pastes ~100ms apart could yield identical synthetic names AND
// identical sizes, collapsing into one attachment via the
// `name:size` dedup in onFilesPicked.
// Incremented once per pasted image in onPasteIntoComposer.
const pasteCounterRef = useRef(0);
/** Paste-from-clipboard image attachment.
*
* Browser clipboard image items arrive as `File`s whose `name` is
* often a generic "image.png" (Chrome) or empty (Firefox/Safari),
* so two consecutive screenshot pastes collide on the name+size
* dedup the file-picker uses. Re-tag each pasted image with a
* per-paste unique name so dedup keeps them apart and the upload
* pipeline (which expects a non-empty filename) is happy.
*
 * Hands the File[] straight to addPastedFiles — the File[]-direct
 * variant of the picker path — rather than building a FileList via
 * the DataTransfer constructor, which throws on Safari < 14.1 and
 * old Edge and would silently abort the paste.
*
* Only intercepts the paste when the clipboard has at least one
* image; text-only pastes fall through to the textarea's default
* behaviour. */
/** Map an image MIME type to a filename extension for synthetic
 * pasted-image names.
 *
 * Avoid raw `mime.split("/")[1]` — that yields `"svg+xml"`,
 * `"jpeg"`, `"webp"` etc. which produce ugly filenames and may
 * trip server-side extension allowlists. Known types are mapped
 * explicitly; anything unrecognised falls back to a safe default
 * ("png", the overwhelmingly common clipboard-image type).
 *
 * @param mime - a MIME type string, e.g. "image/png".
 * @returns a bare extension with no leading dot, e.g. "png".
 */
const mimeToExt = (mime: string): string => {
  // Lookup table instead of an if-chain: adding a type is one line,
  // and the known set is visible at a glance. Extended beyond the
  // original jpeg/png/gif/webp/heic/svg set with the other image
  // types modern browsers can hand over (avif, heif, bmp).
  const knownExtensions: Record<string, string> = {
    "image/svg+xml": "svg",
    "image/jpeg": "jpg",
    "image/png": "png",
    "image/gif": "gif",
    "image/webp": "webp",
    "image/heic": "heic",
    "image/heif": "heif",
    "image/avif": "avif",
    "image/bmp": "bmp",
  };
  return knownExtensions[mime] ?? "png";
};
const onPasteIntoComposer = (e: React.ClipboardEvent<HTMLTextAreaElement>) => {
if (!dropEnabled) return;
const items = e.clipboardData?.items;
if (!items || items.length === 0) return;
const imageFiles: File[] = [];
for (let i = 0; i < items.length; i++) {
const item = items[i];
if (!item.type.startsWith("image/")) continue;
const file = item.getAsFile();
if (!file) continue;
const ext = mimeToExt(file.type);
const stamp = new Date()
.toISOString()
.replace(/[:.]/g, "-")
.slice(0, 19);
const seq = pasteCounterRef.current++;
const fname = `pasted-${stamp}-${seq}-${i}.${ext}`;
imageFiles.push(new File([file], fname, { type: file.type }));
}
if (imageFiles.length === 0) return;
e.preventDefault();
// Reuse the picker path so file-size guards, dedup, and pending-
// list state all run through the same code. Build a synthetic
// FileList-like object to avoid the DataTransfer constructor —
// that's missing on Safari < 14.1 / old Edge and would silently
// throw, leaving the paste a no-op.
addPastedFiles(imageFiles);
};
// File[]-direct twin of onFilesPicked: identical name+size dedup and
// pending-list state shape, without the DataTransfer→FileList
// round-trip.
const addPastedFiles = (files: File[]) => {
  setPendingFiles((prev) => {
    const have = new Set(prev.map((f) => `${f.name}:${f.size}`));
    const additions = files.filter((f) => !have.has(`${f.name}:${f.size}`));
    return [...prev, ...additions];
  });
};
// Drag-and-drop staging. dragDepthRef counts enter vs leave events so
// the overlay doesn't flicker when the cursor crosses nested children
// (textarea, buttons) — dragenter/dragleave fire for every boundary.
const [dragOver, setDragOver] = useState(false);
const dragDepthRef = useRef(0);
// Attachments are accepted only while the agent can take them:
// reachable, not mid-send, not mid-upload. Also gates the paste
// handler (onPasteIntoComposer checks this before intercepting).
const dropEnabled = agentReachable && !sending && !uploading;
// True when the drag payload contains OS files (as opposed to text or
// links), per the DataTransfer `types` list.
const isFileDrag = (e: React.DragEvent) => {
  const types = e.dataTransfer.types || [];
  return Array.from(types).includes("Files");
};
const onDragEnter = (e: React.DragEvent) => {
  // Only react to file drags we can currently accept.
  if (dropEnabled && isFileDrag(e)) {
    e.preventDefault();
    // Depth counter: enter/leave fire once per nested child boundary,
    // so track enters and let onDragLeave balance them out.
    dragDepthRef.current = dragDepthRef.current + 1;
    setDragOver(true);
  }
};
const onDragOver = (e: React.DragEvent) => {
  if (dropEnabled && isFileDrag(e)) {
    // Every dragover must be preventDefault'ed or the browser refuses
    // the subsequent drop; "copy" shows the add cursor.
    e.preventDefault();
    e.dataTransfer.dropEffect = "copy";
  }
};
const onDragLeave = (e: React.DragEvent) => {
  if (!dropEnabled || !isFileDrag(e)) return;
  // Clamp at zero so a stray extra leave event can't drive the depth
  // negative and wedge the overlay.
  const depth = Math.max(0, dragDepthRef.current - 1);
  dragDepthRef.current = depth;
  if (depth === 0) setDragOver(false);
};
const onDrop = (e: React.DragEvent) => {
  if (!dropEnabled || !isFileDrag(e)) return;
  e.preventDefault();
  // The gesture is over — reset the drag UI unconditionally.
  dragDepthRef.current = 0;
  setDragOver(false);
  // Dropped files go through the same staging path as the picker.
  onFilesPicked(e.dataTransfer.files);
};
const downloadAttachment = (att: ChatAttachment) => {
  // Fire-and-forget download. Failures are rare but user-visible
  // (revoked token → 401, agent deleted the file → 404); surface
  // them via the inline error banner and leave the message list
  // untouched.
  downloadChatFile(workspaceId, att).catch((err) => {
    const reason =
      err instanceof Error ? `Download failed: ${err.message}` : "Download failed";
    setError(reason);
  });
};
const isOnline = data.status === "online" || data.status === "degraded";
return (
<div className="flex flex-col h-full">
<div
className="flex flex-col h-full relative"
onDragEnter={onDragEnter}
onDragOver={onDragOver}
onDragLeave={onDragLeave}
onDrop={onDrop}
>
{dragOver && (
<div
className="absolute inset-0 z-20 flex items-center justify-center bg-blue-500/10 border-2 border-dashed border-blue-400 rounded pointer-events-none"
aria-live="polite"
>
<div className="bg-zinc-900/90 border border-blue-400/50 rounded-lg px-4 py-2 text-xs text-blue-200">
Drop to attach
</div>
</div>
)}
{/* Messages */}
<div className="flex-1 overflow-y-auto p-3 space-y-3">
{loading && (
@ -435,9 +759,23 @@ function MyChatPanel({ workspaceId, data }: Props) {
: "bg-zinc-800/80 text-zinc-200 border border-zinc-700/30"
}`}
>
<div className="prose prose-sm prose-invert max-w-none [&>p]:mb-1 [&>p:last-child]:mb-0">
<ReactMarkdown remarkPlugins={[remarkGfm]}>{msg.content}</ReactMarkdown>
</div>
{msg.content && (
<div className="prose prose-sm prose-invert max-w-none [&>p]:mb-1 [&>p:last-child]:mb-0">
<ReactMarkdown remarkPlugins={[remarkGfm]}>{msg.content}</ReactMarkdown>
</div>
)}
{msg.attachments && msg.attachments.length > 0 && (
<div className={`flex flex-wrap gap-1 ${msg.content ? "mt-1.5" : ""}`}>
{msg.attachments.map((att, i) => (
<AttachmentChip
key={`${msg.id}-${i}`}
attachment={att}
onDownload={downloadAttachment}
tone={msg.role === "user" ? "user" : "agent"}
/>
))}
</div>
)}
<div className="text-[9px] text-zinc-500 mt-1">
{new Date(msg.timestamp).toLocaleTimeString()}
</div>
@ -445,8 +783,11 @@ function MyChatPanel({ workspaceId, data }: Props) {
</div>
))}
{/* Thinking indicator */}
{sending && (
{/* Thinking indicator shows when this tab is awaiting a reply
OR when the workspace heartbeat reports an in-flight task
(covers the "agent is already busy when I open the tab" case
without locking the Send button on a stale currentTask). */}
{(sending || !!data.currentTask) && (
<div className="flex justify-start">
<div className="bg-zinc-800/50 border border-zinc-700/30 rounded-lg px-3 py-2 max-w-[85%]">
<div className="flex items-center gap-2 text-xs text-zinc-400">
@ -490,7 +831,37 @@ function MyChatPanel({ workspaceId, data }: Props) {
{/* Input */}
<div className="p-3 border-t border-zinc-800">
<div className="flex gap-2">
{pendingFiles.length > 0 && (
<div className="flex flex-wrap gap-1.5 mb-2">
{pendingFiles.map((f, i) => (
<PendingAttachmentPill
key={`${f.name}-${f.size}-${i}`}
file={f}
onRemove={() => removePendingFile(i)}
/>
))}
</div>
)}
<div className="flex gap-2 items-end">
<input
ref={fileInputRef}
type="file"
multiple
className="hidden"
onChange={(e) => onFilesPicked(e.target.files)}
aria-hidden="true"
/>
<button
onClick={() => fileInputRef.current?.click()}
disabled={!agentReachable || sending || uploading}
aria-label="Attach file"
title="Attach file"
className="p-2 bg-zinc-800 hover:bg-zinc-700 border border-zinc-700 rounded-lg text-zinc-400 hover:text-zinc-200 transition-colors shrink-0 disabled:opacity-40"
>
<svg width="14" height="14" viewBox="0 0 16 16" fill="none" aria-hidden="true">
<path d="M11 6.5 7 10.5a2 2 0 1 0 2.8 2.8l4-4a3.5 3.5 0 0 0-5-5l-4.5 4.5a5 5 0 0 0 7 7l4-4" stroke="currentColor" strokeWidth="1.4" strokeLinecap="round" strokeLinejoin="round" />
</svg>
</button>
<textarea
aria-label="Message to agent"
value={input}
@ -501,17 +872,18 @@ function MyChatPanel({ workspaceId, data }: Props) {
sendMessage();
}
}}
placeholder={agentReachable ? "Send a message... (Shift+Enter for new line)" : `Agent is ${data.status}`}
onPaste={onPasteIntoComposer}
placeholder={agentReachable ? "Send a message... (Shift+Enter for new line, paste images to attach)" : `Agent is ${data.status}`}
disabled={!agentReachable || sending}
rows={1}
className="flex-1 bg-zinc-800 border border-zinc-700 rounded-lg px-3 py-2 text-xs text-zinc-200 placeholder-zinc-500 focus:outline-none focus:border-blue-500 resize-none disabled:opacity-50"
/>
<button
onClick={sendMessage}
disabled={!input.trim() || !agentReachable || sending}
disabled={(!input.trim() && pendingFiles.length === 0) || !agentReachable || sending || uploading}
className="px-4 py-2 bg-blue-600 hover:bg-blue-500 text-xs font-medium rounded-lg text-white disabled:opacity-30 transition-colors shrink-0"
>
Send
{uploading ? "Uploading…" : "Send"}
</button>
</div>
</div>

View File

@ -105,12 +105,17 @@ interface RuntimeOption {
// Fallback used when /templates can't be fetched (offline, older backend).
// Keep in sync with manifest.json workspace_templates as a defensive default.
// Model + env suggestions only flow when the backend is reachable.
//
// Runtimes that manage their own config outside the platform's config.yaml
// template. For these, a missing config.yaml is expected — the user manages
// config via the runtime's own mechanism (e.g. hermes edits
// ~/.hermes/config.yaml on the workspace EC2 via the Terminal tab or its
// own CLI). Showing a "No config.yaml found" error for these is misleading.
const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["hermes", "external"]);
// template. For these, a missing config.yaml is expected and the form
// genuinely can't edit the runtime's settings (there's no platform file
// to write). Hermes is NOT on this list: it DOES ship a platform
// config.yaml via workspace-configs-templates/hermes that controls model,
// runtime_config, required_env, etc. Editing it through this form is
// exactly the point of the platform adaptor. The deep `~/.hermes/
// config.yaml` on the container is a separate runtime-internal file,
// not this one.
const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["external"]);
const FALLBACK_RUNTIME_OPTIONS: RuntimeOption[] = [
{ value: "", label: "LangGraph (default)", models: [] },
@ -152,9 +157,11 @@ export function ConfigTab({ workspaceId }: Props) {
// default `LangGraph`. See GH #1894.
let wsMetadataRuntime = "";
let wsMetadataModel = "";
let wsMetadataTier: number | null = null;
try {
const ws = await api.get<{ runtime?: string }>(`/workspaces/${workspaceId}`);
const ws = await api.get<{ runtime?: string; tier?: number }>(`/workspaces/${workspaceId}`);
wsMetadataRuntime = (ws.runtime || "").trim();
if (typeof ws.tier === "number") wsMetadataTier = ws.tier;
} catch { /* fall back to config.yaml */ }
try {
const m = await api.get<{ model?: string }>(`/workspaces/${workspaceId}/model`);
@ -166,11 +173,15 @@ export function ConfigTab({ workspaceId }: Props) {
const parsed = parseYaml(res.content);
setOriginalYaml(res.content);
setRawDraft(res.content);
// Merge: config.yaml wins for fields it declares, but workspace metadata
// wins for runtime + model when config.yaml doesn't set them.
// Merge: workspace-row metadata is authoritative for the DB-backed
// fields (tier, runtime, model). config.yaml often lags — handleSave
// PATCHes tier/runtime directly and a template snapshot in the
// container can differ from the live row. Show the DB value so the
// form doesn't contradict the node badge (issue: badge=T3, form=T2).
const merged = { ...DEFAULT_CONFIG, ...parsed } as ConfigData;
if (!merged.runtime && wsMetadataRuntime) merged.runtime = wsMetadataRuntime;
if (!merged.model && wsMetadataModel) merged.model = wsMetadataModel;
if (wsMetadataRuntime) merged.runtime = wsMetadataRuntime;
if (wsMetadataModel) merged.model = wsMetadataModel;
if (wsMetadataTier !== null) merged.tier = wsMetadataTier;
setConfig(merged);
} catch {
// No platform-managed config.yaml. Some runtimes (hermes, external)
@ -185,6 +196,7 @@ export function ConfigTab({ workspaceId }: Props) {
...DEFAULT_CONFIG,
runtime: wsMetadataRuntime,
model: wsMetadataModel,
...(wsMetadataTier !== null ? { tier: wsMetadataTier } : {}),
} as ConfigData);
} finally {
setLoading(false);

View File

@ -36,7 +36,7 @@ export function DetailsTab({ workspaceId, data }: Props) {
const [restartError, setRestartError] = useState<string | null>(null);
const [consoleOpen, setConsoleOpen] = useState(false);
const updateNodeData = useCanvasStore((s) => s.updateNodeData);
const removeNode = useCanvasStore((s) => s.removeNode);
const removeSubtree = useCanvasStore((s) => s.removeSubtree);
const selectNode = useCanvasStore((s) => s.selectNode);
// Ref for the "Delete Workspace" trigger — Cancel returns focus here
const deleteButtonRef = useRef<HTMLButtonElement>(null);
@ -94,7 +94,11 @@ export function DetailsTab({ workspaceId, data }: Props) {
setDeleteError(null);
try {
await api.del(`/workspaces/${workspaceId}?confirm=true`);
removeNode(workspaceId);
// Mirror the server-side cascade — drop the row + every
// descendant locally so the canvas reflects the deletion
// immediately, even when the WS is dead and the per-descendant
// WORKSPACE_REMOVED events never arrive.
removeSubtree(workspaceId);
selectNode(null);
} catch (e) {
setDeleteError(e instanceof Error ? e.message : "Failed to delete");

View File

@ -6,6 +6,14 @@ import { useCanvasStore, summarizeWorkspaceCapabilities, type WorkspaceNodeData
import { showToast } from "../Toaster";
interface Props {
// The workspace's id is NOT a field on WorkspaceNodeData — that
// interface is the React Flow `node.data` blob, while the id lives
// on `node.id`. Pass it explicitly (matches every other tab in
// SidePanel) so the install/uninstall API calls don't end up
// POSTing to /workspaces/undefined/plugins. The interface extending
// Record<string, unknown> meant TypeScript silently typed
// `data.id` as `unknown` instead of erroring — easy to miss.
workspaceId: string;
data: WorkspaceNodeData;
}
@ -40,7 +48,7 @@ interface SourceSchemesResponse {
// Delay before reloading installed plugins after install/uninstall (workspace restarts)
const PLUGIN_RELOAD_DELAY_MS = 15_000;
export function SkillsTab({ data }: Props) {
export function SkillsTab({ workspaceId, data }: Props) {
const capability = summarizeWorkspaceCapabilities(data);
const skills = useMemo(() => extractSkills(data.agentCard), [data.agentCard]);
const setPanelTab = useCanvasStore((s) => s.setPanelTab);
@ -57,32 +65,115 @@ export function SkillsTab({ data }: Props) {
const reloadTimerRef = useRef<ReturnType<typeof setTimeout>>(undefined);
useEffect(() => {
// Re-init `mountedRef.current = true` on every mount. React 18
// StrictMode (Next.js dev) double-invokes effects: mount →
// cleanup → mount. Without this re-init, the first cleanup sets
// mountedRef.current = false, the re-mount runs the effect body
// again but never restores the flag, so every subsequent
// `if (mountedRef.current) setX(...)` guard skips and the
// component appears wedged: fetches complete, state never
// updates, "Loading…" sits forever. Production doesn't double-
// invoke so the bug only surfaces in dev — but dev is where we
// see it, and the cost of being explicit is one assignment.
mountedRef.current = true;
return () => {
mountedRef.current = false;
clearTimeout(reloadTimerRef.current);
};
}, []);
const workspaceId = data.id;
// Tracks whether loadInstalled has completed at least once (success
// or empty-array success — NOT failure). Without this the auto-
// expand effect below would fire on the initial render where
// `installed.length === 0` simply because the fetch hasn't returned
// yet, and worse, would also fire if the fetch throws (network
// blip, auth failure) — both cases falsely look like "no plugins
// installed". Gating on a separate "loaded" flag avoids the false
// positive.
const [installedLoaded, setInstalledLoaded] = useState(false);
const loadInstalled = useCallback(async () => {
try {
const result = await api.get<PluginInfo[]>(`/workspaces/${workspaceId}/plugins`);
if (mountedRef.current) setInstalled(Array.isArray(result) ? result : []);
if (mountedRef.current) {
setInstalled(Array.isArray(result) ? result : []);
setInstalledLoaded(true);
}
} catch (e) {
console.warn("SkillsTab: installed plugins load failed", e);
}
}, [workspaceId]);
const loadRegistry = useCallback(async () => {
// registry-load lifecycle so the UI can show "Loading…" / error /
// retry instead of an indistinguishable "No plugins in registry"
// banner whether the fetch is in-flight, errored, or genuinely
// returned []. The previous silent console.warn-only path made
// an auth failure or CORS blip look identical to an empty
// registry — exactly the diagnosis dead-end observed when the
// server returned 20 plugins via curl but the canvas showed 0.
const [registryLoading, setRegistryLoading] = useState(false);
const [registryError, setRegistryError] = useState<string | null>(null);
// Synchronous gate against concurrent loadRegistry runs. Refs survive
// Fast Refresh re-renders (ref objects persist across re-runs of
// the function body), so a previously-stranded fetch can pin this
// ref at true and block every subsequent loadRegistry call. The
// `force` parameter on loadRegistry below provides the user-driven
// escape hatch for that wedge.
const registryFetchInFlight = useRef(false);
// Reset the in-flight gate on unmount so a Fast Refresh that
// tears down + recreates the component without a full page reload
// doesn't carry the stuck-true value into the new instance via
// dev-server-preserved module state.
useEffect(() => {
return () => {
registryFetchInFlight.current = false;
};
}, []);
const loadRegistry = useCallback(async (force = false) => {
// Default callers (mount effect, button while not loading) honour
// the gate. Explicit force=true callers (Retry button) bypass it
// — the user is signalling "forget whatever you thought was in
// flight, fetch again now".
if (!force && registryFetchInFlight.current) return;
registryFetchInFlight.current = true;
setRegistryLoading(true);
setRegistryError(null);
try {
const result = await api.get<PluginInfo[]>("/plugins");
// 10s timeout — tighter than the 15s default. Plugin registry
// is local-disk-backed on the platform host (server reads
// pluginsDir entries) so a 10s budget is generous. Without
// an explicit timeout the UI's "Loading registry…" can sit
// for the full 15s + any browser hop time when a Fast
// Refresh strands an in-flight promise.
const result = await api.get<PluginInfo[]>("/plugins", { timeoutMs: 10_000 });
if (mountedRef.current) setRegistry(Array.isArray(result) ? result : []);
} catch (e) {
// Registry is the AVAILABLE PLUGINS list. Silent failure here
// left the user seeing "No plugins in registry" with no clue
// it was a fetch error — log it so devtools shows the cause.
console.warn("SkillsTab: registry load failed", e);
if (mountedRef.current) {
// Detect timeout/abort by DOMException.name first — that's
// the canonical signal across browsers. Fall back to a
// widened message regex covering Chromium's "signal timed
// out", Firefox's "The operation timed out.", Safari's
// "Aborted". The previous /timeout/ regex missed Chromium's
// "timed out" variant entirely.
const name = (e as { name?: string })?.name ?? "";
const msg = e instanceof Error ? e.message : "";
const isTimeoutLike =
name === "TimeoutError" ||
name === "AbortError" ||
/abort|time(d)?\s*out/i.test(msg);
setRegistryError(
isTimeoutLike
? "Registry fetch timed out (10s). The platform server may be slow or unreachable."
: msg || "Failed to load registry",
);
}
} finally {
registryFetchInFlight.current = false;
if (mountedRef.current) setRegistryLoading(false);
}
}, []);
@ -102,17 +193,73 @@ export function SkillsTab({ data }: Props) {
loadSourceSchemes();
}, [loadInstalled, loadRegistry, loadSourceSchemes]);
// First-time experience: if the workspace has zero plugins
// installed but the platform's registry has options to choose
// from, expand the registry by default so the user sees what's
// available without an extra click. Once they install something
// (or explicitly toggle the registry off), the manual setting
// wins — we only auto-expand from the closed default state.
const hasAutoExpandedRef = useRef(false);
useEffect(() => {
if (hasAutoExpandedRef.current) return;
if (installedLoaded && installed.length === 0 && registry.length > 0) {
setShowRegistry(true);
hasAutoExpandedRef.current = true;
}
}, [installedLoaded, installed.length, registry.length]);
const installedNames = useMemo(() => new Set(installed.map((p) => p.name)), [installed]);
// Install always goes through the source-based API. For registry
// plugins we build the local:// source on the fly; custom sources
// (github://, clawhub://, …) are typed into the input below.
const installFromSource = async (source: string, labelOverride?: string) => {
//
// Optional `optimistic` parameter mirrors the uninstall flow's local
// state mutation. Without it, the user sees the button revert from
// "Installing..." → "Install" the instant the POST returns, and the
// green "Installed" tag doesn't appear for ~15s while we wait out
// PLUGIN_RELOAD_DELAY_MS for the workspace restart before refetching.
// 15s of staring at the same button feels broken. Pushing the
// registry entry into `installed` immediately makes the UI reflect
// the install instantly; the delayed loadInstalled() reconciles
// anything we got wrong (or any server-side filtering we don't
// know about locally).
const installFromSource = async (
source: string,
labelOverride?: string,
optimistic?: PluginInfo,
) => {
const label = labelOverride ?? source;
setInstalling(label);
try {
await api.post(`/workspaces/${workspaceId}/plugins`, { source });
showToast(`Installed ${label} — restarting workspace`, "success");
if (optimistic && mountedRef.current) {
// Push with `supported_on_runtime` left undefined — the
// server's ListInstalled annotates the real value (true /
// false) at refetch time. Forcing `true` here would hide the
// "inert on this runtime" badge for 15s if the user
// installed a plugin that doesn't actually support the
// workspace's runtime; the badge only renders on `=== false`,
// so undefined keeps it neutral until reconciliation arrives.
setInstalled((prev) =>
prev.some((p) => p.name === optimistic.name)
? prev
: [...prev, { ...optimistic, supported_on_runtime: undefined }],
);
// Note: we intentionally do NOT set `installedLoaded` here.
// That flag means "the initial GET has succeeded at least
// once" and gates the auto-expand-registry effect. A fast
// optimistic install BEFORE the initial fetch returns must
// not flip the gate, or the auto-expand never fires and a
// followup loadInstalled racing with the optimistic write
// could overwrite our entry with [] mid-restart.
}
// Drop any prior reload timer before scheduling a new one —
// back-to-back installs within PLUGIN_RELOAD_DELAY_MS would
// otherwise queue multiple loadInstalled() calls and the
// unmount cleanup only clears the latest handle.
clearTimeout(reloadTimerRef.current);
reloadTimerRef.current = setTimeout(() => loadInstalled(), PLUGIN_RELOAD_DELAY_MS);
} catch (e) {
showToast(e instanceof Error ? e.message : "Install failed", "error");
@ -121,7 +268,10 @@ export function SkillsTab({ data }: Props) {
}
};
const handleInstall = (pluginName: string) => installFromSource(`local://${pluginName}`, pluginName);
const handleInstall = (pluginName: string) => {
const entry = registry.find((p) => p.name === pluginName);
return installFromSource(`local://${pluginName}`, pluginName, entry);
};
const handleInstallCustom = async () => {
const source = customSource.trim();
@ -133,9 +283,12 @@ export function SkillsTab({ data }: Props) {
const handleUninstall = async (pluginName: string) => {
setUninstalling(pluginName);
try {
await api.del(`/workspaces/${data.id}/plugins/${pluginName}`);
await api.del(`/workspaces/${workspaceId}/plugins/${pluginName}`);
showToast(`Removed ${pluginName} — restarting workspace`, "success");
setInstalled((prev) => prev.filter((p) => p.name !== pluginName));
// Drop any prior reload timer (see installFromSource for the
// back-to-back-action leak rationale).
clearTimeout(reloadTimerRef.current);
reloadTimerRef.current = setTimeout(() => loadInstalled(), PLUGIN_RELOAD_DELAY_MS);
} catch (e) {
showToast(e instanceof Error ? e.message : "Uninstall failed", "error");
@ -264,9 +417,53 @@ export function SkillsTab({ data }: Props) {
Local registry plugins below; paste any scheme URL above for GitHub or other sources.
</div>
</div>
<div className="text-[10px] uppercase tracking-[0.2em] text-zinc-600 mb-2">Available plugins</div>
{registry.length === 0 ? (
<div className="text-[10px] text-zinc-600">No plugins in registry</div>
<div className="flex items-center justify-between mb-2">
<div className="text-[10px] uppercase tracking-[0.2em] text-zinc-600">Available plugins</div>
{/* Retry visible whenever registry is empty — including
the loading state — so a stuck fetch (Fast Refresh
stranded promise, slow server, browser quirk) has a
user-driven escape hatch. The button stays enabled
even while a fetch is in flight: clicking it passes
force=true to bypass the in-flight gate, and the
label flips to show the activity. */}
{registry.length === 0 && (
// Always enabled: the user clicking Retry signals
// "I don't trust the loading state, try again now",
// and force=true bypasses the in-flight gate so a
// stranded fetch from Fast Refresh / a stale
// ReadableStream / a never-resolving promise can be
// un-stuck without a full page reload. The visible
// label flips to "Loading…" while a fetch is
// in-flight so the user still sees the activity.
<button
type="button"
onClick={() => loadRegistry(true)}
className="text-[10px] text-violet-300 hover:text-violet-200 underline-offset-2 hover:underline"
>
{registryLoading ? "Loading… click to retry" : "Retry"}
</button>
)}
</div>
{registryLoading && registry.length === 0 ? (
<div className="text-[10px] text-zinc-500">Loading registry</div>
) : registryError ? (
<div className="rounded-lg border border-red-800/40 bg-red-950/20 px-2 py-1.5">
<div className="text-[10px] text-red-300 font-semibold mb-0.5">
Couldn't load the plugin registry
</div>
<div className="text-[10px] text-red-400/80">{registryError}</div>
<div className="mt-1 text-[10px] text-zinc-500">
Check the platform server is reachable at /plugins. The Retry button is in the header above.
</div>
</div>
) : registry.length === 0 ? (
<div className="rounded-lg border border-zinc-800/40 bg-zinc-950/40 px-2 py-1.5">
<div className="text-[10px] text-zinc-400 mb-0.5">Registry returned 0 plugins.</div>
<div className="text-[10px] text-zinc-600">
This usually means the platform's plugins/ directory is empty.
Run scripts/clone-manifest.sh to populate it from the standalone repos.
</div>
</div>
) : (
<div className="space-y-1.5">
{registry.map((p) => {

View File

@ -128,7 +128,13 @@ describe("ConfigTab — hermes workspace", () => {
});
});
it("shows hermes-specific info banner pointing to Terminal tab (#1894)", async () => {
it("does NOT show the hermes-specific info banner (removed in #2061)", async () => {
// Banner-text inversion: the multilevel-layout-UX PR drops "hermes"
// from RUNTIMES_WITH_OWN_CONFIG (now {"external"} only). Hermes now
// shows the normal Config form — the banner "Hermes manages its own
// config" is reserved for the "external" runtime, not hermes itself.
// If this ever flips back, revisit the banner/error UX before
// unpinning this assertion.
wireApi({
workspaceRuntime: "hermes",
configYamlContent: null,
@ -137,9 +143,11 @@ describe("ConfigTab — hermes workspace", () => {
render(<ConfigTab workspaceId="ws-test" />);
await waitFor(() => {
expect(screen.getByText(/Hermes manages its own config/i)).toBeTruthy();
});
// Wait for the render+loads to settle (template list drives the runtime combobox).
await waitFor(() =>
screen.getByRole("combobox", { name: /runtime/i }),
);
expect(screen.queryByText(/Hermes manages its own config/i)).toBeNull();
});
it("DOES show 'No config.yaml found' error for langgraph workspace (default runtime)", async () => {
@ -161,14 +169,28 @@ describe("ConfigTab — hermes workspace", () => {
});
describe("ConfigTab — config.yaml on disk", () => {
it("config.yaml runtime/model wins when present, workspace metadata is fallback", async () => {
// If the workspace DB has runtime=langgraph but config.yaml declares
// runtime: crewai, the form should show crewai (config.yaml wins).
// Prevents silent runtime drift across reads.
it("workspace metadata (DB) wins over config.yaml when both are present (#2061)", async () => {
// Priority inversion in #2061: previously config.yaml overrode DB, so
// the tier-on-node badge and runtime-in-form could drift when the
// user edited config.yaml on disk. The multilevel-layout-UX PR made
// the DB authoritative — config.yaml is read for non-DB keys (tools,
// MCP server list, etc.) but runtime/model/tier come from the
// workspace row so the node badge matches the form.
//
// Scenario: DB says "hermes", config.yaml says "crewai". The form
// must show hermes (DB wins).
//
// We pick hermes (not langgraph) on the DB side because "langgraph"
// is collapsed to the empty-string "LangGraph (default)" option in
// the runtime dropdown — so a "langgraph" DB value would render as
// the empty-valued option and obscure whether the DB-wins logic
// actually fired. Hermes has its own non-empty option value and
// gives the assertion a clean signal.
wireApi({
workspaceRuntime: "langgraph", // DB
workspaceRuntime: "hermes", // DB — authoritative
configYamlContent: 'runtime: crewai\nmodel: "claude-opus"\n',
templates: [
{ id: "t-hermes", name: "Hermes", runtime: "hermes", models: [] },
{ id: "t-crewai", name: "CrewAI", runtime: "crewai", models: [] },
],
});
@ -176,6 +198,6 @@ describe("ConfigTab — config.yaml on disk", () => {
render(<ConfigTab workspaceId="ws-test" />);
const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
expect((select as HTMLSelectElement).value).toBe("crewai");
expect((select as HTMLSelectElement).value).toBe("hermes");
});
});

View File

@ -1,13 +1,17 @@
"use client";
import { useState, useEffect, useRef } from "react";
import ReactMarkdown from "react-markdown";
import remarkGfm from "remark-gfm";
import { api } from "@/lib/api";
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
import { WS_URL } from "@/store/socket";
import { closeWebSocketGracefully } from "@/lib/ws-close";
import { showToast } from "../../Toaster";
import { extractResponseText, extractRequestText } from "./message-parser";
import { inferA2AErrorHint } from "./a2aErrorHint";
interface ActivityEntry {
export interface ActivityEntry {
id: string;
activity_type: string;
source_id: string | null;
@ -22,11 +26,29 @@ interface ActivityEntry {
interface CommMessage {
id: string;
direction: "in" | "out";
/** UI-facing flow from THIS workspace's point of view:
*
* "out" this workspace either initiated the call (a2a_send)
* OR self-logged the reply from a peer it had called
* (a2a_receive with source_id == workspaceId).
* "in" a peer initiated the call to us (a2a_receive with
* source_id != workspaceId).
*
* Distinct from activity_type because the agent runtime self-
* logs its outbound calls' replies as `a2a_receive` rows; without
* this normalisation the UI labels would render those as
* incoming ("← From X") and right-justify them on the wrong
* side, even though from the user's perspective the call WAS
* outgoing. See toCommMessage for the resolution rules. */
flow: "in" | "out";
peerName: string;
peerId: string;
text: string;
responseText: string | null;
/** "ok" | "error" surfaces failed deliveries with their own
* visual treatment + recovery actions instead of an opaque
* "[A2A_ERROR]" body the user can't act on. */
status: string;
timestamp: string;
}
@ -36,9 +58,31 @@ function resolveName(id: string): string {
return (node?.data as WorkspaceNodeData)?.name || id.slice(0, 8);
}
function toCommMessage(entry: ActivityEntry, workspaceId: string): CommMessage | null {
const isOutgoing = entry.activity_type === "a2a_send";
const peerId = isOutgoing ? (entry.target_id || "") : (entry.source_id || "");
export function toCommMessage(entry: ActivityEntry, workspaceId: string): CommMessage | null {
// a2a_receive activity rows come in two shapes:
//
// 1. Real incoming call (a peer called us): source_id = the peer,
// target_id = us. peerId is source_id, flow is "in".
//
// 2. Self-logged response to an outbound call (the workspace's own
// runtime calls report_activity("a2a_receive", ...) after
// delegating; see workspace/a2a_tools.py:181). source_id =
// our own workspace_id, target_id = the peer that replied.
// peerId must come from target_id (otherwise the peer-name
// resolves to "us" and Restart would target THIS workspace),
// and flow is "out" — from the user's perspective this row
// belongs to the outbound thread, not an incoming one.
//
// a2a_send rows are always outbound from us: source_id = us,
// target_id = the peer.
const isSendActivity = entry.activity_type === "a2a_send";
const isSelfLoggedReceive =
entry.activity_type === "a2a_receive" && entry.source_id === workspaceId;
const flow: "in" | "out" = isSendActivity || isSelfLoggedReceive ? "out" : "in";
const peerId =
isSendActivity || isSelfLoggedReceive
? entry.target_id || ""
: entry.source_id || "";
if (!peerId) return null;
const text = extractRequestText(entry.request_body) || entry.summary || "";
@ -46,15 +90,35 @@ function toCommMessage(entry: ActivityEntry, workspaceId: string): CommMessage |
return {
id: entry.id,
direction: isOutgoing ? "out" : "in",
flow,
peerName: resolveName(peerId),
peerId,
text,
responseText,
status: entry.status || "ok",
timestamp: entry.created_at,
};
}
/** Strip the [A2A_ERROR] sentinel prefix the workspace runtime adds
 * to failed delegation responses, so the UI can render the underlying
 * message (or fall back to a generic explanation when the inner text
 * is empty — currently common because httpx exceptions often
 * stringify as ""). */
const A2A_ERROR_PREFIX = "[A2A_ERROR]";

function unwrapErrorText(raw: string | null): string {
  // null / empty input normalises to "" before any prefix handling.
  const body = raw?.trim() ?? "";
  return body.startsWith(A2A_ERROR_PREFIX)
    ? body.slice(A2A_ERROR_PREFIX.length).trim()
    : body;
}
// inferA2AErrorHint moved to ./a2aErrorHint so the Activity tab and
// this panel render identical hints for the same symptom.
export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
const [messages, setMessages] = useState<CommMessage[]>([]);
const [loading, setLoading] = useState(true);
@ -67,22 +131,45 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
setLoading(true);
api.get<ActivityEntry[]>(`/workspaces/${workspaceId}/activity?source=agent&limit=50`)
.then((entries) => {
const filtered = entries
const filtered = (entries ?? [])
.filter((e) => e.activity_type === "a2a_send" || e.activity_type === "a2a_receive")
.reverse();
const msgs: CommMessage[] = [];
for (const e of filtered) {
const m = toCommMessage(e, workspaceId);
if (m) {
const key = `${m.timestamp}:${m.direction}:${m.peerId}`;
msgs.push(m);
seenKeys.current.add(key);
// Per-row try/catch so a single malformed activity row
// (e.g. unexpected request_body shape) doesn't kill the
// batch — the previous code threw out of the for-loop and
// setMessages([3 items]) never ran, leaving the panel
// stuck on the empty state with no diagnostic in the
// console because the outer .catch silently swallowed
// everything.
try {
const m = toCommMessage(e, workspaceId);
if (m) {
const key = `${m.timestamp}:${m.flow}:${m.peerId}`;
msgs.push(m);
seenKeys.current.add(key);
}
} catch (rowErr) {
console.warn(
"AgentCommsPanel: failed to map activity row",
{ id: e.id, type: e.activity_type, err: rowErr },
);
}
}
setMessages(msgs);
setLoading(false);
})
.catch(() => setLoading(false));
.catch((err) => {
// Surface the failure in the console so a stuck panel is
// diagnosable without a debugger. Previous bare
// `.catch(() => setLoading(false))` swallowed every load
// failure (network errors, JSON parse errors, throws inside
// the .then body) — the panel just sat on the empty state
// with zero signal.
console.warn("AgentCommsPanel: load activity failed", err);
setLoading(false);
});
}, [workspaceId]);
// Live updates via WebSocket
@ -115,7 +202,7 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
};
const m = toCommMessage(entry, workspaceId);
if (m) {
const key = `${m.timestamp}:${m.direction}:${m.peerId}`;
const key = `${m.timestamp}:${m.flow}:${m.peerId}`;
if (seenKeys.current.has(key)) return;
seenKeys.current.add(key);
setMessages((prev) => [...prev, m]);
@ -148,31 +235,177 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
return (
<div className="flex-1 overflow-y-auto p-3 space-y-2">
{messages.map((msg) => (
<div key={msg.id} className={`flex ${msg.direction === "out" ? "justify-end" : "justify-start"}`}>
<div
className={`max-w-[85%] rounded-lg px-3 py-2 text-xs ${
msg.direction === "out"
? "bg-cyan-900/30 text-cyan-100 border border-cyan-700/20"
: "bg-zinc-800/80 text-zinc-200 border border-zinc-700/30"
}`}
>
<div className="text-[9px] text-zinc-500 mb-1">
{msg.direction === "out" ? `→ To ${msg.peerName}` : `← From ${msg.peerName}`}
</div>
<div className="text-zinc-300">{msg.text || "(no message text)"}</div>
{msg.responseText && (
<div className="mt-1.5 pt-1.5 border-t border-zinc-700/30 text-zinc-400">
{msg.responseText}
</div>
)}
<div className="text-[9px] text-zinc-500 mt-1">
{new Date(msg.timestamp).toLocaleTimeString()}
</div>
</div>
</div>
))}
{messages.map((msg) =>
msg.status === "error" ? (
<ErrorMessage key={msg.id} msg={msg} />
) : (
<NormalMessage key={msg.id} msg={msg} />
),
)}
<div ref={bottomRef} />
</div>
);
}
/** Success-path comms bubble. Outbound rows (msg.flow === "out") are
 * right-justified with the cyan treatment; inbound rows are
 * left-justified in zinc. The task text and the optional reply both
 * render as markdown via MarkdownBody; a missing task falls back to a
 * literal "(no message text)" placeholder. Timestamp renders in the
 * viewer's locale. */
function NormalMessage({ msg }: { msg: CommMessage }) {
  return (
    <div className={`flex ${msg.flow === "out" ? "justify-end" : "justify-start"}`}>
      <div
        className={`max-w-[85%] rounded-lg px-3 py-2 text-xs ${
          msg.flow === "out"
            ? "bg-cyan-900/30 text-cyan-100 border border-cyan-700/20"
            : "bg-zinc-800/80 text-zinc-200 border border-zinc-700/30"
        }`}
      >
        <div className="text-[9px] text-zinc-500 mb-1">
          {msg.flow === "out" ? `→ To ${msg.peerName}` : `← From ${msg.peerName}`}
        </div>
        {msg.text ? (
          <MarkdownBody className="text-zinc-300">{msg.text}</MarkdownBody>
        ) : (
          <div className="text-zinc-300">(no message text)</div>
        )}
        {msg.responseText && (
          <MarkdownBody className="mt-1.5 pt-1.5 border-t border-zinc-700/30 text-zinc-400">
            {msg.responseText}
          </MarkdownBody>
        )}
        <div className="text-[9px] text-zinc-500 mt-1">
          {new Date(msg.timestamp).toLocaleTimeString()}
        </div>
      </div>
    </div>
  );
}
/** Failure-state row. Replaces the unactionable "X failed [A2A_ERROR]"
 * bubble with: a clear banner naming the peer, the underlying
 * error text (if any), an inferred cause hint, and recovery
 * actions — Restart workspace, Open workspace.
 *
 * Recovery actions show on BOTH directions because both target the
 * same peer (toCommMessage now resolves peerId to the peer in
 * either case): an outbound delivery failure ("we called X and it
 * errored"), an inbound runtime failure ("X called us and our
 * reply errored" — rare), or the agent-self-logged "I called X and
 * got an error back" pattern that is the most common shape. The
 * user always wants to restart or inspect the failing peer. */
function ErrorMessage({ msg }: { msg: CommMessage }) {
  const selectNode = useCanvasStore((s) => s.selectNode);
  // Local in-flight flag so the Restart button debounces itself.
  const [restarting, setRestarting] = useState(false);
  // Strip the [A2A_ERROR] sentinel before hint inference and display.
  const errorText = unwrapErrorText(msg.responseText);
  const hint = inferA2AErrorHint(errorText);
  // Guard against acting on a peer whose workspace has been deleted
  // since this row was logged. Without the guard, restart 404s
  // surface as a generic toast and Open silently sets a dangling
  // selection that renders nothing in the side panel.
  const peerExists = (): boolean => {
    return useCanvasStore.getState().nodes.some((n) => n.id === msg.peerId);
  };
  // POST /workspaces/{peer}/restart with toast feedback; re-entrant
  // clicks are ignored while a restart is already in flight.
  const handleRestart = async () => {
    if (restarting) return;
    if (!peerExists()) {
      showToast(`${msg.peerName} no longer exists`, "error");
      return;
    }
    setRestarting(true);
    try {
      await api.post(`/workspaces/${msg.peerId}/restart`, {});
      showToast(`Restarting ${msg.peerName}`, "success");
    } catch (e) {
      showToast(
        `Restart failed: ${e instanceof Error ? e.message : "unknown error"}`,
        "error",
      );
    } finally {
      setRestarting(false);
    }
  };
  // Focus the failing peer's node on the canvas (opens its side panel).
  const handleOpen = () => {
    if (!peerExists()) {
      showToast(`${msg.peerName} no longer exists`, "error");
      return;
    }
    selectNode(msg.peerId);
  };
  // NOTE(review): the banner's aria-hidden <span> below renders empty
  // in this view — looks like a stripped warning glyph (e.g. ⚠);
  // confirm against the repo before relying on this transcription.
  return (
    <div className={`flex ${msg.flow === "out" ? "justify-end" : "justify-start"}`}>
      <div className="max-w-[85%] rounded-lg border border-red-800/50 bg-red-950/30 px-3 py-2 text-xs">
        <div className="flex items-center gap-1.5 text-[10px] text-red-300 font-semibold uppercase tracking-wide mb-1.5">
          <span aria-hidden="true"></span>
          {msg.flow === "out"
            ? `Failed to deliver to ${msg.peerName}`
            : `${msg.peerName} returned an error`}
        </div>
        {msg.text && (
          <div className="text-[10px] text-zinc-500 mb-1.5">
            <span className="uppercase tracking-wide">Task</span>
            <MarkdownBody className="text-zinc-400">{msg.text}</MarkdownBody>
          </div>
        )}
        <div className="rounded bg-zinc-950/60 border border-red-900/40 px-2 py-1.5 mb-1.5">
          <div className="text-[9px] uppercase tracking-wide text-red-400 mb-0.5">
            Underlying error
          </div>
          <code className="text-[11px] font-mono text-red-200 whitespace-pre-wrap break-words">
            {errorText || "(no detail returned)"}
          </code>
        </div>
        <p className="text-[10px] text-zinc-400 leading-snug mb-2">{hint}</p>
        {msg.peerId && (
          <div className="flex flex-wrap items-center gap-1.5">
            <button
              type="button"
              onClick={handleRestart}
              disabled={restarting}
              className="px-2 py-0.5 rounded bg-red-900/50 hover:bg-red-800/60 border border-red-700/40 text-[10px] text-red-200 disabled:opacity-50 transition-colors"
            >
              {restarting ? "Restarting…" : `Restart ${msg.peerName}`}
            </button>
            <button
              type="button"
              onClick={handleOpen}
              className="px-2 py-0.5 rounded bg-zinc-800 hover:bg-zinc-700 border border-zinc-700/50 text-[10px] text-zinc-300 transition-colors"
            >
              Open {msg.peerName}
            </button>
          </div>
        )}
        <div className="text-[9px] text-zinc-500 mt-1.5">
          {new Date(msg.timestamp).toLocaleTimeString()}
        </div>
      </div>
    </div>
  );
}
/** Tiny markdown wrapper matching ChatTab's My Chat styling. Same
 * remark-gfm pipeline (tables, strikethrough, task lists) plus the
 * prose tweaks that keep paragraphs tight inside a small bubble.
 * Code blocks get an `overflow-x-auto` so a long line of code doesn't
 * blow out the bubble's max-width — agent-to-agent replies routinely
 * ship code samples and JSON. Tables likewise scroll horizontally
 * (`[&_table]:block [&_table]:overflow-x-auto`) instead of widening
 * the bubble.
 *
 * @param children raw markdown source (a string, not React nodes)
 * @param className extra classes appended after the prose defaults */
function MarkdownBody({
  children,
  className,
}: {
  children: string;
  className?: string;
}) {
  return (
    <div
      className={`prose prose-sm prose-invert max-w-none [&>p]:mb-1 [&>p:last-child]:mb-0 [&_pre]:overflow-x-auto [&_table]:block [&_table]:overflow-x-auto ${className ?? ""}`}
    >
      <ReactMarkdown remarkPlugins={[remarkGfm]}>{children}</ReactMarkdown>
    </div>
  );
}

View File

@ -0,0 +1,94 @@
"use client";
// Small presentational components for chat attachments. Kept in a
// separate file so ChatTab.tsx stays focused on state + send/receive
// orchestration. Both variants share the file-icon + name + size
// layout; the only difference is the trailing action (remove for
// pending, download for completed).
import type { ChatAttachment } from "./types";
/** Human-readable size label for attachment pills: "512 B", "12 KB",
 * "1.4 MB" — or the empty string when the size is unknown. KB rounds
 * to a whole number and MB to one decimal to fit the compact layout. */
function formatSize(bytes: number | undefined): string {
  if (bytes == null) return ""; // unknown size → render nothing
  const KIB = 1024;
  const MIB = KIB * 1024;
  if (bytes < KIB) return `${bytes} B`;
  if (bytes < MIB) return `${(bytes / KIB).toFixed(0)} KB`;
  return `${(bytes / MIB).toFixed(1)} MB`;
}
/** Inline pill for a file that the user has picked but not yet sent.
 * Renders above the textarea; clicking × pops it from the pending
 * list without uploading. The remove button carries an aria-label so
 * screen readers announce which file it removes; the filename
 * truncates but keeps a full-text `title` tooltip. */
export function PendingAttachmentPill({
  file,
  onRemove,
}: {
  file: File;
  onRemove: () => void;
}) {
  return (
    <div className="flex items-center gap-1.5 rounded-md border border-zinc-700/60 bg-zinc-800/80 px-2 py-1 text-[10px] text-zinc-300 max-w-[200px]">
      <FileGlyph className="text-zinc-400 shrink-0" />
      <span className="truncate" title={file.name}>{file.name}</span>
      <span className="text-zinc-500 shrink-0 tabular-nums">{formatSize(file.size)}</span>
      <button
        onClick={onRemove}
        aria-label={`Remove ${file.name}`}
        className="ml-0.5 text-zinc-500 hover:text-zinc-200 transition-colors shrink-0"
      >
        <svg width="10" height="10" viewBox="0 0 16 16" fill="none" aria-hidden="true">
          <path d="M4 4l8 8M12 4l-8 8" stroke="currentColor" strokeWidth="1.6" strokeLinecap="round" />
        </svg>
      </button>
    </div>
  );
}
/** Chip rendered inside a message bubble for a sent/received file.
 * Clicking triggers the download via the passed onDownload callback
 * so the parent controls workspace-scoped URL resolution.
 *
 * @param attachment file metadata (name required; size optional)
 * @param onDownload invoked with the attachment on click
 * @param tone "user" → blue chip, "agent" → zinc chip, matching the
 *   surrounding bubble's palette */
export function AttachmentChip({
  attachment,
  onDownload,
  tone,
}: {
  attachment: ChatAttachment;
  onDownload: (a: ChatAttachment) => void;
  tone: "user" | "agent";
}) {
  const toneClasses =
    tone === "user"
      ? "border-blue-400/30 bg-blue-600/20 hover:bg-blue-600/30 text-blue-100"
      : "border-zinc-600/50 bg-zinc-700/40 hover:bg-zinc-600/50 text-zinc-100";
  return (
    <button
      onClick={() => onDownload(attachment)}
      title={`Download ${attachment.name}`}
      className={`flex items-center gap-1.5 rounded-md border px-2 py-1 text-[10px] transition-colors max-w-full ${toneClasses}`}
    >
      <FileGlyph className="shrink-0 opacity-70" />
      <span className="truncate">{attachment.name}</span>
      {attachment.size != null && (
        <span className="opacity-60 shrink-0 tabular-nums">{formatSize(attachment.size)}</span>
      )}
      <DownloadGlyph className="opacity-70 shrink-0" />
    </button>
  );
}
/** 10×10 document icon (page with folded corner), stroke-only so it
 * inherits `currentColor` from the caller's className. aria-hidden:
 * purely decorative next to the visible filename. */
function FileGlyph({ className }: { className?: string }) {
  return (
    <svg width="10" height="10" viewBox="0 0 16 16" fill="none" className={className} aria-hidden="true">
      <path d="M4 2h5l3 3v9a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V3a1 1 0 0 1 1-1Z" stroke="currentColor" strokeWidth="1.3" strokeLinejoin="round" />
      <path d="M9 2v3h3" stroke="currentColor" strokeWidth="1.3" strokeLinejoin="round" />
    </svg>
  );
}
/** 10×10 download icon (arrow into a tray line); inherits
 * `currentColor`. aria-hidden: decorative — the chip's `title`
 * already says "Download …". */
function DownloadGlyph({ className }: { className?: string }) {
  return (
    <svg width="10" height="10" viewBox="0 0 16 16" fill="none" className={className} aria-hidden="true">
      <path d="M8 2v9M4 7l4 4 4-4" stroke="currentColor" strokeWidth="1.4" strokeLinecap="round" strokeLinejoin="round" />
      <path d="M3 13h10" stroke="currentColor" strokeWidth="1.4" strokeLinecap="round" />
    </svg>
  );
}

View File

@ -0,0 +1,113 @@
// @vitest-environment jsdom
import { describe, it, expect, vi } from "vitest";
// Stub the canvas store before importing the SUT — toCommMessage calls
// useCanvasStore.getState() inside resolveName to look up peer names,
// which would otherwise hit the real Zustand store.
vi.mock("@/store/canvas", () => ({
useCanvasStore: {
getState: () => ({
nodes: [
{ id: "ws-self", data: { name: "Self" } },
{ id: "ws-peer", data: { name: "Peer Agent" } },
],
}),
},
}));
import { toCommMessage, type ActivityEntry } from "../AgentCommsPanel";
const SELF = "ws-self";
const PEER = "ws-peer";
/** Builds a fully-populated outbound (a2a_send) activity row; each
 * test overrides just the fields it cares about. Defaults: SELF →
 * PEER, status "ok", fixed timestamp so assertions are stable. */
function makeEntry(overrides: Partial<ActivityEntry> = {}): ActivityEntry {
  const base: ActivityEntry = {
    id: "act-1",
    activity_type: "a2a_send",
    source_id: SELF,
    target_id: PEER,
    method: "message/send",
    summary: "Delegating to Peer Agent",
    request_body: null,
    response_body: null,
    status: "ok",
    created_at: "2026-04-25T18:00:00Z",
  };
  return { ...base, ...overrides };
}
describe("toCommMessage — flow derivation", () => {
it("a2a_send is always outbound (flow=out, peer=target)", () => {
const m = toCommMessage(
makeEntry({ activity_type: "a2a_send", source_id: SELF, target_id: PEER }),
SELF,
);
expect(m).toBeTruthy();
expect(m!.flow).toBe("out");
expect(m!.peerId).toBe(PEER);
expect(m!.peerName).toBe("Peer Agent");
});
it("a2a_receive from a peer (peer-initiated call) is inbound", () => {
// Real incoming call: source = peer, target = us.
const m = toCommMessage(
makeEntry({
activity_type: "a2a_receive",
source_id: PEER,
target_id: SELF,
}),
SELF,
);
expect(m!.flow).toBe("in");
expect(m!.peerId).toBe(PEER);
expect(m!.peerName).toBe("Peer Agent");
});
it("a2a_receive self-logged by our runtime AFTER an outbound call is OUTBOUND from the user's POV", () => {
// workspace/a2a_tools.py:181 self-logs an a2a_receive on the
// CALLER's workspace_id with source_id=us, target_id=peer.
// From the user's perspective this row belongs to the outbound
// delegation thread — render flow=out + peer=target so the
// bubble right-justifies under "Delegating to peer" and the
// Restart button targets the actual peer (NOT us). Regression
// for the bug where these rows rendered as "← From Self" with
// a Restart button that would have restarted the user's own
// workspace.
const m = toCommMessage(
makeEntry({
activity_type: "a2a_receive",
source_id: SELF,
target_id: PEER,
summary: "Peer Agent failed",
status: "error",
}),
SELF,
);
expect(m!.flow).toBe("out");
expect(m!.peerId).toBe(PEER);
expect(m!.peerName).toBe("Peer Agent");
expect(m!.status).toBe("error");
});
it("returns null when no peer can be resolved", () => {
// a2a_receive with both ids null — discard rather than render a
// ghost bubble pointing at "Unknown".
const m = toCommMessage(
makeEntry({
activity_type: "a2a_receive",
source_id: null,
target_id: null,
}),
SELF,
);
expect(m).toBeNull();
});
it("propagates status through to the message (drives error rendering)", () => {
const m = toCommMessage(
makeEntry({ status: "error", activity_type: "a2a_send" }),
SELF,
);
expect(m!.status).toBe("error");
});
});

View File

@ -0,0 +1,67 @@
import { describe, it, expect } from "vitest";
import { inferA2AErrorHint } from "../a2aErrorHint";
// Pure logic. Pin every named pattern so a future contributor adding a
// new symptom doesn't accidentally collapse the buckets — and so the
// "most specific first" ordering can't drift without a test failing.
describe("inferA2AErrorHint", () => {
it("matches the Claude Code SDK init wedge specifically", () => {
const hint = inferA2AErrorHint("Control request timeout: initialize");
expect(hint).toMatch(/Claude Code SDK is wedged/);
});
it("does NOT misfire on user tasks containing 'initialize' generally", () => {
// Regression: an earlier bare-`initialize` pattern would have
// false-positived "failed to initialize database" into the SDK
// wedge hint. Confirm the full-phrase guard holds.
const hint = inferA2AErrorHint("failed to initialize database connection");
expect(hint).not.toMatch(/Claude Code SDK/);
});
it("recognises httpx ReadTimeout / ConnectTimeout class names", () => {
expect(inferA2AErrorHint("ReadTimeout: timeout")).toMatch(/proxy timeout/);
expect(inferA2AErrorHint("ConnectTimeout: ...")).toMatch(/proxy timeout/);
});
it("recognises generic timeout / deadline-exceeded language", () => {
expect(inferA2AErrorHint("deadline exceeded after 300s")).toMatch(/proxy timeout/);
expect(inferA2AErrorHint("Operation timeout")).toMatch(/proxy timeout/);
});
it("handles connection-reset family (RemoteProtocolError, ConnectionReset, no-message)", () => {
expect(inferA2AErrorHint("RemoteProtocolError: ...")).toMatch(/connection.*dropped/);
expect(inferA2AErrorHint("ConnectionResetError")).toMatch(/connection.*dropped/);
expect(inferA2AErrorHint("connection reset by peer")).toMatch(/connection.*dropped/);
expect(inferA2AErrorHint("RemoteProtocolError (no message — likely connection reset)")).toMatch(/connection.*dropped/);
});
it("recognises agent-runtime exceptions", () => {
expect(inferA2AErrorHint("Agent error: ValueError raised")).toMatch(/runtime threw an exception/);
expect(inferA2AErrorHint("RuntimeException in tool call")).toMatch(/runtime threw an exception/);
});
it("recognises peer-unreachable cases (Activity-tab originals)", () => {
expect(inferA2AErrorHint("workspace not found")).toMatch(/can't be reached/);
expect(inferA2AErrorHint("not accessible")).toMatch(/can't be reached/);
expect(inferA2AErrorHint("workspace is offline")).toMatch(/can't be reached/);
});
it("returns the empty-detail-specific hint when input is exactly empty", () => {
expect(inferA2AErrorHint("")).toMatch(/no error detail/);
});
it("returns a generic fallback for unrecognised text", () => {
const hint = inferA2AErrorHint("some completely novel error nobody has matched yet");
expect(hint).toMatch(/Check the workspace logs|delivery failure/);
});
it("Claude SDK wedge wins over the more general timeout pattern", () => {
// Both 'control request timeout' and 'timeout' match the same
// input. The SDK wedge hint is more actionable; the ordering in
// the function must keep it first. Lock that priority in.
const hint = inferA2AErrorHint("Control request timeout: initialize");
expect(hint).toMatch(/Claude Code SDK/);
expect(hint).not.toMatch(/proxy timeout/);
});
});

View File

@ -0,0 +1,41 @@
import { describe, it, expect } from "vitest";
import { ACTIVITY_LOG_WINDOW, appendActivityLine } from "../activityLog";
describe("appendActivityLine", () => {
  it("appends a fresh line", () => {
    expect(appendActivityLine([], "📄 Read /a")).toEqual(["📄 Read /a"]);
  });
  it("collapses an immediate duplicate", () => {
    const prev = ["📄 Read /a"];
    // Same exact string twice in a row is noise — the helper should
    // return the original array reference, not a new one.
    expect(appendActivityLine(prev, "📄 Read /a")).toBe(prev);
  });
  it("keeps non-adjacent duplicates", () => {
    const prev = ["📄 Read /a", "⚡ Bash: ls"];
    expect(appendActivityLine(prev, "📄 Read /a")).toEqual([
      "📄 Read /a",
      "⚡ Bash: ls",
      "📄 Read /a",
    ]);
  });
  it("rolls off the oldest line when the window fills", () => {
    const seed = Array.from({ length: ACTIVITY_LOG_WINDOW }, (_, i) => `line-${i}`);
    const next = appendActivityLine(seed, "newest");
    expect(next.length).toBe(ACTIVITY_LOG_WINDOW);
    expect(next[next.length - 1]).toBe("newest");
    // Oldest entry is dropped — line-0 is gone.
    expect(next[0]).toBe("line-1");
  });
  // Fixed test name: the previous description claimed the helper
  // "keeps the original array reference when below the window cap",
  // but the assertions verify the opposite contract — appending
  // returns a NEW array while leaving the input unmutated. (Only the
  // duplicate-collapse path returns the original reference.)
  it("returns a new array without mutating the input when below the window cap", () => {
    const prev = ["a", "b"];
    const next = appendActivityLine(prev, "c");
    // Appending allocates a fresh array; the input must stay untouched.
    expect(prev).toEqual(["a", "b"]);
    expect(next).toEqual(["a", "b", "c"]);
  });
});

View File

@ -4,6 +4,7 @@ import {
extractResponseText,
extractAgentText,
extractTextsFromParts,
extractFilesFromTask,
} from "../message-parser";
describe("extractRequestText", () => {
@ -99,6 +100,67 @@ describe("extractResponseText", () => {
it("returns empty when result has no parts", () => {
expect(extractResponseText({ result: { other: true } })).toBe("");
});
// Regression: Claude Code (and other long-reply runtimes) emits
// multi-part text replies. The previous implementation returned
// only the first part, silently truncating the rest. Observed
// 2026-04-25 on a 15k-char Wave 1 brief that rendered as just the
// markdown table header.
it("joins all text parts when result.parts has multiple", () => {
const body = {
result: {
parts: [
{ kind: "text", text: "# Header" },
{ kind: "text", text: "| Col |" },
{ kind: "text", text: "| --- |" },
{ kind: "text", text: "| Row |" },
],
},
};
expect(extractResponseText(body)).toBe("# Header\n| Col |\n| --- |\n| Row |");
});
it("joins all text parts across multiple artifacts", () => {
const body = {
result: {
artifacts: [
{ parts: [{ kind: "text", text: "First artifact" }] },
{ parts: [{ kind: "text", text: "Second artifact" }] },
],
},
};
expect(extractResponseText(body)).toBe("First artifact\nSecond artifact");
});
it("joins all .root.text variants when present", () => {
const body = {
result: {
parts: [
{ root: { text: "alpha" } },
{ root: { text: "beta" } },
],
},
};
expect(extractResponseText(body)).toBe("alpha\nbeta");
});
// Regression: when a response carries BOTH parts and artifacts
// (Hermes tool-call replies do this — summary in parts, detail in
// artifacts), the early-return-on-parts implementation silently
// dropped the artifacts body. The collected-from-every-source
// implementation must surface both.
it("collects text from BOTH result.parts AND result.artifacts when both present", () => {
const body = {
result: {
parts: [{ kind: "text", text: "Summary" }],
artifacts: [
{ parts: [{ kind: "text", text: "Detail block one" }] },
{ parts: [{ kind: "text", text: "Detail block two" }] },
],
},
};
expect(extractResponseText(body)).toBe("Summary\nDetail block one\nDetail block two");
});
});
describe("extractTextsFromParts", () => {
@ -133,3 +195,71 @@ describe("extractTextsFromParts", () => {
expect(extractTextsFromParts(parts)).toBe("Only text");
});
});
describe("extractFilesFromTask", () => {
it("pulls A2A file parts out of a result", () => {
const task = {
parts: [
{ kind: "text", text: "here's the report" },
{
kind: "file",
file: { name: "report.pdf", mimeType: "application/pdf", uri: "workspace:/reports/report.pdf", size: 4096 },
},
],
};
const files = extractFilesFromTask(task);
expect(files).toEqual([
{ name: "report.pdf", mimeType: "application/pdf", uri: "workspace:/reports/report.pdf", size: 4096 },
]);
});
it("recovers a filename from the URI when `name` is absent", () => {
const task = {
parts: [
{ kind: "file", file: { uri: "workspace:/workspace/out/graph.png" } },
],
};
const files = extractFilesFromTask(task);
expect(files[0].name).toBe("graph.png");
});
it("skips file parts without a URI (inline bytes are not supported yet)", () => {
const task = {
parts: [
{ kind: "file", file: { name: "inline.bin", bytes: "AAA=" } },
],
};
expect(extractFilesFromTask(task)).toEqual([]);
});
it("walks artifacts[] so file parts nested inside artifact envelopes are found", () => {
const task = {
artifacts: [
{
parts: [
{ kind: "file", file: { name: "trace.log", uri: "workspace:/logs/trace.log" } },
],
},
],
};
const files = extractFilesFromTask(task);
expect(files[0]).toMatchObject({ name: "trace.log", uri: "workspace:/logs/trace.log" });
});
it("returns [] on malformed input rather than throwing", () => {
expect(extractFilesFromTask({})).toEqual([]);
expect(extractFilesFromTask({ parts: "not-an-array" } as unknown as Record<string, unknown>)).toEqual([]);
});
it("walks result.message.parts — the non-task reply shape some A2A servers use", () => {
const task = {
message: {
parts: [
{ kind: "file", file: { name: "out.txt", uri: "workspace:/workspace/out.txt" } },
],
},
};
const files = extractFilesFromTask(task);
expect(files[0]).toMatchObject({ name: "out.txt", uri: "workspace:/workspace/out.txt" });
});
});

View File

@ -0,0 +1,41 @@
import { describe, it, expect } from "vitest";
import { resolveAttachmentHref } from "../uploads";
describe("resolveAttachmentHref — URI scheme normalisation", () => {
  // Any syntactically valid UUID works; the helper only splices it
  // into the per-workspace download path.
  const wsId = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee";
  it("rewrites the canonical workspace:<path> scheme to /chat/download", () => {
    const url = resolveAttachmentHref(wsId, "workspace:/workspace/report.pdf");
    expect(url).toContain(`/workspaces/${wsId}/chat/download`);
    // The container path rides along as a query value, so it must be
    // URL-encoded exactly once.
    expect(url).toContain(encodeURIComponent("/workspace/report.pdf"));
  });
  it("accepts bare absolute container paths (some agents omit the scheme)", () => {
    const url = resolveAttachmentHref(wsId, "/workspace/report.pdf");
    expect(url).toContain(`/workspaces/${wsId}/chat/download`);
    expect(url).toContain(encodeURIComponent("/workspace/report.pdf"));
  });
  it("accepts file:/// URIs pointing into an allowed root", () => {
    const url = resolveAttachmentHref(wsId, "file:///workspace/report.pdf");
    expect(url).toContain(`/workspaces/${wsId}/chat/download`);
    expect(url).toContain(encodeURIComponent("/workspace/report.pdf"));
  });
  it("passes through HTTP(S) URIs unchanged so off-platform artefacts still render", () => {
    const external = "https://example.com/static/report.pdf";
    expect(resolveAttachmentHref(wsId, external)).toBe(external);
  });
  it("passes through container paths that are not under any allowed root", () => {
    // /etc/passwd looks like a path but isn't one of the allowed
    // roots — falling back to raw passthrough forces the caller into
    // the external-URL branch, which opens a new tab and lets the
    // browser refuse. Rewriting would 400 anyway server-side.
    expect(resolveAttachmentHref(wsId, "/etc/passwd")).toBe("/etc/passwd");
  });
  it("passes through unknown schemes unchanged", () => {
    expect(resolveAttachmentHref(wsId, "s3://bucket/key")).toBe("s3://bucket/key");
  });
});

View File

@ -0,0 +1,54 @@
/**
 * Maps an A2A delivery-failure detail string (the bit AFTER stripping
 * the [A2A_ERROR] sentinel prefix) to a one-line operator-actionable
 * hint. Matching is lowercase-substring based and ordered most-
 * specific first, so the right hint wins when patterns overlap
 * (e.g. "control request timeout" beats the generic "timeout").
 *
 * Used by both the chat Agent Comms panel and the Activity tab so the
 * same symptom reads identically across surfaces. Two prior copies
 * had already drifted — this module is the merged superset and the
 * only place hint text should change.
 */
export function inferA2AErrorHint(detail: string): string {
  const lowered = detail.toLowerCase();
  // True when any needle appears in the lowercased detail.
  const matchesAny = (...needles: string[]): boolean =>
    needles.some((needle) => lowered.includes(needle));

  // Claude Code SDK init wedge — match the full phrase, never bare
  // "initialize": a user task containing "failed to initialize
  // database" must not false-positive into the SDK-wedge hint.
  if (matchesAny("control request timeout")) {
    return "The remote agent's Claude Code SDK is wedged on initialization (often after a long idle period or OAuth refresh). A workspace restart usually clears it.";
  }
  // httpx timeout class names plus generic timeout/deadline language.
  if (matchesAny("readtimeout", "connecttimeout", "deadline exceeded", "timeout")) {
    return "The remote agent didn't respond within the proxy timeout. It may be busy with a long task, or the runtime is stuck — restart the workspace if this repeats.";
  }
  // Connection dropped mid-request (reset / protocol error / the
  // "no message" marker the runtime substitutes for empty exceptions).
  if (matchesAny("connectionreset", "remoteprotocolerror", "connection reset", "no message")) {
    return "The connection to the remote agent dropped before a reply arrived. Usually a transient network blip — retry once. If it repeats, the remote container may have crashed mid-request; check its logs.";
  }
  // The peer's agent runtime raised inside its own code.
  if (matchesAny("agent error", "exception")) {
    return "The remote agent's runtime threw an exception. Check the workspace's container logs for the traceback. Restart usually clears transient runtime crashes.";
  }
  // Peer unreachable: stopped, removed, or outside the ACL.
  if (matchesAny("not found", "not accessible", "offline")) {
    return "The remote workspace can't be reached — it may be stopped, removed, or outside the access control list. Verify the peer is online before retrying.";
  }
  // Exactly-empty detail: httpx exceptions often stringify as "".
  if (detail === "") {
    return "The remote agent returned no error detail (the underlying httpx exception had an empty message — typically a connection-reset or silent timeout). A workspace restart is the safe first move.";
  }
  return "The remote agent reported a delivery failure. Check the workspace logs or try restarting.";
}

View File

@ -0,0 +1,23 @@
/**
 * Sliding-window log for the in-chat activity feed (the live progress
 * lines shown under the spinner while a chat reply is in flight).
 *
 * The window is sized to fit the spinner area without scrolling, but
 * large enough that a burst of per-tool-use rows (multiple Reads,
 * Grep, Bash, Edits, delegations in one turn) doesn't flush early
 * context before the user can read it.
 *
 * Consecutive identical lines collapse to a single entry: the same
 * tool repeating on the same target within a turn is noise, not new
 * progress.
 */
export const ACTIVITY_LOG_WINDOW = 20;

/** Append `line` to the log, collapsing an immediate duplicate (the
 * original array reference is returned unchanged in that case) and
 * dropping the oldest entries so the result never exceeds
 * ACTIVITY_LOG_WINDOW. Never mutates `prev`. */
export function appendActivityLine(prev: string[], line: string): string[] {
  const isRepeatOfLast = prev.length > 0 && prev[prev.length - 1] === line;
  if (isRepeatOfLast) return prev;
  // Keep at most WINDOW-1 of the newest existing lines so the append
  // below lands exactly at the cap.
  const retained =
    prev.length >= ACTIVITY_LOG_WINDOW
      ? prev.slice(1 - ACTIVITY_LOG_WINDOW)
      : prev;
  return [...retained, line];
}

View File

@ -32,6 +32,64 @@ export function extractTextsFromParts(parts: unknown): string | null {
return texts.length > 0 ? texts.join("\n") : null;
}
/** One file reference parsed out of an A2A response part. Only
 * URI-carrying parts are surfaced (see extractFilesFromTask);
 * inline-bytes parts are skipped. */
export interface ParsedFilePart {
  // Display name; the parser falls back to the URI's basename when
  // the part omits one.
  name: string;
  // `workspace:<abs-path>`, http(s), or whatever the agent emitted.
  uri: string;
  // Passed through only when present and correctly typed in the part.
  mimeType?: string;
  size?: number;
}
/** Extract file parts from an A2A response. Walks parts[] plus
 * artifacts[].parts, status.message.parts, and a bare message.parts
 * wrapper. Per the A2A spec a file part looks like:
 *   { kind: "file", file: { name, mimeType, uri | bytes } }
 * Only parts carrying a `uri` are surfaced — inline bytes would need
 * a different renderer (data URL) and are out of scope for MVP.
 * Names fall back to the URI's basename when absent. Malformed
 * shapes are tolerated: whatever was collected before the failure is
 * returned. */
export function extractFilesFromTask(task: Record<string, unknown>): ParsedFilePart[] {
  const found: ParsedFilePart[] = [];
  const collect = (parts: unknown): void => {
    if (!Array.isArray(parts)) return;
    for (const part of parts as Array<Record<string, unknown>>) {
      // Accept both the spec's `kind` and the legacy `type` tag.
      if (part.kind !== "file" && part.type !== "file") continue;
      const file = (part.file ?? part) as Record<string, unknown>;
      const uri = typeof file.uri === "string" ? file.uri : "";
      if (uri === "") continue; // bytes-only part — no renderer yet
      const explicitName = typeof file.name === "string" ? file.name : "";
      found.push({
        name: explicitName || basename(uri),
        uri,
        mimeType: typeof file.mimeType === "string" ? file.mimeType : undefined,
        size: typeof file.size === "number" ? file.size : undefined,
      });
    }
  };
  try {
    collect(task.parts);
    const artifacts = (task.artifacts as Array<Record<string, unknown>> | undefined) ?? [];
    for (const artifact of artifacts) collect(artifact.parts);
    const status = task.status as Record<string, unknown> | undefined;
    const statusMessage = status?.message as Record<string, unknown> | undefined;
    if (statusMessage) collect(statusMessage.parts);
    // Some A2A servers wrap a non-task reply as
    // {result: {message: {parts: [...]}}} rather than {result: {parts}}.
    // Without this branch we'd silently drop file parts returned by
    // third-party implementations.
    const message = task.message as Record<string, unknown> | undefined;
    if (message) collect(message.parts);
  } catch {
    /* tolerate malformed shapes — chat falls through to text-only */
  }
  return found;
}
/** Derive a display name from a part URI: strip the `workspace:`
 * scheme and any http(s) origin prefix, then take the final path
 * segment. Falls back to "file" when the result would be empty.
 * Fix: the old `|| "file"` fallback only covered the no-slash branch
 * of the ternary, so a trailing-slash URI (e.g. "workspace:/dir/")
 * produced an empty name; the fallback now applies to both branches. */
function basename(uri: string): string {
  const cleaned = uri.replace(/^workspace:/, "").replace(/^https?:\/\//, "");
  const slash = cleaned.lastIndexOf("/");
  const name = slash >= 0 ? cleaned.slice(slash + 1) : cleaned;
  return name || "file";
}
/** Extract user message text from an activity log request_body */
export function extractRequestText(body: Record<string, unknown> | null): string {
if (!body) return "";
@ -41,22 +99,54 @@ export function extractRequestText(body: Record<string, unknown> | null): string
return (parts?.[0]?.text as string) || "";
}
/** Extract text from an activity log response_body (multiple possible formats) */
/** Extract text from an activity log response_body (multiple possible formats).
*
* Collects from EVERY source top-level `parts[].text`, `parts[].root.text`
* (older nested shape), and `artifacts[].parts[].text` (task-shaped
* replies) and joins them with "\n". Two reasons to collect rather
* than early-return:
*
* 1. Claude Code and other long-reply runtimes emit multiple text
* parts in a single `parts` array. Returning just the first
* silently truncates 15k-char briefs to their leading line
* (observed UX A/B Lab Wave 1, 2026-04-25).
*
* 2. Some producers emit a summary in `parts[].text` AND details in
* `artifacts[].parts[].text` (Hermes does this for tool calls).
* The previous "first source wins" returned only the summary;
* artifacts dropped silently. */
export function extractResponseText(body: Record<string, unknown>): string {
try {
// {result: "text"} — from MCP server delegation logs
if (typeof body.result === "string") return body.result;
// A2A JSON-RPC response: {result: {parts: [{kind: "text", text: "..."}]}}
const result = body.result as Record<string, unknown> | undefined;
if (result) {
const collected: string[] = [];
// A2A JSON-RPC: {result: {parts: [{kind: "text", text: "..."}]}}
const fromParts = extractTextsFromParts(result.parts);
if (fromParts) collected.push(fromParts);
// Older nested shape: {parts: [{root: {text: "..."}}]}
const parts = (result.parts || []) as Array<Record<string, unknown>>;
const rootTexts: string[] = [];
for (const p of parts) {
const t = (p.text as string) || "";
if (t) return t;
const root = p.root as Record<string, unknown> | undefined;
if (root?.text) return root.text as string;
if (root?.text) rootTexts.push(root.text as string);
}
if (rootTexts.length > 0) collected.push(rootTexts.join("\n"));
// Task shape: {result: {artifacts: [{parts: [...]}]}}
const artifacts = result.artifacts as Array<Record<string, unknown>> | undefined;
if (artifacts) {
for (const a of artifacts) {
const t = extractTextsFromParts(a.parts);
if (t) collected.push(t);
}
}
if (collected.length > 0) return collected.join("\n");
}
// {task: "text"} — request body format, shouldn't be in response but handle it

View File

@ -1,12 +1,38 @@
/** One file attached to a chat message. Shared shape for both
 * directions: when a user attaches a file the UI uploads it and
 * stashes the returned metadata here; when an agent returns a
 * `kind: file` part in an A2A response, the parser populates the
 * same fields. `uri` uses the `workspace:<abs-path>` scheme the
 * server returns — the renderer translates that to a download
 * request against GET /workspaces/:id/chat/download. */
export interface ChatAttachment {
  // Display name shown in the chat bubble and used as the saved filename.
  name: string;
  // Canonical identity of the file — `workspace:<abs-path>` or http(s).
  uri: string;
  // Optional metadata; absent when the producer didn't supply it.
  mimeType?: string;
  size?: number;
}
/** A single chat transcript entry, serializable as plain JSON. */
export interface ChatMessage {
  // Unique per message (crypto.randomUUID in createMessage).
  id: string;
  role: "user" | "agent" | "system";
  content: string;
  /** Attachments sent with or returned alongside this message.
   * Absent/undefined when there are none (createMessage normalises
   * an empty array away). */
  attachments?: ChatAttachment[];
  timestamp: string; // ISO string for serialization
}
export function createMessage(role: ChatMessage["role"], content: string): ChatMessage {
return { id: crypto.randomUUID(), role, content, timestamp: new Date().toISOString() };
/** Build a ChatMessage with a fresh random id and the current time.
 * An empty attachments array is normalised to `undefined` so "no
 * attachments" has a single representation downstream. */
export function createMessage(
  role: ChatMessage["role"],
  content: string,
  attachments?: ChatAttachment[],
): ChatMessage {
  const hasAttachments = attachments !== undefined && attachments.length > 0;
  return {
    id: crypto.randomUUID(),
    role,
    content,
    attachments: hasAttachments ? attachments : undefined,
    timestamp: new Date().toISOString(),
  };
}
// appendMessageDeduped adds a ChatMessage to `prev` unless the tail
@ -25,11 +51,23 @@ export function createMessage(role: ChatMessage["role"], content: string): ChatM
// messages ("hi", "hi") from a real user/agent still render.
/** Append `msg` to `prev` unless an equivalent message — same role,
 * same content, same attachment set — already landed within the last
 * `dedupeWindowMs` milliseconds. Duplicates older than the window are
 * allowed through so a user genuinely repeating themselves still
 * renders. Unparseable timestamps never match. */
export function appendMessageDeduped(prev: ChatMessage[], msg: ChatMessage, dedupeWindowMs = 3000): ChatMessage[] {
  const earliest = Date.now() - dedupeWindowMs;
  const incomingSig = attachmentSignature(msg.attachments);
  const isRecentDuplicate = (candidate: ChatMessage): boolean => {
    if (candidate.role !== msg.role) return false;
    if (candidate.content !== msg.content) return false;
    // Attachments participate in the dedupe key so a text-only push
    // doesn't shadow the file-carrying HTTP response (and vice versa).
    // Only same-text AND same-files collapses.
    if (attachmentSignature(candidate.attachments) !== incomingSig) return false;
    const when = Date.parse(candidate.timestamp);
    return !Number.isNaN(when) && when >= earliest;
  };
  return prev.some(isRecentDuplicate) ? prev : [...prev, msg];
}

/** Order-insensitive identity string for an attachment list. Built
 * from URIs only — names can differ across delivery paths (agent vs
 * our parser's basename fallback). Empty/absent lists map to "". */
function attachmentSignature(atts: ChatAttachment[] | undefined): string {
  if (!atts || atts.length === 0) return "";
  const uris = atts.map((a) => a.uri);
  uris.sort();
  return uris.join("|");
}

View File

@ -0,0 +1,135 @@
import { PLATFORM_URL } from "@/lib/api";
import { getTenantSlug } from "@/lib/tenant";
import type { ChatAttachment } from "./types";
/** Upload chat attachments for a workspace as multipart form-data.
 * Intentionally a direct fetch() instead of the `api.post` helper —
 * `api.post` JSON-stringifies the body, which would 500 on a Blob.
 * Mirrors the helper's header plumbing (tenant slug, admin token,
 * credentials) so SaaS + self-hosted callers work the same way.
 * @returns the server's attachment metadata (empty for no files).
 * @throws Error with status + response text on a non-2xx reply. */
export async function uploadChatFiles(
  workspaceId: string,
  files: File[],
): Promise<ChatAttachment[]> {
  if (files.length === 0) return [];

  const form = new FormData();
  for (const file of files) form.append("files", file, file.name);

  const headers: Record<string, string> = {};
  const tenant = getTenantSlug();
  if (tenant) headers["X-Molecule-Org-Slug"] = tenant;
  const token = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
  if (token) headers["Authorization"] = `Bearer ${token}`;

  // Uploads legitimately take a while on cold cache (tar write +
  // docker cp into the container). 60s is comfortable for the 25MB/
  // 50MB caps the server enforces.
  const res = await fetch(`${PLATFORM_URL}/workspaces/${workspaceId}/chat/uploads`, {
    method: "POST",
    headers,
    body: form,
    credentials: "include",
    signal: AbortSignal.timeout(60_000),
  });
  if (!res.ok) {
    const detail = await res.text().catch(() => "");
    throw new Error(`upload failed: ${res.status} ${detail}`);
  }
  const payload = (await res.json()) as { files: ChatAttachment[] };
  return payload.files ?? [];
}
/** Resolve a file URI into a browser-downloadable URL. Accepts:
 *  - `workspace:<abs-path>` (our canonical form)
 *  - `file:///workspace/...` (some agents emit this)
 *  - `/workspace/...` (bare absolute path inside the container)
 * Anything that normalises to an allowed-root container path is
 * rewritten to the authenticated /chat/download endpoint. HTTP(S)
 * URIs pass through unchanged so off-platform artefacts still link;
 * unknown schemes fall back to the raw URI — the caller decides how
 * to render those. */
export function resolveAttachmentHref(
  workspaceId: string,
  uri: string,
): string {
  const containerPath = normalizeWorkspaceUri(uri);
  if (containerPath === null) return uri;
  const query = `path=${encodeURIComponent(containerPath)}`;
  return `${PLATFORM_URL}/workspaces/${workspaceId}/chat/download?${query}`;
}
/** Extracts the absolute container path from a workspace-scoped URI,
 * or null if the URI isn't an allowed container path. The matching
 * roots mirror the server's `allowedRoots` allowlist. */
const ALLOWED_CONTAINER_ROOTS = ["/configs", "/workspace", "/home", "/plugins"];

function normalizeWorkspaceUri(uri: string): string | null {
  const candidate = uri.startsWith("workspace:")
    ? uri.slice("workspace:".length)
    : uri.startsWith("file:///")
      ? uri.slice("file://".length) // keep the leading slash
      : uri.startsWith("/")
        ? uri
        : null;
  if (candidate === null || candidate === "") return null;
  // Only rewrite when the path lands in an allowed root; otherwise
  // return null so the caller falls through to raw-URI handling
  // (which will open a new tab for HTTP-ish schemes).
  const allowed = ALLOWED_CONTAINER_ROOTS.some(
    (root) => candidate === root || candidate.startsWith(`${root}/`),
  );
  return allowed ? candidate : null;
}
/** Trigger a browser download for an attachment. Container-path URIs
 * are fetched with auth headers — the download endpoint requires
 * workspace auth, and a bare anchor click can't carry
 * `Authorization: Bearer` or `X-Molecule-Org-Slug` — then saved via a
 * temporary object-URL anchor. The server's 25MB per-file cap keeps
 * the in-memory blob bounded. HTTP(S)/unknown URIs skip the fetch
 * path and open directly: they're off-platform artefacts we don't
 * own auth for.
 * @throws Error on a non-2xx reply from the download endpoint. */
export async function downloadChatFile(
  workspaceId: string,
  attachment: ChatAttachment,
): Promise<void> {
  const href = resolveAttachmentHref(workspaceId, attachment.uri);
  if (normalizeWorkspaceUri(attachment.uri) === null) {
    // External URL — let the browser navigate. Opens in new tab so
    // the canvas context survives; `href` is the raw URI here.
    window.open(href, "_blank", "noopener,noreferrer");
    return;
  }

  const headers: Record<string, string> = {};
  const tenant = getTenantSlug();
  if (tenant) headers["X-Molecule-Org-Slug"] = tenant;
  const token = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
  if (token) headers["Authorization"] = `Bearer ${token}`;

  const res = await fetch(href, {
    headers,
    credentials: "include",
    signal: AbortSignal.timeout(60_000),
  });
  if (!res.ok) {
    throw new Error(`download failed: ${res.status}`);
  }

  const blob = await res.blob();
  const objectUrl = URL.createObjectURL(blob);
  const anchor = document.createElement("a");
  anchor.href = objectUrl;
  anchor.download = attachment.name;
  anchor.rel = "noopener";
  document.body.appendChild(anchor);
  anchor.click();
  anchor.remove();
  // Browsers hold the blob until the URL is revoked or the document
  // unloads; 30s is plenty of headroom for the click → save-dialog
  // round-trip.
  setTimeout(() => URL.revokeObjectURL(objectUrl), 30_000);
}

View File

@ -0,0 +1,170 @@
"use client";
import { useCallback, useState, type ReactNode } from "react";
import { api } from "@/lib/api";
import {
checkDeploySecrets,
resolveRuntime,
type PreflightResult,
type Template,
} from "@/lib/deploy-preflight";
import { MissingKeysModal } from "@/components/MissingKeysModal";
/**
 * useTemplateDeploy — shared preflight + POST + modal wiring for
* every surface that deploys a workspace from a template.
*
* Owns: `checkDeploySecrets` call, `MissingKeysModal` render, the
* `POST /workspaces` that follows, and per-template `deploying`
* state. Returns `modal` as a `ReactNode` ready to place inline.
*
* Why a hook rather than two copies: the runtime-fallback table
* (`resolveRuntime`) and the preflight wiring were previously
* copy-pasted between TemplatePalette and EmptyState. When the
* copies drifted (palette had the full id-to-runtime map,
* empty-state had only the `-default` strip), the two surfaces
* could silently disagree on future templates that need a
* non-identity mapping. Single owner closes the drift surface.
*/
export interface UseTemplateDeployOptions {
/** Compute canvas coords for the new workspace. Called once per
* successful deploy. Defaults to random coords in the [100, 500] ×
* [100, 400] band, matching the sidebar palette's historical
* placement. Override for surfaces that want deterministic
* placement (e.g. EmptyState's first-deploy "center-ish" target). */
canvasCoords?: () => { x: number; y: number };
/** Optional post-deploy side effect passed the id of the new
* workspace. EmptyState uses this to auto-select the node and
* flip the side panel to Chat so a fresh tenant sees something
* useful. */
onDeployed?: (workspaceId: string) => void;
}
/** Paired template + preflight result carried through the "user
 * clicked deploy → modal opens → keys saved → retry" loop. Named
 * so the `useState` generic and any future signature change have
 * a single place to track. */
interface MissingKeysInfo {
template: Template;
preflight: PreflightResult;
}
export interface UseTemplateDeployResult {
/** Template id currently being deployed (incl. the preflight
* network call), or null when idle. Callers pass this to disable
* the relevant button and show a spinner. */
deploying: string | null;
/** Last deploy error message, or null. Cleared on next `deploy`
* call. */
error: string | null;
/** Kick off a deploy. Opens the missing-keys modal if preflight
* returns not-ok; otherwise fires POST /workspaces directly. */
deploy: (template: Template) => Promise<void>;
/** The missing-keys modal, ready to place inline. Always non-null
* (the underlying component self-gates on `open`), so the caller
* can drop `{modal}` anywhere without conditionals. */
modal: ReactNode;
}
export function useTemplateDeploy(
  options: UseTemplateDeployOptions = {},
): UseTemplateDeployResult {
  // Per-template deploy state: id currently in flight (or null), the
  // last deploy error, and the template+preflight pair that drives
  // the missing-keys modal.
  const [deploying, setDeploying] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);
  const [missingKeysInfo, setMissingKeysInfo] = useState<MissingKeysInfo | null>(null);
  const { canvasCoords, onDeployed } = options;
  /** Actually execute the POST /workspaces call. Split from `deploy`
   * so the "modal → keys added → retry" path can reuse it without
   * re-running preflight (the user just proved the keys are now set). */
  const executeDeploy = useCallback(
    async (template: Template) => {
      setDeploying(template.id);
      setError(null);
      try {
        // Caller-supplied placement wins; otherwise scatter into the
        // default [100, 500] × [100, 400] band.
        const coords = canvasCoords
          ? canvasCoords()
          : {
              x: Math.random() * 400 + 100,
              y: Math.random() * 300 + 100,
            };
        const ws = await api.post<{ id: string }>("/workspaces", {
          name: template.name,
          template: template.id,
          tier: template.tier,
          canvas: coords,
        });
        onDeployed?.(ws.id);
      } catch (e) {
        setError(e instanceof Error ? e.message : "Deploy failed");
      } finally {
        // Always clear the in-flight flag — success and failure alike.
        setDeploying(null);
      }
    },
    [canvasCoords, onDeployed],
  );
  const deploy = useCallback(
    async (template: Template) => {
      setDeploying(template.id);
      setError(null);
      let preflight: PreflightResult;
      try {
        // Prefer the server-declared runtime; fall back to the legacy
        // id→runtime table for /templates responses that predate it.
        const runtime = template.runtime ?? resolveRuntime(template.id);
        preflight = await checkDeploySecrets({
          runtime,
          models: template.models,
          required_env: template.required_env,
        });
      } catch (e) {
        // Preflight network failure used to strand `deploying` — the
        // button stayed disabled forever because the throw bypassed
        // the setDeploying(null) in the non-ok branch below. Any
        // future refactor that drops this try block will regress the
        // same way; keep it narrow around just the preflight call
        // so a successful preflight still lets executeDeploy own
        // its own error path.
        setError(e instanceof Error ? e.message : "Preflight check failed");
        setDeploying(null);
        return;
      }
      if (!preflight.ok) {
        // Missing keys: hand off to the modal. The deploy resumes via
        // onKeysAdded → executeDeploy, which re-sets `deploying`.
        setMissingKeysInfo({ template, preflight });
        setDeploying(null);
        return;
      }
      await executeDeploy(template);
    },
    [executeDeploy],
  );
  // No useCallback here — consumers call this on every render anyway
  // (it's placed inline in JSX), and useCallback's deps would
  // invalidate on every state change, making the memoisation a wash.
  // Plain ReactNode is simpler and equally performant.
  const modal: ReactNode = (
    <MissingKeysModal
      open={!!missingKeysInfo}
      missingKeys={missingKeysInfo?.preflight.missingKeys ?? []}
      providers={missingKeysInfo?.preflight.providers ?? []}
      runtime={missingKeysInfo?.preflight.runtime ?? ""}
      onKeysAdded={() => {
        if (missingKeysInfo) {
          const template = missingKeysInfo.template;
          setMissingKeysInfo(null);
          // Intentional fire-and-forget — executeDeploy manages
          // its own error state via setError.
          void executeDeploy(template);
        }
      }}
      onCancel={() => setMissingKeysInfo(null)}
    />
  );
  return { deploying, error, deploy, modal };
}

View File

@ -7,7 +7,7 @@ import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
const mockFetch = vi.fn();
globalThis.fetch = mockFetch;
import { api } from "../api";
import { api, PlatformUnavailableError } from "../api";
// ---------------------------------------------------------------------------
// Helpers
@ -380,3 +380,99 @@ describe("api request timeout signal", () => {
expect(sigA).not.toBe(sigB);
});
});
// ---------------------------------------------------------------------------
// PlatformUnavailableError classification
// ---------------------------------------------------------------------------
//
// When the platform's wsauth middleware can't reach Postgres/Redis to
// validate a token, it returns 503 + {error, code:"platform_unavailable"}.
// api.ts must surface that as a typed error so the page-level renderer
// can show a dedicated diagnostic instead of a generic 5xx toast.
describe("PlatformUnavailableError classification", () => {
beforeEach(() => {
mockFetch.mockReset();
});
function mock503Platform(detail = "platform datastore unavailable — retry shortly") {
const body = JSON.stringify({ error: detail, code: "platform_unavailable" });
mockFetch.mockResolvedValueOnce({
ok: false,
status: 503,
json: () => Promise.reject(new Error("not used")),
text: () => Promise.resolve(body),
} as unknown as Response);
}
it("throws PlatformUnavailableError on 503 + code=platform_unavailable", async () => {
mock503Platform();
let thrown: unknown;
try {
await api.get("/workspaces");
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(PlatformUnavailableError);
expect((thrown as PlatformUnavailableError).code).toBe("platform_unavailable");
});
it("preserves the server-provided error string as the Error message", async () => {
mock503Platform("Postgres unreachable");
try {
await api.get("/workspaces");
} catch (e) {
expect(e).toBeInstanceOf(PlatformUnavailableError);
expect((e as Error).message).toBe("Postgres unreachable");
return;
}
throw new Error("expected to throw");
});
it("does NOT classify a generic 503 (no platform_unavailable code) as PlatformUnavailableError", async () => {
// Generic upstream-busy 503 — should keep the legacy generic-Error
// path so existing busy-retry UX isn't disrupted.
mockFetch.mockResolvedValueOnce({
ok: false,
status: 503,
json: () => Promise.reject(new Error("not used")),
text: () => Promise.resolve(JSON.stringify({ error: "upstream busy" })),
} as unknown as Response);
try {
await api.get("/workspaces/x/a2a");
} catch (e) {
expect(e).not.toBeInstanceOf(PlatformUnavailableError);
expect((e as Error).message).toContain("503");
return;
}
throw new Error("expected to throw");
});
it("does NOT classify on 500 (server kept legacy 500 for true internal errors)", async () => {
mockFailure(500, "boom");
try {
await api.get("/workspaces");
} catch (e) {
expect(e).not.toBeInstanceOf(PlatformUnavailableError);
return;
}
throw new Error("expected to throw");
});
it("falls back to generic Error when 503 body isn't JSON", async () => {
mockFetch.mockResolvedValueOnce({
ok: false,
status: 503,
json: () => Promise.reject(new Error("not used")),
text: () => Promise.resolve("Service Unavailable"),
} as unknown as Response);
try {
await api.get("/workspaces");
} catch (e) {
expect(e).not.toBeInstanceOf(PlatformUnavailableError);
expect((e as Error).message).toContain("503");
return;
}
throw new Error("expected to throw");
});
});

View File

@ -107,11 +107,39 @@ async function request<T>(
}
if (!res.ok) {
const text = await res.text();
// Recognise the platform's structured "datastore unreachable"
// shape (returned by wsauth_middleware.abortAuthLookupError when
// Postgres/Redis is down). Surface as a typed error so callers
// can render a dedicated diagnostic instead of a generic toast.
if (res.status === 503 && text) {
try {
const parsed = JSON.parse(text) as { code?: string; error?: string };
if (parsed.code === "platform_unavailable") {
throw new PlatformUnavailableError(parsed.error || "platform datastore unavailable");
}
} catch (err) {
// Re-throw the typed error if that's what we just constructed.
// JSON.parse failures fall through to the generic Error below.
if (err instanceof PlatformUnavailableError) throw err;
}
}
throw new Error(`API ${method} ${path}: ${res.status} ${text}`);
}
return res.json();
}
/** Thrown when the platform reports its datastore (Postgres/Redis)
 * is unreachable. Callers render a dedicated diagnostic rather than
 * the generic API-error toast — the user's next action is to check
 * local services, not to retry the API call. */
export class PlatformUnavailableError extends Error {
  /** Discriminator matching the server's structured 503 body. */
  readonly code = "platform_unavailable" as const;

  constructor(message: string) {
    super(message);
    this.name = "PlatformUnavailableError";
  }
}
export const api = {
get: <T>(path: string, options?: RequestOptions) => request<T>("GET", path, undefined, 0, options),
post: <T>(path: string, body?: unknown, options?: RequestOptions) => request<T>("POST", path, body, 0, options),

View File

@ -33,6 +33,46 @@ export interface TemplateLike {
required_env?: string[];
}
/** Full /templates response shape shared by TemplatePalette (sidebar)
 * and EmptyState (welcome grid). Was previously re-declared in each
 * with subtly different fields — EmptyState's narrower shape silently
 * dropped `runtime`, `models`, and `required_env`, so the preflight
 * couldn't see provider alternatives the template declared. Keep this
 * the single source of truth. */
export interface Template extends TemplateLike {
  // Template id; also the fallback input to resolveRuntime().
  id: string;
  name: string;
  description: string;
  // Passed through to POST /workspaces on deploy.
  tier: number;
  model: string;
  skills: string[];
  skill_count: number;
}
/** Map from a template id to the runtime name the per-workspace
 * preflight expects. Used only when the server's `/templates`
 * response predates the `runtime` field on the summary (legacy
 * installs) — modern responses carry it verbatim. Strip `-default`
 * for the claude-code template and identity-map everything else
 * that matches our current runtime registry.
 *
 * Lives in the preflight module (not TemplatePalette) so EmptyState
 * uses the SAME fallback table. A previous duplication in both call
 * sites left EmptyState with only the `-default` suffix strip, which
 * would silently disagree with TemplatePalette on templates whose
 * id needs a non-identity mapping.
 *
 * Hoisted to module scope (was rebuilt, along with the fallback
 * regex, on every call) — the table is immutable, so there's no
 * reason to reallocate it per lookup. */
const RUNTIME_BY_TEMPLATE_ID: Record<string, string> = {
  langgraph: "langgraph",
  "claude-code-default": "claude-code",
  openclaw: "openclaw",
  deepagents: "deepagents",
  crewai: "crewai",
  autogen: "autogen",
};

/** Resolve a legacy template id to its runtime name. Unknown ids
 * fall back to stripping a trailing `-default` suffix, which is the
 * identity for ids without one. */
export function resolveRuntime(templateId: string): string {
  return RUNTIME_BY_TEMPLATE_ID[templateId] ?? templateId.replace(/-default$/, "");
}
export interface SecretEntry {
key: string;
has_value: boolean;

View File

@ -5,27 +5,34 @@ import { describe, it, expect, beforeEach, vi } from "vitest";
global.fetch = vi.fn();
import { useCanvasStore } from "../canvas";
import type { WorkspaceData } from "../socket";
import type { WorkspaceNodeData } from "../canvas";
function makeWS(overrides: Partial<WorkspaceData> & { id: string }): WorkspaceData {
function makeWS(
overrides: Partial<WorkspaceNodeData> & { id: string },
): WorkspaceNodeData {
// makeWS builds a minimal WorkspaceNodeData for tests that set state
// directly on the store (bypassing hydrate). The `id` override is
// ignored — node IDs live on the outer Node<> wrapper, not inside
// `data`. It's accepted here so callers can keep their existing
// `makeWS({ id: "ws-foo" })` call sites even though the id is only
// used on the Node<> wrapper at the call site.
void overrides.id;
return {
name: "WS",
role: "agent",
tier: 1,
status: "online",
agent_card: null,
agentCard: null,
url: "http://localhost:9000",
parent_id: null,
active_tasks: 0,
last_error_rate: 0,
last_sample_error: "",
uptime_seconds: 60,
current_task: "",
x: 0,
y: 0,
parentId: null,
activeTasks: 0,
lastErrorRate: 0,
lastSampleError: "",
currentTask: "",
collapsed: false,
runtime: "",
budget_limit: null,
needsRestart: false,
budgetLimit: null,
...overrides,
};
}
@ -148,13 +155,13 @@ describe("batchRestart — partial failure", () => {
id: "ws-ok",
type: "workspace",
position: { x: 0, y: 0 },
data: { ...makeWS({ id: "ws-ok" }), needsRestart: true } as WorkspaceData & { needsRestart: boolean },
data: { ...makeWS({ id: "ws-ok" }), needsRestart: true } as WorkspaceNodeData,
},
{
id: "ws-fail",
type: "workspace",
position: { x: 0, y: 0 },
data: { ...makeWS({ id: "ws-fail" }), needsRestart: true } as WorkspaceData & { needsRestart: boolean },
data: { ...makeWS({ id: "ws-fail" }), needsRestart: true } as WorkspaceNodeData,
},
],
selectedNodeIds: new Set(["ws-ok", "ws-fail"]),
@ -166,7 +173,7 @@ describe("batchRestart — partial failure", () => {
});
const byId = Object.fromEntries(
useCanvasStore.getState().nodes.map((n) => [n.id, n.data as WorkspaceData & { needsRestart?: boolean }])
useCanvasStore.getState().nodes.map((n) => [n.id, n.data as WorkspaceNodeData])
);
expect(byId["ws-ok"].needsRestart).toBe(false);
expect(byId["ws-fail"].needsRestart).toBe(true);
@ -179,7 +186,7 @@ describe("batchRestart — partial failure", () => {
id: "ws-fail",
type: "workspace",
position: { x: 0, y: 0 },
data: { ...makeWS({ id: "ws-fail" }), needsRestart: true } as WorkspaceData & { needsRestart: boolean },
data: { ...makeWS({ id: "ws-fail" }), needsRestart: true } as WorkspaceNodeData,
},
],
selectedNodeIds: new Set(["ws-fail"]),

View File

@ -67,7 +67,19 @@ describe("canvas-events molecule:pan-to-node dispatch", () => {
vi.restoreAllMocks();
});
it("dispatches molecule:pan-to-node with the new nodeId for a NEW provision", () => {
it("dispatches both molecule:pan-to-node AND molecule:fit-deploying-org for a NEW root-level provision", () => {
// Two custom events are dispatched on NEW root-level provision:
// 1. molecule:fit-deploying-org — tells useCanvasViewport to
// frame the whole deploying subtree. Fires for root nodes
// too (commit 5adc8a74) so the canvas centers the just-
// landed root immediately instead of waiting for the
// first child to arrive.
// 2. molecule:pan-to-node — pans/zooms to the single node;
// only for standalone creates (no parent), so org-import
// children don't chase the spawn animation.
// A previous version of this test expected only #2 and failed
// when #1 was added for roots. If only one of these ever fires
// again, this test should flag the regression.
const { get, set } = makeStore([]);
const dispatched: Event[] = [];
const spy = vi.spyOn(window, "dispatchEvent").mockImplementation((e) => {
@ -81,9 +93,15 @@ describe("canvas-events molecule:pan-to-node dispatch", () => {
set
);
expect(dispatched).toHaveLength(1);
expect(dispatched[0].type).toBe("molecule:pan-to-node");
expect((dispatched[0] as CustomEvent).detail?.nodeId).toBe("ws-new");
expect(dispatched).toHaveLength(2);
const panEvent = dispatched.find((e) => e.type === "molecule:pan-to-node");
const fitEvent = dispatched.find((e) => e.type === "molecule:fit-deploying-org");
expect(panEvent, "molecule:pan-to-node should fire for standalone create").toBeDefined();
expect(fitEvent, "molecule:fit-deploying-org should fire so the viewport frames the root").toBeDefined();
expect((panEvent as CustomEvent).detail?.nodeId).toBe("ws-new");
expect((fitEvent as CustomEvent).detail?.rootId).toBe("ws-new");
spy.mockRestore();
});
it("does NOT dispatch molecule:pan-to-node when restarting an existing node", () => {

View File

@ -149,6 +149,75 @@ describe("buildNodesAndEdges parent + child workspaces", () => {
});
});
describe("buildNodesAndEdges auto-rescue respects live grown parent size", () => {
// Regression: child the user dragged into a user-grown area was
// false-rescued by every periodic rehydrate (socket health check
// every 30s) because the rescue heuristic used the initial
// grid-derived parent bbox, not the currently-grown size. Result:
// child snapped to a stale grid slot, then settled back ~1 frame
// later when growParentsToFitChildren re-ran. Observed 2026-04-25
// as "child jumps to weird location, then 30s later it's fine".
it("does NOT rescue a child placed inside the user-grown parent area", () => {
// Parent's initial grid-derived size is small; user has since grown it
// to 800×600. Child sits at relative (700, 400) — inside the grown
// bbox but outside the initial bbox. Without currentParentSizes,
// the rescue would re-place the child into a default grid slot.
const parentAbs = { x: 100, y: 100 };
const childAbs = { x: parentAbs.x + 700, y: parentAbs.y + 400 };
const workspaces = [
makeWS({ id: "parent", x: parentAbs.x, y: parentAbs.y }),
makeWS({ id: "child", parent_id: "parent", x: childAbs.x, y: childAbs.y }),
];
const grownDims = new Map([
["parent", { width: 800, height: 600 }],
]);
const { nodes } = buildNodesAndEdges(workspaces, new Map(), grownDims);
const child = nodes.find((n) => n.id === "child")!;
// Child's relative position should match what we passed in.
expect(child.position).toEqual({ x: 700, y: 400 });
});
it("DOES rescue a child whose stored position is outside even the grown parent", () => {
// Same parent but child is way outside (relative 5000, 5000).
// The rescue must still fire — the heuristic isn't "always trust
// the user", it's "trust the user up to the current parent bbox".
const parentAbs = { x: 100, y: 100 };
const childAbs = { x: parentAbs.x + 5000, y: parentAbs.y + 5000 };
const workspaces = [
makeWS({ id: "parent", x: parentAbs.x, y: parentAbs.y }),
makeWS({ id: "child", parent_id: "parent", x: childAbs.x, y: childAbs.y }),
];
const grownDims = new Map([
["parent", { width: 800, height: 600 }],
]);
const { nodes } = buildNodesAndEdges(workspaces, new Map(), grownDims);
const child = nodes.find((n) => n.id === "child")!;
// Rescued: NOT the original (5000, 5000); some grid slot instead.
expect(child.position.x).toBeLessThan(5000);
expect(child.position.y).toBeLessThan(5000);
});
it("falls back to initial-min bbox when no live size is provided (preserves legacy behavior)", () => {
// Empty currentParentSizes — first hydrate or test without store
// priming. Child outside the initial bbox should still be rescued.
const parentAbs = { x: 100, y: 100 };
const childAbs = { x: parentAbs.x + 700, y: parentAbs.y + 400 };
const workspaces = [
makeWS({ id: "parent", x: parentAbs.x, y: parentAbs.y }),
makeWS({ id: "child", parent_id: "parent", x: childAbs.x, y: childAbs.y }),
];
const { nodes } = buildNodesAndEdges(workspaces);
const child = nodes.find((n) => n.id === "child")!;
// Without a live size hint, the initial bbox applies — rescue
// fires, child gets a fresh slot, NOT the user-supplied (700,400).
expect(child.position).not.toEqual({ x: 700, y: 400 });
});
});
describe("buildNodesAndEdges deeply nested hierarchy", () => {
it("handles three levels of nesting", () => {
const workspaces = [

View File

@ -484,6 +484,70 @@ describe("removeNode", () => {
});
});
// ---------- removeSubtree ----------
describe("removeSubtree", () => {
  beforeEach(() => {
    // Fixture tree:
    //   root ─┬─ mid ── leaf
    //         └─ sibling
    //   unrelated             (a separate root)
    useCanvasStore.getState().hydrate([
      makeWS({ id: "root" }),
      makeWS({ id: "mid", parent_id: "root" }),
      makeWS({ id: "leaf", parent_id: "mid" }),
      makeWS({ id: "sibling", parent_id: "root" }),
      makeWS({ id: "unrelated" }),
    ]);
  });

  /** Sorted ids of every node still in the store. */
  const remainingIds = () =>
    useCanvasStore
      .getState()
      .nodes.map((n) => n.id)
      .sort();

  it("removes the root and every descendant in one shot", () => {
    useCanvasStore.getState().removeSubtree("root");
    expect(remainingIds()).toEqual(["unrelated"]);
  });

  it("removes a mid-level node and its descendants but leaves siblings + ancestors", () => {
    useCanvasStore.getState().removeSubtree("mid");
    expect(remainingIds()).toEqual(["root", "sibling", "unrelated"]);
  });

  it("removing a leaf is a no-op cascade (just drops the leaf)", () => {
    useCanvasStore.getState().removeSubtree("leaf");
    expect(remainingIds()).toEqual(["mid", "root", "sibling", "unrelated"]);
  });

  it("clears selection when the selected node is anywhere in the removed subtree", () => {
    useCanvasStore.getState().selectNode("leaf");
    useCanvasStore.getState().removeSubtree("root");
    expect(useCanvasStore.getState().selectedNodeId).toBeNull();
  });

  it("preserves selection when the selected node is outside the removed subtree", () => {
    useCanvasStore.getState().selectNode("unrelated");
    useCanvasStore.getState().removeSubtree("root");
    expect(useCanvasStore.getState().selectedNodeId).toBe("unrelated");
  });

  it("drops edges incident to any removed node", () => {
    // Hydrate builds parent → child edges; after removing `root`, no
    // remaining edge may reference root/mid/leaf/sibling on either end.
    useCanvasStore.getState().removeSubtree("root");
    const removed = ["root", "mid", "leaf", "sibling"];
    for (const edge of useCanvasStore.getState().edges) {
      expect(removed).not.toContain(edge.source);
      expect(removed).not.toContain(edge.target);
    }
  });
});
// ---------- isDescendant ----------
describe("isDescendant", () => {

View File

@ -1,7 +1,7 @@
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
// ---------------------------------------------------------------------------
// Mock the canvas store before importing socket.ts
// Mock the canvas store and api before importing socket.ts
// ---------------------------------------------------------------------------
vi.mock("../canvas", () => ({
useCanvasStore: {
@ -13,6 +13,7 @@ vi.mock("../canvas", () => ({
},
}));
// ---------------------------------------------------------------------------
// Mock WebSocket
// ---------------------------------------------------------------------------
@ -76,7 +77,6 @@ function getLastWS(): MockWebSocket {
beforeEach(() => {
MockWebSocket.instances = [];
vi.useFakeTimers();
// Reset mocked store state
vi.mocked(useCanvasStore.getState).mockReturnValue({
applyEvent: vi.fn(),
@ -263,13 +263,59 @@ describe("WebSocket onclose auto-reconnect", () => {
const ws = getLastWS();
ws.triggerClose();
// Fast-forward timers to trigger the reconnect
vi.runAllTimers();
// First reconnect attempt is scheduled at 1s (Math.min(1000 * 2^0,
// 30000)). Advance just past that — vi.runAllTimers() would
// additionally re-fire the fallback poll setInterval forever and
// hit the 10000-timer abort.
vi.advanceTimersByTime(1100);
expect(MockWebSocket.instances.length).toBeGreaterThan(1);
});
});
describe("HTTP fallback poll while WS unhealthy", () => {
it("starts a setInterval after onclose so /workspaces stays fresh", () => {
const setIntervalSpy = vi.spyOn(globalThis, "setInterval");
connectSocket();
const ws = getLastWS();
ws.triggerClose();
// The fallback poll runs at 10s; the reconnect uses setTimeout, so
// any setInterval registered between connect and close must be the
// fallback poll.
const fallbackCalls = setIntervalSpy.mock.calls.filter(
([, delay]) => delay === 10_000,
);
expect(fallbackCalls.length).toBeGreaterThan(0);
setIntervalSpy.mockRestore();
});
it("clears the fallback poll once the WS reconnects (onopen)", () => {
const clearIntervalSpy = vi.spyOn(globalThis, "clearInterval");
connectSocket();
const ws = getLastWS();
ws.triggerClose(); // starts fallback poll
clearIntervalSpy.mockClear();
// Advance past the first reconnect delay so a fresh ws exists,
// then trigger its open.
vi.advanceTimersByTime(1100);
const ws2 = getLastWS();
ws2.triggerOpen();
expect(clearIntervalSpy).toHaveBeenCalled();
clearIntervalSpy.mockRestore();
});
it("clears the fallback poll on disconnect", () => {
const clearIntervalSpy = vi.spyOn(globalThis, "clearInterval");
connectSocket();
const ws = getLastWS();
ws.triggerClose(); // starts fallback poll
clearIntervalSpy.mockClear();
disconnectSocket();
expect(clearIntervalSpy).toHaveBeenCalled();
clearIntervalSpy.mockRestore();
});
});
// ---------------------------------------------------------------------------
// onerror handler
// ---------------------------------------------------------------------------
@ -328,3 +374,45 @@ describe("health check", () => {
clearIntervalSpy.mockRestore();
});
});
// Rehydrate dedup logic itself is exercised by `RehydrateDedup` unit
// tests in this file (below). End-to-end coupling through the
// dynamic-imported `@/lib/api` was non-trivial under our existing
// fake-timer setup; isolating the gate in a pure helper keeps
// regression coverage without that mocking complexity.
import { RehydrateDedup } from "../socket";
describe("RehydrateDedup", () => {
it("first call passes the gate (no prior fetch)", () => {
const d = new RehydrateDedup(1500);
expect(d.shouldSkip(0)).toBe(false);
});
it("blocks while a fetch is in flight", () => {
const d = new RehydrateDedup(1500);
d.beginFetch();
expect(d.shouldSkip(100)).toBe(true);
});
it("blocks within the post-completion window", () => {
const d = new RehydrateDedup(1500);
d.beginFetch();
d.completeFetch(1_000);
// 1100 - 1000 = 100 < 1500 → skip
expect(d.shouldSkip(1_100)).toBe(true);
// 2600 - 1000 = 1600 > 1500 → allow
expect(d.shouldSkip(2_600)).toBe(false);
});
it("a completed fetch followed by another beginFetch blocks for the new in-flight", () => {
const d = new RehydrateDedup(1500);
d.beginFetch();
d.completeFetch(1_000);
// First wait out the dedup window
expect(d.shouldSkip(2_600)).toBe(false);
d.beginFetch();
// Now a second fetch is in flight; further calls block again
expect(d.shouldSkip(2_700)).toBe(true);
});
});

View File

@ -1,7 +1,7 @@
import type { Node, Edge } from "@xyflow/react";
import type { WSMessage } from "./socket";
import type { WorkspaceNodeData } from "./canvas";
import { extractResponseText } from "@/components/tabs/chat/message-parser";
import { extractResponseText, extractFilesFromTask } from "@/components/tabs/chat/message-parser";
// ---------------------------------------------------------------------------
// Monotonically increasing counter used to assign grid positions.
@ -21,13 +21,46 @@ import { extractResponseText } from "@/components/tabs/chat/message-parser";
//
// A monotonic counter is immune to deletions: it only ever increases.
// ---------------------------------------------------------------------------
import { appendClass, removeClass, scheduleNodeClassRemoval } from "./classNames";
let _provisioningSequence = 0;
/** Reset module-level event-ordering state — exposed for test teardown
 * only. Clears both the monotonic grid-slot counter and the buffered
 * out-of-order WORKSPACE_ONLINE ids (_pendingOnline) so one test's
 * leftover state can't leak into the next. */
export function resetProvisioningSequence(): void {
  _provisioningSequence = 0;
  _pendingOnline.clear();
}
/** WORKSPACE_ONLINE events that arrived BEFORE the matching
 * WORKSPACE_PROVISIONING are buffered here so the late-arriving
 * provision event can immediately flip to the correct status
 * instead of leaving the node stuck as "provisioning" forever.
 * Cleared when applied, or on module reset (tests). */
const _pendingOnline = new Set<string>();
/** Debounced parent-grow. Each child arrival schedules this; the
* timer keeps resetting as more siblings land, so the actual
* width/height update runs ONCE after arrivals go quiet. Avoids
* the visible size-pulse that happened when growParentsToFitChildren
* ran per event. */
let _growTimer: ReturnType<typeof setTimeout> | null = null;
function scheduleParentGrow(): void {
if (typeof window === "undefined") return;
if (_growTimer) clearTimeout(_growTimer);
_growTimer = setTimeout(() => {
_growTimer = null;
import("./canvas").then(({ useCanvasStore }) => {
useCanvasStore.getState().growParentsToFitChildren?.();
});
}, 300);
}
// (absoluteNodePosition was used by an earlier "spawn from parent"
// revision that subtracted parent absolute coords from server-sent
// absolute child coords. The server now ships parent-relative coords
// directly, so the walk is no longer needed. Deleted rather than
// kept as dead code.)
/**
* Standalone event handler extracted from the canvas store.
* Applies a single WebSocket event to the current node/edge state.
@ -38,7 +71,7 @@ export function handleCanvasEvent(
nodes: Node<WorkspaceNodeData>[];
edges: Edge[];
selectedNodeId: string | null;
agentMessages: Record<string, Array<{ id: string; content: string; timestamp: string }>>;
agentMessages: Record<string, Array<{ id: string; content: string; timestamp: string; attachments?: Array<{ name: string; uri: string; mimeType?: string; size?: number }> }>>;
},
set: (partial: Record<string, unknown>) => void,
): void {
@ -47,14 +80,44 @@ export function handleCanvasEvent(
switch (msg.event) {
case "WORKSPACE_ONLINE": {
const existing = nodes.find((n) => n.id === msg.workspace_id);
if (existing) {
set({
nodes: nodes.map((n) =>
n.id === msg.workspace_id
? { ...n, data: { ...n.data, status: "online" } }
: n
),
});
if (!existing) {
// PROVISIONING event hasn't been applied yet (WS reorder or
// this tab joined mid-deploy). Buffer so the later PROVISIONING
// handler can flip status in one pass instead of leaving the
// node stuck in "provisioning" forever.
_pendingOnline.add(msg.workspace_id);
break;
}
// Flip incoming edge from blueprint → laser so the link is
// drawn solid the moment this child is live. The laser class
// plays the stroke-dashoffset keyframe once; after ~500ms the
// edge falls back to the default solid style (see
// org-deploy.css and the follow-up setTimeout below).
const updatedEdges = edges.map((e) =>
e.target === msg.workspace_id && e.className?.includes("mol-deploy-edge-blueprint")
? { ...e, className: "mol-deploy-edge-laser" }
: e,
);
set({
edges: updatedEdges,
nodes: nodes.map((n) =>
n.id === msg.workspace_id
? { ...n, data: { ...n.data, status: "online" } }
: n,
),
});
// Remove the laser class after its keyframe ends so the edge
// settles into the app's default solid styling. Fire-and-forget.
if (typeof window !== "undefined") {
const targetEdgeId = `${existing.data.parentId ?? ""}-${msg.workspace_id}`;
window.setTimeout(() => {
const s = get();
set({
edges: s.edges.map((e) =>
e.id === targetEdgeId ? { ...e, className: undefined } : e,
),
});
}, 600);
}
break;
}
@ -113,25 +176,73 @@ export function handleCanvasEvent(
),
});
} else {
// Spread new nodes in a grid so they don't stack at the viewport origin.
// Use the monotonic _provisioningSequence counter (not nodes.length) so
// deletions never cause two live nodes to share a grid slot.
const GRID_COLS = 4;
const COL_SPACING = 320;
const ROW_SPACING = 160;
const GRID_ORIGIN_X = 100;
const GRID_ORIGIN_Y = 100;
const idx = _provisioningSequence++;
const x = GRID_ORIGIN_X + (idx % GRID_COLS) * COL_SPACING;
const y = GRID_ORIGIN_Y + Math.floor(idx / GRID_COLS) * ROW_SPACING;
// Payload may carry parent_id + final x/y (org import broadcasts
// these so the canvas can animate the "spawn from parent" motion).
// Standalone workspace creates still omit them — fall back to the
// grid-slot behaviour that handled that case historically.
const parentIdRaw = (msg.payload.parent_id as string | undefined) ?? null;
const finalX = msg.payload.x as number | undefined;
const finalY = msg.payload.y as number | undefined;
let spawnX: number;
let spawnY: number;
let targetX: number;
let targetY: number;
let parentId: string | null = null;
// Place the node at its final slot immediately — no
// spring-from-parent motion. The earlier "materialize from
// parent then tween to target" was expensive (two set()
// calls + rAF) and produced wrong offsets because the
// server sends absolute coords computed against the template's
// own coord system while the client had placed the parent at
// a grid slot, so the target math always landed off-grid.
// Now: server coords are parent-relative (see org_import.go),
// we trust them verbatim.
const parentInStore = parentIdRaw
? nodes.find((n) => n.id === parentIdRaw)
: undefined;
if (parentIdRaw && parentInStore && finalX !== undefined && finalY !== undefined) {
targetX = finalX;
targetY = finalY;
parentId = parentIdRaw;
} else {
// Standalone create OR org-child whose parent hasn't arrived
// yet (rare WS reorder) — monotonic-grid placement. The
// follow-up hydrate pass reconciles parent_id + the correct
// nested position if parent lands later.
const GRID_COLS = 4;
const COL_SPACING = 320;
const ROW_SPACING = 160;
const GRID_ORIGIN_X = 100;
const GRID_ORIGIN_Y = 100;
const idx = _provisioningSequence++;
targetX = GRID_ORIGIN_X + (idx % GRID_COLS) * COL_SPACING;
targetY = GRID_ORIGIN_Y + Math.floor(idx / GRID_COLS) * ROW_SPACING;
}
spawnX = targetX;
spawnY = targetY;
// Parent→child relationship is already visible via React
// Flow's nested rendering (the child card sits INSIDE the
// parent container). An explicit edge on top of that was
// visual double-counting and made the canvas look busy;
// removed per demo feedback. A2A edges (showA2AEdges) still
// render when enabled — those represent runtime traffic,
// which nesting doesn't express.
set({
nodes: [
...nodes,
{
id: msg.workspace_id,
type: "workspaceNode",
position: { x, y },
position: { x: spawnX, y: spawnY },
// React Flow's parentId (distinct from data.parentId)
// triggers parent-relative positioning. Set it when the
// server told us this is an org-import child so the
// node renders nested inside the parent container.
...(parentId ? { parentId } : {}),
className: "mol-deploy-spawn",
data: {
name: (msg.payload.name as string) ?? "New Workspace",
status: "provisioning",
@ -143,7 +254,7 @@ export function handleCanvasEvent(
lastErrorRate: 0,
lastSampleError: "",
url: "",
parentId: null,
parentId, // data.parentId mirrors React Flow's parentId
currentTask: "",
runtime: (msg.payload.runtime as string) ?? "",
needsRestart: false,
@ -152,8 +263,76 @@ export function handleCanvasEvent(
],
});
// Pan the canvas to the new node
// Grow the parent to fit the just-landed child. DEBOUNCED
// across rapid sibling arrivals — firing width/height updates
// on every child made the parent card visibly pulse in size
// as each kid landed, which read as the parent "flashing
// around". One grow pass ~300ms after the last arrival
// coalesces the whole burst into a single layout change.
if (parentId && typeof window !== "undefined") {
scheduleParentGrow();
}
// Parent-border pulse removed per demo feedback — the soft
// box-shadow ring on each arrival compounded with the size
// grow to make the whole parent card look unstable. The
// dim-light signal on the provisioning child is sufficient
// acknowledgement that something is happening.
// Remove the one-shot spawn class after the keyframe ends so
// future re-renders don't replay it.
scheduleNodeClassRemoval(msg.workspace_id, "mol-deploy-spawn", 400, get, set);
// Auto-pan+zoom to the whole deploying org after each
// arrival so the user always sees the full picture — unless
// they've panned themselves (handled by the viewport hook,
// which aborts the fit when the user moved after the last
// auto-fit). Event name matches the existing handler in
// useCanvasViewport that knows how to compute subtree bounds.
//
// Fire for roots too (not just children) so the canvas
// centers on the just-landed root immediately instead of
// waiting for the first child to arrive ~2s later. The
// viewport hook walks UP to find the true root, so passing
// the node's own id when there's no parent is equivalent
// to passing the root.
if (typeof window !== "undefined") {
window.dispatchEvent(
new CustomEvent("molecule:fit-deploying-org", {
detail: { rootId: parentIdRaw ?? msg.workspace_id },
}),
);
}
// Race handling: if a WORKSPACE_ONLINE event beat the
// matching PROVISIONING to this tab, the online flag was
// buffered in _pendingOnline. Apply it now so the node
// doesn't stay stuck as "provisioning" forever.
//
// Only flip to "online" if the current status is still
// "provisioning" at drain time. Otherwise a WORKSPACE_DEGRADED
// / FAILED / PAUSED that arrived between the set() above and
// the scheduled drain would be silently clobbered — the
// buffered ONLINE is stale by then.
if (_pendingOnline.has(msg.workspace_id)) {
_pendingOnline.delete(msg.workspace_id);
if (typeof window !== "undefined") {
window.setTimeout(() => {
const s = get();
set({
nodes: s.nodes.map((n) =>
n.id === msg.workspace_id && n.data.status === "provisioning"
? { ...n, data: { ...n.data, status: "online" } }
: n,
),
});
}, 0);
}
}
// Pan the canvas to the new node (standalone create only —
// during an org import, zooming to every child chases the
// spawn animation around the viewport which is jarring).
if (!parentIdRaw && typeof window !== "undefined") {
window.dispatchEvent(
new CustomEvent("molecule:pan-to-node", {
detail: { nodeId: msg.workspace_id },
@ -252,12 +431,19 @@ export function handleCanvasEvent(
}
case "A2A_RESPONSE": {
// A2A proxy completed — extract response text and store as agent message.
// This gives the ChatTab instant response delivery via WebSocket instead of polling.
// A2A proxy completed — extract response text AND any `kind: file`
// parts. Without the file extraction, agent-returned attachments
// delivered via this WebSocket path would disappear (the canvas
// would render a text-only message while the HTTP fallback
// rendered the same reply with download chips, depending on
// which delivery path raced to completion first).
const responseBody = msg.payload.response_body as Record<string, unknown> | undefined;
if (responseBody) {
const text = extractResponseText(responseBody);
if (text) {
const attachments = extractFilesFromTask(
(responseBody.result ?? responseBody) as Record<string, unknown>,
);
if (text || attachments.length > 0) {
const { agentMessages } = get();
const existing = agentMessages[msg.workspace_id] || [];
set({
@ -265,7 +451,12 @@ export function handleCanvasEvent(
...agentMessages,
[msg.workspace_id]: [
...existing,
{ id: crypto.randomUUID(), content: text, timestamp: new Date().toISOString() },
{
id: crypto.randomUUID(),
content: text,
timestamp: new Date().toISOString(),
attachments: attachments.length > 0 ? attachments : undefined,
},
],
},
});

View File

@ -280,6 +280,15 @@ export function computeAutoLayout(
* Accepts an optional layoutOverrides map (from computeAutoLayout) to override
* positions for workspaces that were at 0,0.
*
* `currentParentSizes` carries the LIVE measured/grown dimensions of parent
* nodes from the existing client store. The auto-rescue heuristic below
* (line ~445) compares each child's stored relative position against its
* parent's bbox; without the live size, the bbox is whatever the
* grid-derived initial min-size formula produced. That falsely rescued
* children dragged into the user-grown area on every periodic rehydrate
* (socket.ts:87 fires every 30s if no WS events seen) observed
* 2026-04-25 as "child jumps to weird location, then settles 30s later".
*
* Parent/child rendering model: every workspace is a first-class React Flow
* node (full card). When a workspace has parent_id set, its RF `parentId` is
* set to the parent's id and its position is stored RELATIVE to the parent
@ -290,7 +299,8 @@ export function computeAutoLayout(
*/
export function buildNodesAndEdges(
workspaces: WorkspaceData[],
layoutOverrides: Map<string, { x: number; y: number }> = new Map()
layoutOverrides: Map<string, { x: number; y: number }> = new Map(),
currentParentSizes: Map<string, { width: number; height: number }> = new Map(),
): {
nodes: Node<WorkspaceNodeData>[];
edges: Edge[];
@ -439,7 +449,23 @@ export function buildNodesAndEdges(
// child.left = 500 < parent.right = 800 → overlaps → kept
// legacy huge positive (position.x = 50000):
// child.left = 50000 >= parent.right → no overlap → rescued
const psize = parentSize.get(ws.parent_id!)!;
const initialPsize = parentSize.get(ws.parent_id!)!;
// Use the larger of (initial min, currently grown) for the bbox
// test. Without this, a child the user dragged into the grown
// area appears "outside" the (smaller) initial bbox and the
// rescue below false-fires on every periodic rehydrate, jumping
// the child to a stale grid slot. Live grown dims arrive via
// currentParentSizes from hydrate(); on first load (empty
// store), the map is empty and we fall back to the initial min
// — preserving the original rescue semantics for genuinely
// detached legacy data.
const liveParentSize = currentParentSizes.get(ws.parent_id!);
const psize = liveParentSize
? {
width: Math.max(initialPsize.width, liveParentSize.width),
height: Math.max(initialPsize.height, liveParentSize.height),
}
: initialPsize;
const myW = subtreeSize.get(ws.id)?.width ?? CHILD_DEFAULT_WIDTH;
const myH = subtreeSize.get(ws.id)?.height ?? CHILD_DEFAULT_HEIGHT;
const overlapsX =

View File

@ -138,6 +138,16 @@ interface CanvasState {
updateNodeData: (id: string, data: Partial<WorkspaceNodeData>) => void;
restartWorkspace: (id: string) => Promise<void>;
removeNode: (id: string) => void;
/** Remove a node AND every descendant in one atomic update. Mirrors
 * the server-side cascade: `DELETE /workspaces/:id?confirm=true`
 * drops the row plus every descendant in one transaction. The
 * caller (Canvas / DetailsTab delete handlers) used to call
 * `removeNode(rootId)` and rely on per-descendant WORKSPACE_REMOVED
 * WS events to clear the rest. When the WS is unhealthy those
 * events never arrive and the children orphan to the root until a
 * manual page refresh. `removeSubtree` makes the cascade
 * WS-independent. */
removeSubtree: (rootId: string) => void;
setDragOverNode: (id: string | null) => void;
nestNode: (draggedId: string, targetId: string | null) => Promise<void>;
isDescendant: (ancestorId: string, nodeId: string) => boolean;
@ -177,6 +187,15 @@ interface CanvasState {
setPendingDelete: (
v: { id: string; name: string; hasChildren: boolean; children: { id: string; name: string }[] } | null
) => void;
/** Node IDs whose DELETE request is in flight. Populated the moment
* the user confirms a cascade delete; drained as WORKSPACE_REMOVED
* events strip the nodes (or all-at-once on request failure). Lets
* the canvas render the "don't touch — something is happening"
* treatment (dim + non-draggable) during the network round trip
* and the server-side cascade, matching the deploy-lock UX. */
deletingIds: Set<string>;
beginDelete: (ids: Iterable<string>) => void;
endDelete: (ids: Iterable<string>) => void;
searchOpen: boolean;
setSearchOpen: (open: boolean) => void;
viewport: { x: number; y: number; zoom: number };
@ -190,8 +209,8 @@ interface CanvasState {
batchPause: () => Promise<void>;
batchDelete: () => Promise<void>;
/** Agent-pushed messages keyed by workspace ID. ChatTab consumes and clears these. */
agentMessages: Record<string, Array<{ id: string; content: string; timestamp: string }>>;
consumeAgentMessages: (workspaceId: string) => Array<{ id: string; content: string; timestamp: string }>;
agentMessages: Record<string, Array<{ id: string; content: string; timestamp: string; attachments?: Array<{ name: string; uri: string; mimeType?: string; size?: number }> }>>;
consumeAgentMessages: (workspaceId: string) => Array<{ id: string; content: string; timestamp: string; attachments?: Array<{ name: string; uri: string; mimeType?: string; size?: number }> }>;
/** WebSocket connection status — drives the live indicator in the Toolbar. */
wsStatus: "connected" | "connecting" | "disconnected";
setWsStatus: (status: "connected" | "connecting" | "disconnected") => void;
@ -309,6 +328,17 @@ export const useCanvasStore = create<CanvasState>((set, get) => ({
closeContextMenu: () => set({ contextMenu: null }),
pendingDelete: null,
setPendingDelete: (v) => set({ pendingDelete: v }),
deletingIds: new Set<string>(),
beginDelete: (ids) => {
  // Always hand the store a fresh Set (never mutate in place), same
  // copy-then-update discipline as the rest of the store.
  set({ deletingIds: new Set([...get().deletingIds, ...ids]) });
},
endDelete: (ids) => {
  // Materialize the ids to drop once, then rebuild the Set without
  // them — the store always receives a fresh instance.
  const toDrop = new Set(ids);
  const remaining = new Set(
    [...get().deletingIds].filter((id) => !toDrop.has(id)),
  );
  set({ deletingIds: remaining });
},
searchOpen: false,
setSearchOpen: (open) => set({ searchOpen: open }),
agentMessages: {},
@ -775,9 +805,69 @@ export const useCanvasStore = create<CanvasState>((set, get) => ({
});
},
removeSubtree: (rootId) => {
  const { nodes, edges, selectedNodeId } = get();
  // One O(N) pass builds a parentId → childIds adjacency index so the
  // traversal below never rescans the node list per level. The store
  // typically holds <500 nodes, but the index keeps the cost
  // predictable as orgs grow.
  const childIndex = new Map<string, string[]>();
  for (const node of nodes) {
    const parent = node.data.parentId ?? null;
    if (parent === null) continue;
    const bucket = childIndex.get(parent);
    if (bucket) bucket.push(node.id);
    else childIndex.set(parent, [node.id]);
  }
  // Breadth-first collect of the root plus every descendant, using an
  // index-cursor queue (no shift() churn).
  const doomed = new Set<string>([rootId]);
  const queue: string[] = [rootId];
  for (let cursor = 0; cursor < queue.length; cursor++) {
    const kids = childIndex.get(queue[cursor]);
    if (!kids) continue;
    for (const childId of kids) {
      if (!doomed.has(childId)) {
        doomed.add(childId);
        queue.push(childId);
      }
    }
  }
  // Single atomic update: drop the subtree's nodes, every edge that
  // touches them, and the selection if it pointed inside the subtree.
  set({
    nodes: nodes.filter((node) => !doomed.has(node.id)),
    edges: edges.filter(
      (edge) => !doomed.has(edge.source) && !doomed.has(edge.target),
    ),
    selectedNodeId:
      selectedNodeId !== null && doomed.has(selectedNodeId)
        ? null
        : selectedNodeId,
  });
},
hydrate: (workspaces: WorkspaceData[]) => {
const layoutOverrides = computeAutoLayout(workspaces);
const { nodes, edges } = buildNodesAndEdges(workspaces, layoutOverrides);
// Carry the live measured/grown parent sizes from the existing
// store into the rebuild. buildNodesAndEdges runs an auto-rescue
// pass on each child to detach orphans whose stored relative
// position falls outside the parent bbox — without the live
// size, the bbox is the initial grid-derived minimum, which
// false-flags any child the user has dragged into the
// user-grown area. Periodic rehydrate (socket.ts health check,
// 30s) was reasserting the rescue against legitimate user
// placements, causing the "child jumps to weird location, then
// settles" symptom.
const current = get().nodes;
const currentParentSizes = new Map<string, { width: number; height: number }>();
for (const n of current) {
const w = (n.measured?.width ?? n.width) as number | undefined;
const h = (n.measured?.height ?? n.height) as number | undefined;
if (typeof w === "number" && typeof h === "number") {
currentParentSizes.set(n.id, { width: w, height: h });
}
}
const { nodes, edges } = buildNodesAndEdges(
workspaces,
layoutOverrides,
currentParentSizes,
);
set({ nodes, edges });
for (const [nodeId, { x, y }] of layoutOverrides) {
api.patch(`/workspaces/${nodeId}`, { x, y }).catch(() => {});

View File

@ -0,0 +1,53 @@
/**
 * React Flow className helpers shared across the store and canvas
 * hooks. React Flow's Node.className / Edge.className is a single
 * space-separated string, so every call site was previously doing
 * the same `.split/.filter/.join` dance — centralise it here so
 * any future class manipulation follows one policy.
 */
/** Add `cls` to a space-separated className string, de-duplicating.
 * Undefined/empty input yields just `cls`; when the class is already
 * present, the original string is returned untouched (its original
 * spacing intact). */
export function appendClass(existing: string | undefined, cls: string): string {
  if (!existing) return cls;
  const tokens = existing.split(/\s+/).filter((t) => t.length > 0);
  for (const token of tokens) {
    if (token === cls) return existing;
  }
  return [...tokens, cls].join(" ");
}
/** Drop `cls` from a space-separated className string if present.
 * Returns the remaining classes joined by single spaces — possibly
 * the empty string. */
export function removeClass(existing: string | undefined, cls: string): string {
  if (!existing) return "";
  const kept: string[] = [];
  for (const token of existing.split(/\s+/)) {
    if (token.length > 0 && token !== cls) kept.push(token);
  }
  return kept.join(" ");
}
/** Schedule removal of the one-shot class `cls` from node `nodeId`
 * after `delayMs`. Shared by callers that apply a transient animation
 * class and must strip it after the animation so future re-renders
 * don't replay it.
 *
 * No-ops when `window` is undefined (SSR). Accepts the store's
 * get/set pair directly rather than a store reference so it composes
 * with the existing handleCanvasEvent signature. */
export function scheduleNodeClassRemoval(
  nodeId: string,
  cls: string,
  delayMs: number,
  get: () => { nodes: Array<{ id: string; className?: string }> },
  set: (partial: Record<string, unknown>) => void,
): void {
  if (typeof window === "undefined") return;
  window.setTimeout(() => {
    const { nodes } = get();
    set({
      nodes: nodes.map((node) => {
        if (node.id !== nodeId) return node;
        // Strip `cls`: tokenize, drop it, re-join with single spaces.
        const stripped = (node.className ?? "")
          .split(/\s+/)
          .filter((token) => token.length > 0 && token !== cls)
          .join(" ");
        return { ...node, className: stripped };
      }),
    });
  }, delayMs);
}

View File

@ -12,30 +12,129 @@ export interface WSMessage {
payload: Record<string, unknown>;
}
/** Window during which a freshly-completed rehydrate is reused
 * instead of firing a new GET. Picked to absorb the connect →
 * health-check sequence (rehydrate runs once on onopen, then the
 * first health-check tick fires immediately after — both should
 * share the same fetch) without holding back legitimately-spaced
 * rehydrates triggered by genuine WS silence later. */
const REHYDRATE_DEDUP_WINDOW_MS = 1_500;
/** Pure dedup gate for rehydrate(). Skips callers in two states:
 *
 * - in-flight (between beginFetch and completeFetch): every
 *   shouldSkip returns true.
 * - post-completion window (now < completedAt + windowMs):
 *   shouldSkip returns true.
 *
 * Extracted from ReconnectingSocket so the gate is unit-testable
 * without mocking dynamic imports or fake timers. The class is
 * stateful but tiny; instances are not shared across sockets. */
export class RehydrateDedup {
  private fetching = false;
  // -Infinity (not 0) so the very first shouldSkip(now) always passes:
  // with 0, a fake-timer clock at now === 0 — or a real clock within
  // windowMs of epoch on a skewed system — would false-trip the gate.
  private lastCompletedAt = Number.NEGATIVE_INFINITY;

  constructor(private readonly windowMs: number) {}

  /** True when the caller should skip firing a new rehydrate fetch. */
  shouldSkip(now: number): boolean {
    return this.fetching || now - this.lastCompletedAt < this.windowMs;
  }

  /** Mark a fetch as started; blocks every caller until completeFetch. */
  beginFetch(): void {
    this.fetching = true;
  }

  /** Mark the in-flight fetch done at `now`, opening the dedup window. */
  completeFetch(now: number = Date.now()): void {
    this.fetching = false;
    this.lastCompletedAt = now;
  }
}
/** Cadence for the HTTP fallback rehydrate that runs while the WS is
 * in connecting/disconnected limbo. 10s is short enough that the user
 * sees the STARTING → ONLINE transition within one tick after the
 * platform finishes provisioning, but long enough to not pound
 * /workspaces if the network truly is down. The dedup gate inside
 * rehydrate() collapses this against the post-onopen rehydrate, so
 * reconnect doesn't pay for a duplicate fetch. */
const FALLBACK_POLL_MS = 10_000;
class ReconnectingSocket {
private ws: WebSocket | null = null;
private attempt = 0;
private url: string;
private lastEventTime = 0;
private healthCheckTimer: ReturnType<typeof setInterval> | null = null;
private reconnectTimer: ReturnType<typeof setTimeout> | null = null;
// Polls /workspaces while the WS is unhealthy so the canvas reflects
// truth even when realtime events aren't arriving. Without this the
// store can stay frozen for minutes — e.g. workspaces transition
// STARTING → ONLINE on the platform but the canvas keeps showing
// STARTING until the WS finally reconnects, triggering false
// "Provisioning Timeout" banners on already-online workspaces.
private fallbackPollTimer: ReturnType<typeof setInterval> | null = null;
// disposed signals that disconnect() has been called. Any in-flight
// reconnect / handshake must abort early rather than attach to a
// socket the caller no longer owns — otherwise React StrictMode's
// effect double-invoke (and any future intentional disconnect)
// leaves a zombie WebSocket alive forever.
private disposed = false;
// In-flight singleton + dedup window for rehydrate. Two reasons to
// collapse rapid calls:
// 1. connect.onopen fires rehydrate immediately, and the very next
// health-check tick may fire it again before the first GET
// returns — wasted round trip + rebuild churn that resets the
// mid-flight UI state (auto-rescue heuristics, grow passes).
// 2. Future call sites (a manual "Refresh" button, post-import
// hydrate, error-recovery rehydrate) might pile up.
// Keeping rehydrate idempotent at the call-site level means each
// caller can fire-and-forget without coordinating.
private rehydrateInFlight: Promise<void> | null = null;
private rehydrateDedup = new RehydrateDedup(REHYDRATE_DEDUP_WINDOW_MS);
constructor(url: string) {
this.url = url;
}
connect() {
if (this.disposed) return;
useCanvasStore.getState().setWsStatus("connecting");
this.ws = new WebSocket(this.url);
// Start the HTTP fallback poll up-front, not just on onclose. Two
// scenarios this guards against:
// 1. The very first connect attempt — onclose hasn't fired yet
// because we never had a successful onopen.
// 2. A failed handshake where the browser takes tens of seconds
// to surface as onclose (Chrome can hold a SYN-SENT WebSocket
// open for ~75s before giving up).
// Idempotent — startFallbackPoll early-returns if a timer is
// already running, so calling it from both places is cheap.
this.startFallbackPoll();
const ws = new WebSocket(this.url);
this.ws = ws;
this.ws.onopen = () => {
ws.onopen = () => {
if (this.disposed || this.ws !== ws) {
// Late-open on an abandoned socket. Close it cleanly; the
// caller already moved on.
try { ws.close(); } catch { /* noop */ }
return;
}
this.attempt = 0;
this.lastEventTime = Date.now();
useCanvasStore.getState().setWsStatus("connected");
this.stopFallbackPoll();
this.rehydrate();
this.startHealthCheck();
};
this.ws.onmessage = (event) => {
ws.onmessage = (event) => {
if (this.disposed || this.ws !== ws) return;
this.lastEventTime = Date.now();
try {
const msg: WSMessage = JSON.parse(event.data);
@ -45,15 +144,21 @@ class ReconnectingSocket {
}
};
this.ws.onclose = () => {
ws.onclose = () => {
// Fired on intentional close (disposed) OR server/network drop.
// Only schedule a reconnect when the socket is still live AND
// corresponds to the WS we just tore down (prevents a stale
// onclose from a zombie socket from re-arming the loop).
if (this.disposed || this.ws !== ws) return;
this.stopHealthCheck();
useCanvasStore.getState().setWsStatus("connecting");
this.startFallbackPoll();
const delay = Math.min(1000 * 2 ** this.attempt, 30000);
this.attempt++;
setTimeout(() => this.connect(), delay);
this.reconnectTimer = setTimeout(() => this.connect(), delay);
};
this.ws.onerror = () => {
ws.onerror = () => {
// Suppressed — onclose handles reconnection. onerror fires before onclose
// and the Event object doesn't contain useful info (serializes to {}).
};
@ -80,20 +185,78 @@ class ReconnectingSocket {
}
}
private async rehydrate() {
try {
const { api } = await import("@/lib/api");
const workspaces = await api.get<WorkspaceData[]>("/workspaces");
useCanvasStore.getState().hydrate(workspaces);
} catch {
// Rehydration failed — will retry on next health check cycle
/** While the WS is in connecting/disconnected limbo, poll /workspaces
* so the store stays fresh. The reconnect attempts continue in
* parallel; whichever recovers first wins. rehydrate()'s own dedup
* gate prevents this from racing with the open-time rehydrate. */
private startFallbackPoll() {
if (this.fallbackPollTimer) return;
this.fallbackPollTimer = setInterval(() => {
if (this.disposed) {
this.stopFallbackPoll();
return;
}
void this.rehydrate();
}, FALLBACK_POLL_MS);
}
private stopFallbackPoll() {
if (this.fallbackPollTimer) {
clearInterval(this.fallbackPollTimer);
this.fallbackPollTimer = null;
}
}
private rehydrate(): Promise<void> {
// Reuse an in-flight fetch — a second caller during the GET
// shouldn't kick off a parallel one.
if (this.rehydrateInFlight) return this.rehydrateInFlight;
if (this.rehydrateDedup.shouldSkip(Date.now())) {
return Promise.resolve();
}
// beginFetch lives INSIDE the IIFE's try so any future code added
// between gate-check and IIFE-construction can't throw and leave
// the gate stuck at inFlight=true forever. Today there's nothing
// that can throw here, but the cost of being defensive is one
// extra microtask of "in flight" status — negligible.
const promise = (async () => {
this.rehydrateDedup.beginFetch();
try {
const { api } = await import("@/lib/api");
const workspaces = await api.get<WorkspaceData[]>("/workspaces");
if (this.disposed) return;
useCanvasStore.getState().hydrate(workspaces);
} catch {
// Rehydration failed — will retry on next health check cycle.
} finally {
this.rehydrateDedup.completeFetch(Date.now());
this.rehydrateInFlight = null;
}
})();
this.rehydrateInFlight = promise;
return promise;
}
disconnect() {
this.disposed = true;
this.stopHealthCheck();
this.stopFallbackPoll();
if (this.reconnectTimer) {
clearTimeout(this.reconnectTimer);
this.reconnectTimer = null;
}
if (this.ws) {
this.ws.close();
// Detach listeners before close() so we don't route the close
// event through our onclose → scheduleReconnect path. Belt +
// braces on top of the `disposed` check, because StrictMode
// cycles through so fast that an attached onclose can fire
// after disposed=true is set but before this assignment runs.
this.ws.onopen = null;
this.ws.onmessage = null;
this.ws.onclose = null;
this.ws.onerror = null;
try { this.ws.close(); } catch { /* noop */ }
this.ws = null;
}
useCanvasStore.getState().setWsStatus("disconnected");

View File

@ -0,0 +1,151 @@
/**
* Org-deploy animation module.
*
* Loaded globally (see app/globals.css). All values come from
* theme-tokens.css so a theme swap needs zero edits here.
*
 * Component contract — canvas/src/components/canvas code adds
* these classes to the React Flow node / edge wrappers:
*
* .mol-deploy-spawn One-shot entry animation on a
* node that just arrived. Applied
* by canvas-events.ts for 600 ms
* then removed.
* .mol-deploy-shimmer Persistent border shimmer while
* a node's status === "provisioning".
* Removed when status flips to
* "online" / "failed".
* .mol-deploy-parent-pulse One-shot acknowledgement pulse
* on the parent when a child lands.
* Applied for parent-pulse duration
* then removed.
* .mol-deploy-locked Applied to every non-root node
* inside a deploying org so it dims
* and the cursor signals un-
* draggable.
* .mol-deploy-root-complete One-shot pop + glow on the root
* when the last child comes online.
*
 * Edges use React Flow edge data to pick styling — see the
 * selectors below the node keyframes.
*
* Reduced motion is handled at the bottom via the same guard
* globals.css already installs for other animations.
*/
/*
Keyframes kept terse; values come from variables so
duplication across themes is nil.
*/
@keyframes mol-deploy-spawn {
/* Gentle fade-in-place. The earlier "spring from parent" motion
collided with the server-computed grid positions (parent and
child used different coord origins once the parent was placed
on the client's grid instead of the template's absolute
coords), which landed children in wrong slots. Keeping the
animation to a simple opacity+scale lets the server's layout
win and reads as "node arrived" without the over-engineered
spring. */
from { opacity: 0; transform: scale(0.85); }
to { opacity: 1; transform: scale(1); }
}
/* mol-deploy-parent-pulse keyframe removed with the effect — the
box-shadow expanding ring made the parent card visibly "flash" on
every child arrival when the grow pass also bumped width/height.
Kept as a deliberate non-class so the theme-tokens vars can drop
with it on the next theme pass. */
@keyframes mol-deploy-root-complete {
0% { transform: scale(1); box-shadow: 0 0 0 0 transparent; }
40% { transform: scale(var(--mol-deploy-root-scale-peak)); box-shadow: var(--mol-deploy-root-glow); }
100% { transform: scale(1); box-shadow: 0 0 0 0 transparent; }
}
/* (mol-deploy-edge-draw keyframe removed with the edge effects.) */
@keyframes mol-deploy-cancel-pulse {
0%, 100% { box-shadow: 0 0 0 0 var(--mol-deploy-cancel-ring); }
50% { box-shadow: 0 0 0 10px transparent; }
}
/*
Node classes
*/
/* Qualify with .react-flow__node so this rule beats the default
`node-appear` animation defined later in globals.css. Without
the qualifier, CSS source-order wins and the standard
node-appear overrides our scale/opacity keyframe, visually
dropping the "spawn from parent" motion. */
.react-flow__node.mol-deploy-spawn {
animation:
mol-deploy-spawn var(--mol-duration-spawn) var(--mol-easing-bounce-out) both;
}
/* Provisioning signal — the earlier rotating conic-gradient border
read as distracting "spinner" clutter during a 15-child org
import (dozens of them spinning simultaneously). A static dim
(reduced opacity + saturation) communicates "this one is still
coming online" without the motion noise. The locked-child style
already uses the same pattern — we reuse the filter values so
a provisioning ROOT node and a locked CHILD look consistent. */
.mol-deploy-shimmer {
filter: saturate(var(--mol-deploy-locked-saturation)) opacity(var(--mol-deploy-locked-opacity));
transition: filter var(--mol-duration-base) var(--mol-easing-standard);
}
.mol-deploy-locked {
filter: saturate(var(--mol-deploy-locked-saturation)) opacity(var(--mol-deploy-locked-opacity));
cursor: not-allowed !important;
transition: filter var(--mol-duration-base) var(--mol-easing-standard);
}
.react-flow__node.mol-deploy-root-complete {
animation: mol-deploy-root-complete var(--mol-duration-root-complete) var(--mol-easing-emphasize) both;
}
/*
Edge classes — intentionally inert.
Earlier revisions painted incoming edges with a dashed-blueprint
animated-laser-trace effect as the child landed. User feedback
on the first demo was "remove connection line effects" — the
moving dashes read as noise during a multi-child deploy. Keeping
the class hooks so canvas-events.ts event handlers can still
apply/strip them without blowing up, but the styling is a no-op
(edges fall through to the default styling in globals.css).
If a future demo wants the effect back, wire the rules below.
*/
/*
Cancel-deployment pill — rendered by OrgCancelButton.tsx,
attached to the root node during deploy. Class `.mol-deploy-cancel`
is always applied; the pulse is additive.
*/
.mol-deploy-cancel {
background: var(--mol-deploy-cancel-bg);
color: var(--mol-deploy-cancel-text);
transition: background var(--mol-duration-fast) var(--mol-easing-standard);
}
.mol-deploy-cancel:hover {
background: var(--mol-deploy-cancel-bg-hover);
}
.mol-deploy-cancel-pulse {
animation: mol-deploy-cancel-pulse var(--mol-duration-parent-pulse) var(--mol-easing-standard) infinite;
}
/*
Reduced-motion guard — mirror globals.css's policy so this
module stays WCAG 2.3.3 compliant without relying on the
global file being loaded first.
*/
@media (prefers-reduced-motion: reduce) {
.react-flow__node.mol-deploy-spawn,
.react-flow__node.mol-deploy-root-complete,
.mol-deploy-cancel-pulse {
animation: none !important;
}
/* Dim-light signal is already static; no override needed. */
}

View File

@ -0,0 +1,69 @@
/**
 * Canvas theme tokens — single source of truth for colors, durations,
* easings, and sizes used by every animated / stateful canvas
* component. Importable from any stylesheet; individual feature
* modules (org-deploy.css, settings-panel.css, ...) only reference
* variables defined here so a future theme swap touches this one
* file.
*
* Adding a theme:
* Put a scoped override block like `[data-theme="light"] { ... }`
* and set only the tokens whose values differ from the default
* dark theme. Unset tokens inherit the default.
*
* Naming convention:
* --mol-<feature>-<semantic-role> values the user sees
* --mol-duration-<name> motion timings
* --mol-easing-<name> motion curves
* Prefix `mol-` avoids collisions with Tailwind / React Flow vars.
*/
:root {
/*
Motion primitives — pick one of these; don't hardcode ms
values in feature stylesheets. If a new feature genuinely
needs a bespoke duration, add a token here and reference it.
*/
--mol-duration-fast: 150ms;
--mol-duration-base: 300ms;
--mol-duration-spawn: 350ms;
--mol-duration-root-complete: 700ms;
--mol-duration-fit-view: 800ms;
--mol-easing-standard: cubic-bezier(0.2, 0, 0, 1);
--mol-easing-bounce-out: cubic-bezier(0.2, 0.8, 0.2, 1.05);
--mol-easing-emphasize: cubic-bezier(0.3, 0, 0, 1);
/*
Org-deploy animation palette (dark theme defaults)
*/
/* Root-complete moment — one-shot glow when the last child lands. */
--mol-deploy-root-glow: 0 0 36px 6px rgba(59, 130, 246, 0.55);
--mol-deploy-root-scale-peak: 1.05;
/* Locked-child visual — non-root nodes during deploy cannot be
dragged; this dims them so the user's attention stays on the
active spawn. Saturation + opacity instead of a badge keeps
the card recognisable while signalling "not available". */
--mol-deploy-locked-saturation: 0.55;
--mol-deploy-locked-opacity: 0.78;
/* Cancel-deployment pill — attached to the root node. Red, pulsing,
one button that kills the whole tree. */
--mol-deploy-cancel-bg: rgba(220, 38, 38, 0.92); /* red-600/92 */
--mol-deploy-cancel-bg-hover: rgba(239, 68, 68, 1); /* red-500 */
--mol-deploy-cancel-ring: rgba(239, 68, 68, 0.45);
--mol-deploy-cancel-text: #fff;
}
/* Example template for a future light theme. Intentionally empty —
product hasn't shipped a light theme yet but this shows the
override surface any future theme must fill. Uncomment + tune
when the light theme lands.
[data-theme="light"] {
--mol-deploy-shimmer-from: rgba(37, 99, 235, 0.08);
--mol-deploy-shimmer-to: rgba(37, 99, 235, 0.9);
...
}
*/

View File

@ -39,6 +39,7 @@
{"name": "free-beats-all", "repo": "Molecule-AI/molecule-ai-org-template-free-beats-all", "ref": "main"},
{"name": "medo-smoke", "repo": "Molecule-AI/molecule-ai-org-template-medo-smoke", "ref": "main"},
{"name": "molecule-worker-gemini", "repo": "Molecule-AI/molecule-ai-org-template-molecule-worker-gemini", "ref": "main"},
{"name": "reno-stars", "repo": "Molecule-AI/molecule-ai-org-template-reno-stars", "ref": "main"}
{"name": "reno-stars", "repo": "Molecule-AI/molecule-ai-org-template-reno-stars", "ref": "main"},
{"name": "ux-ab-lab", "repo": "Molecule-AI/molecule-ai-org-template-ux-ab-lab", "ref": "main"}
]
}

View File

@ -0,0 +1,93 @@
#!/usr/bin/env bash
# E2E test: chat file attachment round-trip
#
# Proves the full drag-drop → agent-reads → agent-returns-file → download
# path against a live workspace. Runs against the local workspace-server
# on :8080 with a hermes workspace already online. The test is provider-
# agnostic as long as the agent has a valid API key — it only asserts
# that attachments surface on both ends, not a specific reply shape.
#
# Usage: WSID=<workspace-id> tests/e2e/test_chat_attachments_e2e.sh
#        (pass WSID for an existing hermes workspace)
#
# Prereqs:
#   - workspace-server on http://localhost:8080
#   - the WSID workspace is online, runtime=hermes
#   - a working provider key (MINIMAX_API_KEY / ANTHROPIC_API_KEY / etc.)
#   - /workspace writable by the agent user (some templates ship it
#     root-owned; chmod 777 for the E2E or use a writable template)
set -euo pipefail

WSID="${WSID:?WSID=<workspace-id> required}"
BASE="${BASE:-http://localhost:8080}"

log() { printf "\n=== %s ===\n" "$*"; }

log "Preflight: workspace online?"
STATUS=$(curl -s "$BASE/workspaces/$WSID" | python3 -c 'import json,sys;print(json.load(sys.stdin)["status"])')
[ "$STATUS" = "online" ] || { echo "workspace not online ($STATUS)"; exit 1; }

log "Step 1 — Upload a text file via /chat/uploads"
TEST_FILE=$(mktemp -t hermes-e2e-XXXXXX.txt)
# Clean up the temp file on every exit path (pass, fail, set -e abort);
# previously it leaked into $TMPDIR on each run.
trap 'rm -f "$TEST_FILE"' EXIT
echo "secret code: $(openssl rand -hex 4)-$(openssl rand -hex 4)" > "$TEST_FILE"
# The secret is the last whitespace-delimited field ($NF) of the file.
EXPECTED=$(awk '{print $NF}' "$TEST_FILE")
UPLOAD=$(curl -s -X POST "$BASE/workspaces/$WSID/chat/uploads" -F "files=@$TEST_FILE")
URI=$(echo "$UPLOAD" | python3 -c 'import json,sys;print(json.load(sys.stdin)["files"][0]["uri"])')
[ -n "$URI" ] || { echo "upload failed: $UPLOAD"; exit 1; }
echo "uploaded: $URI"

log "Step 2 — A2A message with file part; expect agent to quote the code"
# Build the JSON via a python helper so the URI value doesn't have to be
# shell-interpolated through a heredoc (the { } tokens in a JSON body
# collide with bash brace-expansion when quoted wrong).
PAYLOAD=$(URI="$URI" python3 -c '
import json, os
uri = os.environ["URI"]
print(json.dumps({
"jsonrpc":"2.0","id":"e2e-up","method":"message/send",
"params":{"message":{"role":"user","messageId":"e2e-up","kind":"message","parts":[
{"kind":"text","text":"Read the attached file and tell me the exact secret code."},
{"kind":"file","file":{"name":"test.txt","mimeType":"text/plain","uri":uri}},
]},"configuration":{"acceptedOutputModes":["text/plain"],"blocking":True}}}))
')
REPLY=$(curl -s -X POST "$BASE/workspaces/$WSID/a2a" \
  -H 'Content-Type: application/json' \
  --max-time 120 \
  -d "$PAYLOAD")
REPLY_TEXT=$(echo "$REPLY" | python3 -c 'import json,sys;d=json.load(sys.stdin);[print(p.get("text","")) for p in d["result"]["parts"] if p.get("kind")=="text"]')
echo "agent reply: $REPLY_TEXT"
if echo "$REPLY_TEXT" | grep -qF "$EXPECTED"; then
  echo "PASS: agent saw the attached file"
else
  echo "FAIL: agent reply missing expected code '$EXPECTED'"
  exit 1
fi

log "Step 3 — Seed a file inside /workspace and ask agent to reference it"
# Relies on /workspace being writable by the platform (we copy as root via
# docker exec, mimicking the path a real agent would use through its tools).
CONTAINER=$(docker ps --format '{{.Names}}' | grep -E "^ws-${WSID:0:12}" | head -1)
[ -n "$CONTAINER" ] || { echo "container not found"; exit 1; }
docker exec "$CONTAINER" sh -c 'echo "E2E report body $(date -u +%s)" > /workspace/e2e-report.txt'
REPLY=$(curl -s -X POST "$BASE/workspaces/$WSID/a2a" \
  -H 'Content-Type: application/json' \
  --max-time 120 \
  -d '{"jsonrpc":"2.0","id":"e2e-down","method":"message/send","params":{"message":{"role":"user","messageId":"e2e-down","kind":"message","parts":[{"kind":"text","text":"There is a file at /workspace/e2e-report.txt. Mention its exact path in your reply so I can download it."}]},"configuration":{"acceptedOutputModes":["text/plain"],"blocking":true}}}')
FILE_URI=$(echo "$REPLY" | python3 -c 'import json,sys,re;d=json.load(sys.stdin);[print(p["file"]["uri"]) for p in d["result"]["parts"] if p.get("kind")=="file"]' | head -1)
[ -n "$FILE_URI" ] || { echo "FAIL: agent reply had no file part"; echo "$REPLY"; exit 1; }
echo "agent attached: $FILE_URI"

log "Step 4 — Download via /chat/download"
# Strip the `workspace:` scheme to get the container-absolute path.
DL_PATH=${FILE_URI#workspace:}
BODY=$(curl -s "$BASE/workspaces/$WSID/chat/download?path=$DL_PATH")
echo "downloaded: $BODY"
if echo "$BODY" | grep -q "E2E report body"; then
  echo "PASS: downloaded the agent-returned file"
else
  echo "FAIL: download did not return expected body"
  exit 1
fi

log "ALL E2E CHECKS PASSED"

View File

@ -0,0 +1,149 @@
#!/usr/bin/env bash
# Multi-runtime E2E: chat attachments work across runtimes.
#
# The platform-level attachment helpers live in
# molecule_runtime.executor_helpers. Every runtime's executor is
# expected to call them. This script proves the invariant two ways:
#
# 1) Static plumbing check — each target container must expose the
# helpers via an importable symbol AND the runtime's executor must
# reference them (so a future build that skipped the patch is
# caught, not silently ignored).
#
# 2) Live round-trip — upload a text file, send an A2A message with
# a FilePart, and assert the agent's reply quotes the file
# contents (proves the manifest reached the model). Skipped with
# a PASS-NOTE when the runtime lacks valid provider credentials,
# because a missing ANTHROPIC_API_KEY / CLAUDE_CODE_OAUTH_TOKEN
# is infra, not platform plumbing.
#
# Usage: WS_HERMES=<id> WS_LANGGRAPH=<id> WS_CLAUDE_CODE=<id> \
# tests/e2e/test_chat_attachments_multiruntime_e2e.sh
# No -e on purpose: each runtime check records failures in $fails and
# the script reports an aggregate verdict at the end instead of dying
# on the first broken runtime.
set -uo pipefail
BASE="${BASE:-http://localhost:8080}"
# Running count of failed runtime checks; non-zero => exit 1 at the end.
fails=0
# has_patch_in_container <container>
# Static plumbing check: imports the platform attachment helpers inside
# the container's python3. Prints "helpers: OK" on success; prints the
# import error and returns non-zero (via python's sys.exit) when a
# build dropped the molecule_runtime.executor_helpers module.
has_patch_in_container() {
  local container="$1"
  # Signal that platform helpers are available AND wired into the
  # runtime's executor. Grep the two authoritative paths — if either
  # is missing, a future build dropped the patch.
  docker exec "$container" python3 -c '
import sys
try:
    from molecule_runtime.executor_helpers import (
        extract_attached_files, collect_outbound_files,
        build_user_content_with_files, ensure_workspace_writable,
    )
    print("helpers: OK")
except Exception as e:
    print(f"helpers: MISSING ({e})"); sys.exit(1)
' 2>&1
}
# has_executor_patched <container> <runtime>
# Verifies the runtime's executor actually *references* the platform
# helpers (having the module importable isn't enough). Each runtime
# has a different authoritative artifact to inspect; an unknown
# runtime falls through the case and returns 0 (no check).
has_executor_patched() {
  # For hermes: /app/executor.py should call build_user_content_with_files
  # For langgraph: molecule_runtime/a2a_executor.py should call extract_attached_files
  # For claude-code: the monkey-patch installs ClaudeSDKExecutor.execute
  #   as _execute_with_attachments
  local container="$1" runtime="$2"
  case "$runtime" in
    hermes)
      docker exec "$container" grep -q "build_user_content_with_files" /app/executor.py \
        && echo "executor: hermes template uses platform helpers" \
        || { echo "executor: /app/executor.py missing helper call"; return 1; }
      ;;
    langgraph)
      # NOTE(review): the site-packages path hardcodes python3.11 —
      # a base-image Python bump will silently break this grep.
      docker exec "$container" grep -q "extract_attached_files(getattr(context" \
        /usr/local/lib/python3.11/site-packages/molecule_runtime/a2a_executor.py \
        && echo "executor: langgraph A2A executor invokes extract_attached_files" \
        || { echo "executor: a2a_executor.py not patched"; return 1; }
      ;;
    claude-code)
      # Asserts the monkey-patch is live by inspecting the bound
      # method's qualified name at runtime.
      docker exec "$container" python3 -c '
from molecule_runtime.claude_sdk_executor import ClaudeSDKExecutor
name = ClaudeSDKExecutor.execute.__qualname__
assert name.endswith("_execute_with_attachments"), f"unpatched: {name}"
print(f"executor: claude-code monkey-patch active ({name})")
' 2>&1 || return 1
      ;;
  esac
}
# round_trip <label> <wsid>
# Live check: upload a probe file, send it as an A2A FilePart, and see
# whether the agent's reply quotes the file contents. Returns 1 only
# for upload failures; a wrong/credential-missing agent reply is
# tolerated (SKIP/INFO) because the static plumbing checks above
# already asserted the platform layer.
round_trip() {
  local label="$1" wsid="$2"
  local test_file expected upload uri payload reply reply_text
  test_file=$(mktemp -t e2e-mr-XXXX.txt)
  expected="secret-$(openssl rand -hex 6)"
  echo "$expected" > "$test_file"
  upload=$(curl -s -X POST "$BASE/workspaces/$wsid/chat/uploads" -F "files=@$test_file")
  uri=$(echo "$upload" | python3 -c 'import json,sys;print(json.load(sys.stdin)["files"][0]["uri"])' 2>/dev/null)
  [ -z "$uri" ] && { echo "FAIL $label: upload returned no URI: $upload"; rm -f "$test_file"; return 1; }
  # Build JSON in python so the URI never passes through shell quoting.
  payload=$(URI="$uri" python3 -c '
import json, os
uri = os.environ["URI"]
print(json.dumps({
"jsonrpc":"2.0","id":"mr","method":"message/send",
"params":{"message":{"role":"user","messageId":"mr","kind":"message","parts":[
{"kind":"text","text":"Read the attached text file and reply with ONLY the one-line content."},
{"kind":"file","file":{"name":"probe.txt","mimeType":"text/plain","uri":uri}},
]},"configuration":{"acceptedOutputModes":["text/plain"],"blocking":True}}}))')
  # Hit the platform proxy, with generous timeout — some runtimes warm on first call
  reply=$(curl -s -X POST "$BASE/workspaces/$wsid/a2a" \
    -H 'Content-Type: application/json' --max-time 120 -d "$payload")
  # Scrub control characters before parsing — some runtimes leak raw
  # terminal bytes into the reply text.
  reply_text=$(echo "$reply" | python3 -c '
import json, sys, re
try:
    data = re.sub(r"[\x00-\x08\x0b-\x1f]", " ", sys.stdin.read())
    d = json.loads(data)
    parts = d.get("result",{}).get("parts",[])
    print(" ".join(p.get("text","") for p in parts if p.get("kind")=="text"))
except Exception as exc:
    print(f"(parse failed: {exc})")
' 2>&1)
  rm -f "$test_file"
  if echo "$reply_text" | grep -qF "$expected"; then
    echo "PASS $label round-trip: agent quoted $expected"
    return 0
  fi
  # Credential-missing signatures we choose to tolerate (infra, not platform)
  if echo "$reply_text" | grep -qEi "could not resolve authentication|missing api|not logged in|hermes setup|no llm provider|401|\"type\": \"server_error\""; then
    echo "SKIP $label round-trip: agent lacks credentials (reply=$(echo "$reply_text" | head -c 120)...)"
    return 0
  fi
  echo "INFO $label round-trip: agent reply did not contain expected text"
  echo "  reply: $(echo "$reply_text" | head -c 200)"
  return 0 # Don't hard-fail; the plumbing check already asserted the platform layer
}
# check_runtime <label> <runtime> <wsid>
# Orchestrates all checks for one runtime: workspace online → container
# resolvable → helpers importable → executor patched → live round-trip.
# Missing wsid means the runtime wasn't provisioned for this run: SKIP,
# not FAIL. Increments the global $fails counter on any hard failure.
check_runtime() {
  local label="$1" runtime="$2" wsid="$3"
  [ -z "$wsid" ] && { echo "SKIP $label (no workspace id)"; return; }
  printf "\n======================== %s (%s) ========================\n" "$label" "$wsid"
  local status
  status=$(curl -s "$BASE/workspaces/$wsid" | python3 -c 'import json,sys;print(json.load(sys.stdin)["status"])')
  if [ "$status" != "online" ]; then
    echo "FAIL $label: workspace status=$status"
    fails=$((fails + 1)); return
  fi
  local container
  # Container names are prefixed with the first 12 chars of the wsid.
  container=$(docker ps --format '{{.Names}}' | grep -E "^ws-${wsid:0:12}" | head -1)
  [ -z "$container" ] && { echo "FAIL $label: container not found"; fails=$((fails + 1)); return; }
  has_patch_in_container "$container" || { echo "FAIL $label: platform helpers missing"; fails=$((fails + 1)); return; }
  has_executor_patched "$container" "$runtime" || { echo "FAIL $label: executor not patched"; fails=$((fails + 1)); return; }
  round_trip "$label" "$wsid" || { fails=$((fails + 1)); return; }
}
# Driver: each WS_* env var is optional — an unset runtime is skipped
# (reported as SKIP by check_runtime), never counted as a failure.
check_runtime "hermes" "hermes" "${WS_HERMES:-}"
check_runtime "langgraph" "langgraph" "${WS_LANGGRAPH:-}"
check_runtime "claude-code" "claude-code" "${WS_CLAUDE_CODE:-}"
printf "\n=================================================\n"
if [ $fails -eq 0 ]; then echo "ALL RUNTIME E2E CHECKS PASSED"; exit 0; fi
echo "FAIL: $fails runtime check(s) failed"

View File

@ -0,0 +1,190 @@
package main
import (
"bufio"
"log"
"os"
"path/filepath"
"strings"
)
// loadDotEnvIfPresent walks upward from the working directory looking
// for the monorepo-root .env and merges its KEY=VALUE pairs into the
// process environment. Variables already present in the environment
// (docker run -e, CI exports, ad-hoc `KEY=val ./binary`) always win
// over file values, so operators can override without editing the file.
//
// Rationale, in brief:
//   - Upward walk: the binary may be launched from the monorepo root,
//     the workspace-server subdir, or anywhere else; walking up from
//     CWD finds the canonical gitignored .env regardless of cwd, with
//     no `source`/`set -a` required.
//   - No godotenv dependency: the supported format (KEY=VALUE, `#`
//     comments, no interpolation) is small enough that a tiny in-tree
//     parser is auditable and has no supply-chain surface.
//   - Production safety: the Dockerfile never COPYs .env into the
//     image and .env is gitignored, so containers have nothing to
//     load; explicit-env-wins means container env dominates anyway.
func loadDotEnvIfPresent() {
	path, ok := findDotEnv()
	if !ok {
		return
	}
	f, err := os.Open(path)
	if err != nil {
		log.Printf(".env: open %s: %v (skipping)", path, err)
		return
	}
	defer f.Close()

	var loaded, skipped int
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		key, val, parsed := parseDotEnvLine(sc.Text())
		if !parsed {
			continue // blank line, comment, or malformed entry
		}
		// Existing env wins. An explicitly-set empty string (`KEY=`
		// exported by a parent shell) counts as "set" — keep it rather
		// than backfilling from the file. Matches Node's
		// `process.env[k] !== undefined` check in the canvas's
		// next.config.ts loader so both processes treat the same input
		// identically; `unset KEY` in the launching shell lets the
		// file value win.
		if _, set := os.LookupEnv(key); set {
			skipped++
			continue
		}
		if err := os.Setenv(key, val); err != nil {
			log.Printf(".env: set %s: %v", key, err)
			continue
		}
		loaded++
	}
	if err := sc.Err(); err != nil {
		log.Printf(".env: scan %s: %v", path, err)
	}
	log.Printf(".env: %s — loaded %d, %d already set in env", path, loaded, skipped)
}
// findDotEnv returns the path of the nearest .env file walking upward
// from CWD, capped at 6 parent levels so a deeply-nested launch dir
// doesn't scan the entire filesystem.
//
// Sentinel gate: a .env is only accepted when its directory is the
// monorepo root (see isMonorepoRoot). Without the gate, running the
// binary from e.g. ~/Documents/other-project/ would walk up to ~/.env
// and load arbitrary variables — a real foot-gun on shared dev
// machines and a possible information-leak vector on bare-metal
// deploys. A .env that fails the gate is skipped and the walk
// continues; finding none is identical to the pre-loader behavior
// (the operator must export env explicitly).
func findDotEnv() (string, bool) {
	dir, err := os.Getwd()
	if err != nil {
		return "", false
	}
	for depth := 0; depth < 6; depth++ {
		candidate := filepath.Join(dir, ".env")
		// Accept only a regular .env whose directory passes the
		// monorepo-root check; otherwise keep climbing.
		if st, statErr := os.Stat(candidate); statErr == nil && !st.IsDir() && isMonorepoRoot(dir) {
			return candidate, true
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			break // reached the filesystem root
		}
		dir = parent
	}
	return "", false
}
// isMonorepoRoot returns true if `dir` looks like the molecule-core
// monorepo root — the directory that owns the .env we want to load.
// The marker is `workspace-server/go.mod`, which is the canonical
// in-tree go module and exists only in this monorepo. A simple
// `workspace-server/` directory check would false-positive on a fork
// that renamed the dir; the go.mod check is more precise.
func isMonorepoRoot(dir string) bool {
st, err := os.Stat(filepath.Join(dir, "workspace-server", "go.mod"))
return err == nil && !st.IsDir()
}
// parseDotEnvLine parses a single .env line into (key, value, true),
// or returns ok=false for blanks, comments, and malformed input.
// Accepted syntax:
//   - optional leading `export ` (so files written for `source .env`
//     or direnv work unmodified; `export\tFOO=bar` with a tab is
//     rejected on purpose, matching the TS mirror in
//     canvas/next.config.ts — shells emit `export ` with a space)
//   - a leading UTF-8 BOM on the first line (Windows editors)
//   - inline `# comment` after a bare value when the `#` starts the
//     value or is preceded by whitespace; `KEY=token#fragment` keeps
//     its `#`
//   - one matched pair of surrounding `"` or `'` quotes, stripped;
//     inside a quoted value `#` is literal, never a comment marker
func parseDotEnvLine(line string) (string, string, bool) {
	// bufio.Scanner doesn't strip a UTF-8 BOM, so the first line of a
	// Windows-edited .env would otherwise yield a key of U+FEFF+"FOO"
	// that os.Setenv silently accepts.
	line = strings.TrimSpace(strings.TrimPrefix(line, "\ufeff"))
	if line == "" || line[0] == '#' {
		return "", "", false
	}
	// `export FOO=bar` → `FOO=bar`; re-trim in case the prefix left
	// extra whitespace before the key.
	line = strings.TrimLeft(strings.TrimPrefix(line, "export "), " \t")

	eq := strings.IndexByte(line, '=')
	if eq <= 0 { // no '=' at all, or an empty key like `=value`
		return "", "", false
	}
	key := strings.TrimSpace(line[:eq])
	// Trim leading whitespace so a quoted value's opening quote sits
	// at val[0], and so `KEY= # comment` classifies as an empty value
	// followed by a comment, not a value of "# comment".
	val := strings.TrimLeft(line[eq+1:], " \t")

	// Quoted value: take the contents of the first matched pair
	// verbatim. Must run BEFORE comment stripping so that
	// `KEY="value # not a comment"` keeps its `#`. An unterminated
	// quote falls through to bare handling (the opening quote becomes
	// a literal character of the value).
	if len(val) >= 2 && (val[0] == '"' || val[0] == '\'') {
		if end := strings.IndexByte(val[1:], val[0]); end >= 0 {
			return key, val[1 : 1+end], true
		}
	}
	// Bare value: a `#` starts a comment iff it begins the value or
	// follows a space/tab; otherwise it belongs to the value.
	for i := 0; i < len(val); i++ {
		if val[i] == '#' && (i == 0 || val[i-1] == ' ' || val[i-1] == '\t') {
			val = val[:i]
			break
		}
	}
	return key, strings.TrimSpace(val), true
}

View File

@ -0,0 +1,211 @@
package main
import (
"os"
"path/filepath"
"testing"
)
// TestParseDotEnvLine table-tests the .env line parser: comments,
// `export ` prefixes, quoting, inline comments, BOM, and CRLF
// handling. Each case's `comment` doubles as the subtest name.
func TestParseDotEnvLine(t *testing.T) {
	cases := []struct {
		in      string
		k, v    string
		ok      bool
		comment string // subtest name / rationale for the case
	}{
		{in: "", ok: false, comment: "empty line"},
		{in: "   ", ok: false, comment: "whitespace-only"},
		{in: "# top-level comment", ok: false, comment: "full-line comment"},
		{in: "   # indented comment", ok: false, comment: "indented full-line comment"},
		{in: "FOO", ok: false, comment: "no equals"},
		{in: "=BAR", ok: false, comment: "missing key"},
		{in: "FOO=bar", k: "FOO", v: "bar", ok: true, comment: "plain"},
		{in: "  FOO=bar", k: "FOO", v: "bar", ok: true, comment: "leading whitespace"},
		{in: "FOO=bar  ", k: "FOO", v: "bar", ok: true, comment: "trailing whitespace stripped"},
		{in: "FOO =bar", k: "FOO", v: "bar", ok: true, comment: "whitespace before equals"},
		{in: "FOO=bar # comment", k: "FOO", v: "bar", ok: true, comment: "inline space-hash comment"},
		{in: "FOO=bar\t# comment", k: "FOO", v: "bar", ok: true, comment: "inline tab-hash comment"},
		{in: "FOO=bar    # lots of spaces", k: "FOO", v: "bar", ok: true, comment: "multiple spaces before hash"},
		{in: "FOO=bar#nocomment", k: "FOO", v: "bar#nocomment", ok: true, comment: "bare hash inside value preserved"},
		{in: "URL=postgres://u:p@h:5432/db?sslmode=disable", k: "URL", v: "postgres://u:p@h:5432/db?sslmode=disable", ok: true, comment: "url with embedded equals"},
		{in: "TOKEN=eyJhbGciOiJIUzI1NiJ9.payload.sig=", k: "TOKEN", v: "eyJhbGciOiJIUzI1NiJ9.payload.sig=", ok: true, comment: "base64 padding preserved"},
		{in: "FOO=", k: "FOO", v: "", ok: true, comment: "empty value"},
		{in: "ADMIN_TOKEN=", k: "ADMIN_TOKEN", v: "", ok: true, comment: "empty value (production gate sentinel)"},
		// Regression: the repo's own .env contains lines like
		// `CONFIGS_DIR=          # Path to ...` where the value
		// is empty + an inline comment. Pre-fix parser stripped leading
		// whitespace BEFORE detecting the comment, leaving `#` at v[0]
		// with nothing preceding it, so the inline-comment check missed
		// it and the comment text was returned as the value. Server
		// then tried to use the comment as a directory path and template
		// loading silently failed (GET /templates returned []).
		{in: "CONFIGS_DIR=          # Path to /var/foo (auto-discovered if empty)", k: "CONFIGS_DIR", v: "", ok: true, comment: "empty value with leading whitespace + inline comment"},
		{in: "FOO=   # comment", k: "FOO", v: "", ok: true, comment: "spaces-only value with inline comment"},
		{in: "FOO=\t# comment", k: "FOO", v: "", ok: true, comment: "tab-only value with inline comment"},
		// `export` prefix: shell-friendly .env files (direnv, .envrc-style)
		// — the prefix must be stripped, NOT folded into the key.
		{in: "export FOO=bar", k: "FOO", v: "bar", ok: true, comment: "export prefix stripped"},
		{in: "  export FOO=bar", k: "FOO", v: "bar", ok: true, comment: "leading whitespace + export"},
		{in: "export DATABASE_URL=postgres://u:p@h/db", k: "DATABASE_URL", v: "postgres://u:p@h/db", ok: true, comment: "export with URL value"},
		// Quoted values: one matched pair of surrounding quotes is
		// stripped; embedded `#` survives because it isn't an inline
		// comment inside a quote.
		{in: `FOO="hello world"`, k: "FOO", v: "hello world", ok: true, comment: "double-quoted value"},
		{in: `FOO='hello world'`, k: "FOO", v: "hello world", ok: true, comment: "single-quoted value"},
		{in: `FOO="value # not a comment"`, k: "FOO", v: "value # not a comment", ok: true, comment: "hash inside quotes is part of value"},
		{in: `FOO=  "padded"`, k: "FOO", v: "padded", ok: true, comment: "whitespace before opening quote"},
		{in: `FOO="unterminated`, k: "FOO", v: `"unterminated`, ok: true, comment: "unterminated quote stays as bare value"},
		// CRLF endings: bufio.Scanner strips \n; \r is left and stripped
		// by the value-side TrimSpace. Locking this in so a future
		// refactor doesn't accidentally feed \r into os.Setenv.
		{in: "FOO=bar\r", k: "FOO", v: "bar", ok: true, comment: "CRLF trailing carriage return stripped"},
		// UTF-8 BOM at file start: a Windows-edited .env begins with
		// \xEF\xBB\xBF; without explicit stripping the first key would
		// be "\ufeffFOO".
		{in: "\ufeffFOO=bar", k: "FOO", v: "bar", ok: true, comment: "UTF-8 BOM stripped"},
	}
	for _, tc := range cases {
		t.Run(tc.comment, func(t *testing.T) {
			k, v, ok := parseDotEnvLine(tc.in)
			if ok != tc.ok {
				t.Fatalf("ok = %v, want %v (input=%q)", ok, tc.ok, tc.in)
			}
			if !tc.ok {
				// Malformed lines carry no key/value contract beyond
				// ok=false — nothing more to compare.
				return
			}
			if k != tc.k || v != tc.v {
				t.Fatalf("got (%q, %q), want (%q, %q)", k, v, tc.k, tc.v)
			}
		})
	}
}
// makeFakeMonorepo builds a temp directory that satisfies
// isMonorepoRoot() — it contains workspace-server/go.mod — plus a
// .env file with the supplied body. The returned path is the fake
// root; callers chdir into it (or below it) before exercising the
// .env discovery helpers.
func makeFakeMonorepo(t *testing.T, envBody string) string {
	t.Helper()
	root := t.TempDir()
	// The go.mod marker is what findDotEnv's sentinel check looks for.
	modPath := filepath.Join(root, "workspace-server", "go.mod")
	if err := os.MkdirAll(filepath.Dir(modPath), 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(modPath, []byte("module fake\n"), 0o644); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(root, ".env"), []byte(envBody), 0o644); err != nil {
		t.Fatalf("write .env: %v", err)
	}
	return root
}
// TestLoadDotEnvIfPresent_PreservesExisting pins the precedence
// contract: values already present in the process environment must
// win over values in the .env file; only unset keys are backfilled.
func TestLoadDotEnvIfPresent_PreservesExisting(t *testing.T) {
	dir := makeFakeMonorepo(t, "DOTENV_TEST_NEW=from_file\nDOTENV_TEST_EXISTING=from_file\n")
	// Pre-set one of the keys — file value must NOT clobber it.
	// t.Setenv also registers automatic restore at test end.
	t.Setenv("DOTENV_TEST_EXISTING", "from_real_env")
	// Ensure the other key starts unset.
	os.Unsetenv("DOTENV_TEST_NEW")
	t.Cleanup(func() { os.Unsetenv("DOTENV_TEST_NEW") })
	// Run from the temp dir so findDotEnv picks our fixture.
	prev, err := os.Getwd()
	if err != nil {
		t.Fatal(err)
	}
	if err := os.Chdir(dir); err != nil {
		t.Fatal(err)
	}
	t.Cleanup(func() { _ = os.Chdir(prev) })
	loadDotEnvIfPresent()
	if got := os.Getenv("DOTENV_TEST_NEW"); got != "from_file" {
		t.Errorf("DOTENV_TEST_NEW = %q, want %q", got, "from_file")
	}
	if got := os.Getenv("DOTENV_TEST_EXISTING"); got != "from_real_env" {
		t.Errorf("existing env clobbered: got %q, want %q", got, "from_real_env")
	}
}
// TestLoadDotEnvIfPresent_NoFile_NoOp — running from a directory tree
// with no .env (and no monorepo marker) must be a silent no-op: no
// panic, no loud logging, no env mutation. Best-effort miss is the
// loader's contract.
func TestLoadDotEnvIfPresent_NoFile_NoOp(t *testing.T) {
	empty := t.TempDir() // nothing at this level — no .env, no marker
	cwd, err := os.Getwd()
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(func() { _ = os.Chdir(cwd) })
	if err := os.Chdir(empty); err != nil {
		t.Fatal(err)
	}
	loadDotEnvIfPresent()
}
// TestFindDotEnv_WalksUpward verifies the upward directory walk: from
// a deeply nested cwd inside the fake monorepo, findDotEnv must land
// on the root-level .env that sits next to the workspace-server/go.mod
// marker.
func TestFindDotEnv_WalksUpward(t *testing.T) {
	root := makeFakeMonorepo(t, "X=1\n")
	// Three levels deep — the walk has to climb past a/, a/b/, a/b/c/.
	nested := filepath.Join(root, "a", "b", "c")
	if err := os.MkdirAll(nested, 0o755); err != nil {
		t.Fatal(err)
	}
	prev, err := os.Getwd()
	if err != nil {
		t.Fatal(err)
	}
	if err := os.Chdir(nested); err != nil {
		t.Fatal(err)
	}
	t.Cleanup(func() { _ = os.Chdir(prev) })
	got, ok := findDotEnv()
	if !ok {
		t.Fatal("expected to find .env walking upward")
	}
	want := filepath.Join(root, ".env")
	// macOS resolves /var → /private/var on TempDir, so compare via
	// EvalSymlinks for both sides to dodge that.
	gotR, _ := filepath.EvalSymlinks(got)
	wantR, _ := filepath.EvalSymlinks(want)
	if gotR != wantR {
		t.Errorf("findDotEnv() = %q, want %q", got, want)
	}
}
// TestFindDotEnv_RejectsUnrelatedDotEnv pins the monorepo-sentinel
// behaviour: a stray .env in an arbitrary directory — someone else's
// project tree, or a personal ~/.env — must NOT be picked up, because
// loading it would overwrite the process environment with unrelated
// values. Only a directory containing workspace-server/go.mod
// qualifies as a load source; this regression test guards that
// foot-gun.
func TestFindDotEnv_RejectsUnrelatedDotEnv(t *testing.T) {
	stray := t.TempDir() // has a .env but NO monorepo marker
	if err := os.WriteFile(filepath.Join(stray, ".env"), []byte("LEAKY=value\n"), 0o644); err != nil {
		t.Fatal(err)
	}
	cwd, err := os.Getwd()
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(func() { _ = os.Chdir(cwd) })
	if err := os.Chdir(stray); err != nil {
		t.Fatal(err)
	}
	if got, ok := findDotEnv(); ok {
		t.Errorf("findDotEnv() = %q, ok=true; want ok=false (no workspace-server sibling)", got)
	}
}

View File

@ -33,6 +33,14 @@ import (
)
func main() {
// .env auto-load: in dev, the operator keeps MOLECULE_ENV /
// DATABASE_URL / etc. in the monorepo's .env file. Loading it here
// — before any code reads env — means a fresh `/tmp/molecule-server`
// run picks up dev config without `set -a && source .env`. No-op
// in production (Docker image doesn't ship a .env, and existing env
// always wins over file values, so container env stays dominant).
loadDotEnvIfPresent()
// CP self-refresh: pull any operator-rotated config (e.g. a new
// MOLECULE_CP_SHARED_SECRET) before any other code reads env.
// Best-effort — if the CP is unreachable we keep booting with the
@ -221,6 +229,18 @@ func main() {
})
}
// Orphan-container reconcile sweep — finds running containers
// whose workspace row is already status='removed' and stops
// them. Defence in depth on top of the inline cleanup in
// handlers/workspace_crud.go: any Docker hiccup that left a
// container alive after the user clicked delete heals on the
// next sweep instead of leaking forever.
if prov != nil {
go supervised.RunWithRecover(ctx, "orphan-sweeper", func(c context.Context) {
registry.StartOrphanSweeper(c, prov)
})
}
// Provision-timeout sweep — flips workspaces that have been stuck in
// status='provisioning' past the timeout window to 'failed' and emits
// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic

View File

@ -20,6 +20,7 @@ import (
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
"github.com/gin-gonic/gin"
@ -120,18 +121,26 @@ func isUpstreamBusyError(err error) bool {
if err == nil {
return false
}
// Typed sentinels propagate cleanly through *url.Error.Unwrap
// since Go 1.13, so errors.Is is the primary check for both
// DeadlineExceeded and Canceled. The substring fallbacks below
// stay only for shapes net/http does NOT type — bare "EOF" /
// "connection reset" can arrive as plain *net.OpError with no
// errors.Is hook to the stdlib sentinels.
if errors.Is(err, context.DeadlineExceeded) {
return true
}
// applyIdleTimeout uses context.WithCancel; surfaces here as
// Canceled, distinct from DeadlineExceeded but the same "upstream
// busy" class — caller produces a 503 + Retry-After.
if errors.Is(err, context.Canceled) {
return true
}
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
return true
}
// url.Error wraps "read tcp … EOF" and "Post …: context deadline
// exceeded" strings from the stdlib HTTP client without typing the
// inner cause. Fall back to substring match for those.
msg := err.Error()
return strings.Contains(msg, "context deadline exceeded") ||
strings.Contains(msg, "EOF") ||
return strings.Contains(msg, "EOF") ||
strings.Contains(msg, "connection reset")
}
@ -286,7 +295,7 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
body = normalizedBody
startTime := time.Now()
resp, cancelFwd, err := h.dispatchA2A(ctx, agentURL, body, callerID)
resp, cancelFwd, err := h.dispatchA2A(ctx, workspaceID, agentURL, body, callerID)
if cancelFwd != nil {
defer cancelFwd()
}
@ -478,11 +487,34 @@ func normalizeA2APayload(body []byte) ([]byte, string, *proxyA2AError) {
return marshaledBody, a2aMethod, nil
}
// idleTimeoutDuration is the per-dispatch silence window: if the
// platform's broadcaster emits no events for this workspace for the
// full duration, the dispatch ctx is cancelled. Resets on every
// ACTIVITY_LOGGED / TASK_UPDATED / A2A_RESPONSE event for the
// workspace, so a chat that's actively reporting tool calls or
// streaming status updates never trips it. Picked to be longer than
// any reasonable single-tool-use cadence (Claude Code's slowest
// observed silence between tools is ~30s) but short enough that a
// truly wedged runtime fails in 1 minute, not 5.
const idleTimeoutDuration = 60 * time.Second
// dispatchA2A POSTs `body` to `agentURL`. Uses WithoutCancel so delegation
// chains survive client disconnect (browser tab close). Default timeouts:
// canvas (callerID == "") = 5 min, agent-to-agent = 30 min. Callers can
// override via the X-Timeout header (applied to ctx upstream in ProxyA2A).
func (h *WorkspaceHandler) dispatchA2A(ctx context.Context, agentURL string, body []byte, callerID string) (*http.Response, context.CancelFunc, error) {
// chains survive client disconnect (browser tab close). Two layers of
// timeout per dispatch:
//
// - Idle timeout (always applied): cancels the dispatch when no
// broadcaster events for the workspace fire for
// idleTimeoutDuration. Any progress event resets the clock — so
// a long but actively-streaming reply runs forever, while a
// wedged runtime fails fast.
// - Absolute ceiling (agent-to-agent only): 30 min cap as a
// defence against runaway delegation loops. Canvas dispatches
// have no absolute ceiling — the user can wait as long as they
// want, the idle timer is the only hangup signal.
//
// Either layer is overridable by the X-Timeout header upstream in
// ProxyA2A; X-Timeout: 0 explicitly disables the absolute ceiling.
func (h *WorkspaceHandler) dispatchA2A(ctx context.Context, workspaceID, agentURL string, body []byte, callerID string) (*http.Response, context.CancelFunc, error) {
// #1483 SSRF defense-in-depth: the primary call path through
// proxyA2ARequest → resolveAgentURL already validates via isSafeURL
// (a2a_proxy.go:424), but adding the check here closes the gap for
@ -494,19 +526,41 @@ func (h *WorkspaceHandler) dispatchA2A(ctx context.Context, agentURL string, bod
return nil, nil, &proxyDispatchBuildError{err: err}
}
forwardCtx := context.WithoutCancel(ctx)
var cancel context.CancelFunc
var ceilingCancel context.CancelFunc
if _, hasDeadline := ctx.Deadline(); !hasDeadline {
if callerID == "" {
forwardCtx, cancel = context.WithTimeout(forwardCtx, 5*time.Minute)
} else {
forwardCtx, cancel = context.WithTimeout(forwardCtx, 30*time.Minute)
if callerID != "" {
forwardCtx, ceilingCancel = context.WithTimeout(forwardCtx, 30*time.Minute)
}
// callerID == "" (canvas): no absolute ceiling. The idle
// timeout below is the only deadline.
}
// Idle timeout — cancels the dispatch ctx after
// idleTimeoutDuration of broadcaster silence for this workspace.
// Always applied (canvas + agent-to-agent both benefit; the
// ceiling above is a separate runaway-loop cap that only fires
// for agent traffic). Combines with the ceiling cancel into a
// single returned cancel func that the caller defers.
// applyIdleTimeout needs SubscribeSSE which only lives on the
// concrete *Broadcaster, not on the EventEmitter interface the
// handler now stores. Type-assert + fall through to a no-op idle
// timer if the broadcaster doesn't support subscriptions (the
// EventEmitter mock used by some tests, e.g.). Production wires
// the concrete *Broadcaster, so the assertion always succeeds in
// real deploys.
var b *events.Broadcaster
if concrete, ok := h.broadcaster.(*events.Broadcaster); ok {
b = concrete
}
forwardCtx, idleCancel := applyIdleTimeout(forwardCtx, b, workspaceID, idleTimeoutDuration)
cancel := func() {
idleCancel()
if ceilingCancel != nil {
ceilingCancel()
}
}
req, err := http.NewRequestWithContext(forwardCtx, "POST", agentURL, bytes.NewReader(body))
if err != nil {
if cancel != nil {
cancel()
}
cancel()
// Wrap the construction failure so the caller can distinguish it
// from an upstream Do() error and produce the correct 500 response.
return nil, nil, &proxyDispatchBuildError{err: err}
@ -515,3 +569,52 @@ func (h *WorkspaceHandler) dispatchA2A(ctx context.Context, agentURL string, bod
resp, doErr := a2aClient.Do(req)
return resp, cancel, doErr
}
// applyIdleTimeout returns a child ctx that gets cancelled when no
// broadcaster events for `workspaceID` arrive for `idle` duration.
// Any incoming event resets the clock. The returned cancel func
// MUST be called to clean up the goroutine + subscription.
//
// nil broadcaster, non-positive idle, or empty workspaceID returns
// the parent ctx unchanged (and a no-op cancel) so test paths that
// don't wire a broadcaster keep working.
func applyIdleTimeout(parent context.Context, b *events.Broadcaster, workspaceID string, idle time.Duration) (context.Context, context.CancelFunc) {
	if b == nil || idle <= 0 || workspaceID == "" {
		return parent, func() {}
	}
	ctx, cancel := context.WithCancel(parent)
	sub, unsub := b.SubscribeSSE(workspaceID)
	go func() {
		defer unsub()
		timer := time.NewTimer(idle)
		defer timer.Stop()
		// Local alias so the channel case can be disabled: a receive
		// from a nil channel blocks forever, which is exactly the
		// "pure-timer mode" wanted once the subscription closes.
		subCh := sub
		for {
			select {
			case <-ctx.Done():
				return
			case _, ok := <-subCh:
				if !ok {
					// Subscription channel closed — fall back to
					// pure-timer mode. Don't cancel: another caller
					// may have closed our sub but the request itself
					// is still in flight. Let the timer or the
					// caller's defer drive cleanup.
					//
					// BUG FIX: the previous code did a bare
					// `continue` here, but a closed channel is
					// always ready, so the select spun hot on this
					// case — burning a core for up to `idle` until
					// the timer or ctx fired. Nil-ing the channel
					// makes this case block forever instead.
					subCh = nil
					continue
				}
				// Stop+drain pattern so a fired-but-unread timer
				// doesn't double-cancel after the Reset.
				if !timer.Stop() {
					select {
					case <-timer.C:
					default:
					}
				}
				timer.Reset(idle)
			case <-timer.C:
				cancel()
				return
			}
		}
	}()
	return ctx, cancel
}

View File

@ -5,6 +5,7 @@ import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
@ -600,9 +601,21 @@ func TestIsUpstreamBusyError(t *testing.T) {
}{
{"nil", nil, false},
{"context.DeadlineExceeded", context.DeadlineExceeded, true},
// applyIdleTimeout cancels its child ctx via context.WithCancel
// when the broadcaster silence window elapses — surfaces here
// as context.Canceled. Same "upstream busy" classification.
{"context.Canceled", context.Canceled, true},
{"wrapped context.Canceled", fmt.Errorf("dispatch wrapped: %w", context.Canceled), true},
{"io.EOF", io.EOF, true},
{"io.ErrUnexpectedEOF", io.ErrUnexpectedEOF, true},
{"wrapped context deadline string", fmt.Errorf(`Post "http://ws-foo:8000": context deadline exceeded`), true},
// Real net/http wraps context.DeadlineExceeded via *url.Error.Unwrap,
// so errors.Is(err, context.DeadlineExceeded) catches it. The
// pre-892de784 substring "context deadline exceeded" fallback
// also accepted a string-only error like
// `fmt.Errorf("Post: context deadline exceeded")`; that fallback
// was dropped because errors.Is handles the real shape and the
// substring was indistinguishable from a user-content match.
{"wrapped context deadline (errors.Is path)", fmt.Errorf("Post: %w", context.DeadlineExceeded), true},
{"wrapped EOF string", fmt.Errorf(`Post "http://ws-foo:8000": EOF`), true},
{"connection reset", fmt.Errorf("read tcp 127.0.0.1:8080->127.0.0.1:12345: connection reset by peer"), true},
{"generic dns error", fmt.Errorf("no such host"), false},
@ -1074,7 +1087,7 @@ func TestDispatchA2A_BuildRequestError(t *testing.T) {
handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
// Malformed URL causes http.NewRequestWithContext to fail.
_, cancel, err := handler.dispatchA2A(context.Background(), "http://%%badhost", []byte("{}"), "")
_, cancel, err := handler.dispatchA2A(context.Background(), "ws-target", "http://%%badhost", []byte("{}"), "")
if cancel != nil {
cancel()
}
@ -1097,13 +1110,13 @@ func TestDispatchA2A_CanvasTimeout(t *testing.T) {
}))
defer srv.Close()
resp, cancel, err := handler.dispatchA2A(context.Background(), srv.URL, []byte(`{}`), "")
resp, cancel, err := handler.dispatchA2A(context.Background(), "ws-target", srv.URL, []byte(`{}`), "")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
defer resp.Body.Close()
if cancel == nil {
t.Fatal("canvas caller (empty callerID) must set a timeout + return cancel")
t.Fatal("canvas caller must return a cancel func (idle-timeout cleanup)")
}
cancel() // restore
}
@ -1118,20 +1131,23 @@ func TestDispatchA2A_AgentTimeout(t *testing.T) {
}))
defer srv.Close()
resp, cancel, err := handler.dispatchA2A(context.Background(), srv.URL, []byte(`{}`), "ws-caller")
resp, cancel, err := handler.dispatchA2A(context.Background(), "ws-target", srv.URL, []byte(`{}`), "ws-caller")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
defer resp.Body.Close()
if cancel == nil {
t.Fatal("agent-to-agent caller must set a timeout + return cancel")
t.Fatal("agent-to-agent caller must return a cancel func (idle + ceiling cleanup)")
}
cancel()
}
func TestDispatchA2A_ContextDeadline_NoCancelAdded(t *testing.T) {
// When ctx already has a deadline, dispatchA2A must NOT layer its own
// timeout (cancel should be nil).
func TestDispatchA2A_ContextDeadline_NoExtraCeiling(t *testing.T) {
// When ctx already has a deadline, dispatchA2A must not layer
// its own absolute ceiling on top — the caller's deadline wins.
// The idle-timer cleanup still produces a non-nil cancel func
// (introduced by the always-on idle timeout) but the cancel func
// is safe to call repeatedly and from a deferred path.
setupTestDB(t)
setupTestRedis(t)
handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
@ -1144,17 +1160,95 @@ func TestDispatchA2A_ContextDeadline_NoCancelAdded(t *testing.T) {
ctx, ctxCancel := context.WithTimeout(context.Background(), 5*time.Second)
defer ctxCancel()
resp, cancel, err := handler.dispatchA2A(ctx, srv.URL, []byte(`{}`), "")
resp, cancel, err := handler.dispatchA2A(ctx, "ws-target", srv.URL, []byte(`{}`), "")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
defer resp.Body.Close()
if cancel != nil {
t.Error("cancel should be nil when ctx already has a deadline")
cancel()
if cancel == nil {
t.Error("cancel must be non-nil (idle-timer cleanup)")
}
}
// --- applyIdleTimeout ---
// TestApplyIdleTimeout_FiresOnSilence verifies the helper cancels its
// child ctx when no broadcaster events arrive for `idle` duration.
// Uses a short idle window (60ms) so the test runs fast.
func TestApplyIdleTimeout_FiresOnSilence(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	b := newTestBroadcaster()
	// Parent deadline is only a safety net so a broken helper can't
	// hang the test; the idle timer must fire long before 5s.
	parent, parentCancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer parentCancel()
	idleCtx, idleCancel := applyIdleTimeout(parent, b, "ws-silent", 60*time.Millisecond)
	defer idleCancel()
	select {
	case <-idleCtx.Done():
		// expected — no events ever arrived for ws-silent
	case <-time.After(2 * time.Second):
		t.Fatal("idleCtx never cancelled despite no events")
	}
	// The idle path cancels via context.WithCancel, so the error must
	// be Canceled, not DeadlineExceeded.
	if !errors.Is(idleCtx.Err(), context.Canceled) {
		t.Errorf("idleCtx err = %v, want context.Canceled", idleCtx.Err())
	}
}
// TestApplyIdleTimeout_ResetsOnEvent verifies that a broadcaster event
// for the workspace resets the timer. Sends one event mid-window and
// confirms ctx is still alive after the original deadline would have
// fired, but cancelled after a second silence window elapses.
//
// NOTE(review): timing-based; the 80ms window plus generous margins
// is meant to stay stable under CI scheduling jitter — widen the
// margins rather than the assertions if it flakes.
func TestApplyIdleTimeout_ResetsOnEvent(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	b := newTestBroadcaster()
	parent, parentCancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer parentCancel()
	idle := 80 * time.Millisecond
	idleCtx, idleCancel := applyIdleTimeout(parent, b, "ws-active", idle)
	defer idleCancel()
	// Send a progress event halfway through the window — should
	// extend the deadline by another `idle`.
	time.Sleep(idle / 2)
	b.BroadcastOnly("ws-active", "ACTIVITY_LOGGED", map[string]interface{}{"activity_type": "agent_log"})
	// At t = idle (original deadline), ctx must still be alive
	// because the event reset the clock.
	select {
	case <-idleCtx.Done():
		t.Fatal("idleCtx cancelled despite mid-window event resetting the timer")
	case <-time.After(idle - (idle / 2) + 10*time.Millisecond):
		// ok — past the original deadline, still alive
	}
	// Now wait for the second silence window to actually fire.
	select {
	case <-idleCtx.Done():
		// expected
	case <-time.After(idle + 200*time.Millisecond):
		t.Fatal("idleCtx never cancelled after the second silence window")
	}
}
// TestApplyIdleTimeout_NilBroadcasterDegradesGracefully — a nil
// broadcaster (as wired by some test paths) must hand back the parent
// ctx untouched, with a cancel func that is safe to call repeatedly.
func TestApplyIdleTimeout_NilBroadcasterDegradesGracefully(t *testing.T) {
	base := context.Background()
	got, stop := applyIdleTimeout(base, nil, "ws-x", 50*time.Millisecond)
	defer stop()
	if got != base {
		t.Error("nil broadcaster must return the parent ctx unchanged")
	}
	stop() // double-call must be harmless
}
// TestDispatchA2A_RejectsUnsafeURL is the #1483 defense-in-depth
// regression. setupTestDB disables SSRF for normal tests so existing
// dispatchA2A unit tests can hit httptest.NewServer (loopback) — we
@ -1162,6 +1256,10 @@ func TestDispatchA2A_ContextDeadline_NoCancelAdded(t *testing.T) {
// Production callers go through resolveAgentURL which already
// validates; this test pins that dispatchA2A is now safe even when
// called directly by a future caller that skips resolveAgentURL.
//
// Note: dispatchA2A's signature includes workspaceID (added by the
// idle-timeout work) so this test passes a stub value — the SSRF check
// fires before workspaceID is referenced.
func TestDispatchA2A_RejectsUnsafeURL(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
@ -1172,6 +1270,7 @@ func TestDispatchA2A_RejectsUnsafeURL(t *testing.T) {
// Cloud metadata IP — must be rejected before any HTTP call goes out.
_, cancel, err := handler.dispatchA2A(
context.Background(),
"ws-target",
"http://169.254.169.254/latest/meta-data/",
[]byte(`{}`),
"",
@ -1188,6 +1287,7 @@ func TestDispatchA2A_RejectsUnsafeURL(t *testing.T) {
}
}
// --- handleA2ADispatchError ---
func TestHandleA2ADispatchError_ContextDeadline(t *testing.T) {

View File

@ -0,0 +1,415 @@
package handlers
// chat_files.go — file upload/download for workspace chat.
//
// Split from templates.go because these endpoints have a different
// security model (no /configs write, no template fallback) and a
// different wire format (multipart in, binary-stream out). Template
// files are agent workspace configuration; chat files are user-agent
// conversation payloads.
import (
"archive/tar"
"bytes"
"context"
"crypto/rand"
"encoding/hex"
"fmt"
"io"
"log"
"mime"
"mime/multipart"
"net/http"
"path/filepath"
"regexp"
"strings"
"github.com/docker/docker/api/types/container"
"github.com/gin-gonic/gin"
)
// ChatFilesHandler serves file upload + download for chat. It
// composes the existing TemplatesHandler's Docker plumbing
// (findContainer, execInContainer, copyFilesToContainer) rather than
// duplicating them, so a bug fix in the Docker layer propagates to
// both endpoints.
type ChatFilesHandler struct {
	// templates supplies findContainer / execInContainer /
	// copyFilesToContainer — the shared Docker plumbing this handler
	// composes instead of duplicating.
	templates *TemplatesHandler
}

// NewChatFilesHandler wires the handler around an existing
// TemplatesHandler so both endpoint families share one Docker-access
// code path.
func NewChatFilesHandler(t *TemplatesHandler) *ChatFilesHandler {
	return &ChatFilesHandler{templates: t}
}
// chatUploadMaxBytes caps the full multipart request body so a
// malicious / runaway client can't OOM the server. 50 MB covers most
// documents + a handful of images per message; larger artefacts
// should go through git/S3 rather than chat.
const chatUploadMaxBytes = 50 * 1024 * 1024
// chatUploadMaxFileBytes caps individual files in a multi-file upload.
// Keeping the per-file cap below the total lets a user send, say, a
// 5 MB PDF + 10 screenshots without tripping the batch limit on any
// single attachment.
const chatUploadMaxFileBytes = 25 * 1024 * 1024
// chatUploadDir is the in-container path where user-uploaded chat
// attachments land. Under /workspace so the file persists with the
// workspace volume and is readable by the agent without any extra
// plumbing — the agent just reads from the URI path we return.
const chatUploadDir = "/workspace/.molecule/chat-uploads"
// unsafeFilenameChars matches anything outside the conservative
// {alnum, dot, underscore, dash} set. Filenames get rewritten
// character-class at a time, so embedded paths, control chars,
// newlines, quotes, and shell metachars never reach the filesystem.
var unsafeFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9._\-]`)
// contentDispositionAttachment builds a safe `attachment;
// filename=...` header value for `name`. Inside the quoted filename=
// parameter, `"` and `\` are backslash-escaped per RFC 6266 §4.1 and
// all control characters (including CR/LF, which would otherwise
// terminate the header early) are dropped. A percent-encoded
// filename* parameter (RFC 5987) is emitted alongside so non-ASCII
// names survive in modern clients. Hardening matters here: agents can
// write arbitrary filenames into /workspace, and those names reach
// this header via `filepath.Base(path)` without any agent-side
// sanitisation guarantee.
func contentDispositionAttachment(name string) string {
	var quoted []rune
	for _, r := range name {
		if r == '\r' || r == '\n' {
			continue // would split the header — drop outright
		}
		if r < 0x20 || r == 0x7f {
			continue // other control chars dropped
		}
		if r == '"' || r == '\\' {
			quoted = append(quoted, '\\') // quoted-string escape
		}
		quoted = append(quoted, r)
	}
	// filename= — double-quoted, escaped; readable by legacy clients.
	// filename*= — RFC 5987 percent-encoded UTF-8, preferred when present.
	return fmt.Sprintf(`attachment; filename="%s"; filename*=UTF-8''%s`,
		string(quoted), urlPathEscape(name))
}
// urlPathEscape percent-encodes every byte of s outside the RFC 3986
// unreserved set (ALPHA / DIGIT / "-" / "." / "_" / "~"). It is
// deliberately stricter than net/url.PathEscape, which leaves "/"
// unescaped because "/" is legal inside URL paths — here a "/" in a
// filename is exactly what must be neutralised, as defence-in-depth
// against an agent writing a path-like name.
func urlPathEscape(s string) string {
	unreserved := func(c byte) bool {
		switch {
		case c >= 'A' && c <= 'Z', c >= 'a' && c <= 'z', c >= '0' && c <= '9':
			return true
		case c == '-', c == '.', c == '_', c == '~':
			return true
		}
		return false
	}
	var out strings.Builder
	out.Grow(len(s))
	for i := 0; i < len(s); i++ {
		if unreserved(s[i]) {
			out.WriteByte(s[i])
		} else {
			fmt.Fprintf(&out, "%%%02X", s[i])
		}
	}
	return out.String()
}
// sanitizeFilename reduces an arbitrary client-supplied name to a
// conservative basename: path components are stripped, spaces become
// underscores, anything outside {alnum, dot, underscore, dash} is
// replaced by "_", the result is capped at 100 bytes (preserving an
// extension of up to 16 bytes), and degenerate results ("", ".", "..")
// collapse to "file".
func sanitizeFilename(in string) string {
	name := strings.ReplaceAll(filepath.Base(in), " ", "_")
	name = unsafeFilenameChars.ReplaceAllString(name, "_")
	if len(name) > 100 {
		// Byte slicing is safe here: the regex pass above has already
		// replaced every non-ASCII byte, so no rune can be split.
		ext := filepath.Ext(name)
		if len(ext) > 16 {
			ext = "" // absurdly long "extension" — just truncate flat
		}
		name = name[:100-len(ext)] + ext
	}
	switch name {
	case "", ".", "..":
		return "file"
	}
	return name
}
// ChatUploadedFile is the per-file response returned from POST
// /workspaces/:id/chat/uploads. Clients include this payload (or a
// trimmed subset) in their outgoing A2A `message/send` parts.
type ChatUploadedFile struct {
	// URI uses a custom "workspace:" scheme so clients can resolve it
	// against the streaming Download endpoint regardless of where the
	// canvas itself is hosted. The path component is always absolute
	// within the workspace container.
	URI string `json:"uri"`
	// Name is the filename as staged in the container — presumably
	// the sanitized basename; confirm against Upload's staging code.
	Name string `json:"name"`
	// MimeType is the content type for the attachment; omitted when
	// empty. NOTE(review): the detection source (client-declared vs
	// sniffed) is not visible in this view — verify in Upload.
	MimeType string `json:"mimeType,omitempty"`
	// Size is the file length in bytes.
	Size int64 `json:"size"`
}
// Upload handles POST /workspaces/:id/chat/uploads.
// Accepts multipart/form-data with one or more `files` fields, stages
// each under /workspace/.molecule/chat-uploads with a UUID prefix,
// and returns the list of URIs for the caller to attach to an A2A
// message.
//
// Responses:
//   - 200 {"files": []ChatUploadedFile} on success
//   - 400 invalid workspace ID / unparseable form / no `files` field
//   - 413 a file exceeds the per-file byte cap
//   - 503 the workspace container is not running
func (h *ChatFilesHandler) Upload(c *gin.Context) {
	workspaceID := c.Param("id")
	if err := validateWorkspaceID(workspaceID); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
		return
	}
	// Hard cap the request body BEFORE ParseMultipartForm — otherwise
	// a client could chunk-upload past the cap before Go notices.
	c.Request.Body = http.MaxBytesReader(c.Writer, c.Request.Body, chatUploadMaxBytes)
	if err := c.Request.ParseMultipartForm(chatUploadMaxBytes); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "failed to parse multipart form"})
		return
	}
	form := c.Request.MultipartForm
	var headers []*multipart.FileHeader
	if form != nil && form.File != nil {
		headers = form.File["files"]
	}
	if len(headers) == 0 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "expected at least one 'files' field"})
		return
	}
	ctx := c.Request.Context()
	containerName := h.templates.findContainer(ctx, workspaceID)
	if containerName == "" {
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"})
		return
	}
	// Build the archive in memory. Files are byte-preserving through
	// Go's string<->[]byte (the tar helper takes map[string]string but
	// the conversion is a literal copy, not a UTF-8 reinterpretation).
	archive := map[string]string{}
	uploaded := make([]ChatUploadedFile, 0, len(headers))
	for _, fh := range headers {
		// Fast reject on the client-declared size before reading bytes;
		// the post-read check below is the authoritative one.
		if fh.Size > chatUploadMaxFileBytes {
			c.JSON(http.StatusRequestEntityTooLarge, gin.H{
				"error": fmt.Sprintf("%s exceeds per-file limit (%d MB)", fh.Filename, chatUploadMaxFileBytes/(1024*1024)),
			})
			return
		}
		f, err := fh.Open()
		if err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": "failed to read upload"})
			return
		}
		// LimitReader guards against a truthful-but-lying Size header:
		// if the multipart stream carries more bytes than declared, we
		// stop at the cap instead of growing the buffer.
		data, err := io.ReadAll(io.LimitReader(f, chatUploadMaxFileBytes+1))
		f.Close()
		if err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": "failed to read upload"})
			return
		}
		// The +1 sentinel byte above distinguishes "exactly at the
		// cap" (allowed) from "over the cap" (rejected here).
		if int64(len(data)) > chatUploadMaxFileBytes {
			c.JSON(http.StatusRequestEntityTooLarge, gin.H{
				"error": fmt.Sprintf("%s exceeds per-file limit (%d MB)", fh.Filename, chatUploadMaxFileBytes/(1024*1024)),
			})
			return
		}
		name := sanitizeFilename(fh.Filename)
		// 16-byte (UUID-equivalent) random prefix. Within a single
		// batch we also check for collisions — birthday on 128 bits
		// is astronomical, but a bad PRNG or single re-used draw
		// would silently overwrite a sibling upload with its own
		// content and return two URIs pointing at one file.
		var stored string
		for attempt := 0; attempt < 4; attempt++ {
			idBytes := make([]byte, 16)
			if _, err := rand.Read(idBytes); err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to allocate upload ID"})
				return
			}
			candidate := hex.EncodeToString(idBytes) + "-" + name
			if _, taken := archive[candidate]; !taken {
				stored = candidate
				break
			}
		}
		if stored == "" {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to allocate unique upload ID"})
			return
		}
		archive[stored] = string(data)
		mt := fh.Header.Get("Content-Type")
		if mt == "" {
			// Client declared no type — fall back to extension-based
			// detection on the sanitized name.
			mt = mime.TypeByExtension(filepath.Ext(name))
		}
		uploaded = append(uploaded, ChatUploadedFile{
			URI:      "workspace:" + chatUploadDir + "/" + stored,
			Name:     name,
			MimeType: mt,
			Size:     int64(len(data)),
		})
	}
	// mkdir -p is idempotent; we fire it every upload instead of
	// caching state here so container restarts don't surprise us.
	_, _ = h.templates.execInContainer(ctx, containerName, []string{"mkdir", "-p", chatUploadDir})
	// Defence in depth: pre-remove each target path before extracting
	// the tar. An agent with write access to /workspace could in
	// theory race-create a symlink at <chatUploadDir>/<stored-name>
	// pointing at a sensitive in-container path (its own /etc/*,
	// mounted secrets). Docker's tar extraction on some drivers
	// follows pre-existing symlinks at the destination. `rm -f` the
	// exact stored-name closes that window — the UUID prefix on the
	// name makes a successful race effectively impossible, but this
	// guard costs nothing and documents the intent.
	rmArgs := []string{"rm", "-f", "--"}
	for stored := range archive {
		rmArgs = append(rmArgs, chatUploadDir+"/"+stored)
	}
	_, _ = h.templates.execInContainer(ctx, containerName, rmArgs)
	if err := h.copyFlatToContainer(ctx, containerName, chatUploadDir, archive); err != nil {
		log.Printf("Chat upload copy failed for %s: %v", workspaceID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to stage files in workspace"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"files": uploaded})
}
// copyFlatToContainer extracts one tar of flat files into destPath
// inside the container. Unlike the shared copyFilesToContainer helper
// (which prepends destPath into tar entry names — correct for its
// callers whose files relative-live inside a nested tree), this
// helper writes tar entries with ONLY the flat filename so Docker's
// extraction at destPath lands them directly in destPath, not at
// destPath/destPath/... as the shared helper would.
// Filenames are validated to contain no path separator so nothing
// can escape destPath via an embedded "../" or a leading "/".
func (h *ChatFilesHandler) copyFlatToContainer(ctx context.Context, containerName, destPath string, files map[string]string) error {
	if h.templates.docker == nil {
		return fmt.Errorf("docker not available")
	}
	var payload bytes.Buffer
	w := tar.NewWriter(&payload)
	for flatName, body := range files {
		// Anything with path structure (separators, dot-dirs, empty
		// name) could escape destPath on extraction — fail the batch.
		unsafe := flatName == "" || flatName == "." || flatName == ".." ||
			strings.ContainsAny(flatName, "/\\")
		if unsafe {
			return fmt.Errorf("unsafe flat filename: %q", flatName)
		}
		raw := []byte(body)
		hdr := tar.Header{
			Name:     flatName, // relative — Docker resolves against destPath
			Mode:     0644,
			Size:     int64(len(raw)),
			Typeflag: tar.TypeReg,
		}
		if err := w.WriteHeader(&hdr); err != nil {
			return fmt.Errorf("tar header %q: %w", flatName, err)
		}
		if _, err := w.Write(raw); err != nil {
			return fmt.Errorf("tar write %q: %w", flatName, err)
		}
	}
	if err := w.Close(); err != nil {
		return fmt.Errorf("tar close: %w", err)
	}
	return h.templates.docker.CopyToContainer(ctx, containerName, destPath, &payload, container.CopyToContainerOptions{})
}
// Download handles GET /workspaces/:id/chat/download?path=<abs path>.
// Streams the file bytes from the container with a correct
// Content-Type and attachment Content-Disposition. Binary-safe —
// unlike the existing JSON ReadFile endpoint which carries content
// as a string (lossy for non-UTF-8 bytes).
//
// Responses:
//   - 200 raw file bytes on success
//   - 400 missing/relative/unrooted/traversal path, or not a regular file
//   - 404 the container has no file at the path
//   - 503 docker or the workspace container is unavailable
func (h *ChatFilesHandler) Download(c *gin.Context) {
	workspaceID := c.Param("id")
	if err := validateWorkspaceID(workspaceID); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
		return
	}
	path := c.Query("path")
	if path == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "path query required"})
		return
	}
	if !filepath.IsAbs(path) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "path must be absolute"})
		return
	}
	// Path must land under one of the allowed roots — mirrors the
	// ReadFile security model and prevents arbitrary reads of /etc
	// or other system paths via this endpoint.
	rooted := false
	for root := range allowedRoots {
		if path == root || strings.HasPrefix(path, root+"/") {
			rooted = true
			break
		}
	}
	if !rooted {
		c.JSON(http.StatusBadRequest, gin.H{"error": "path must be under /configs, /workspace, /home, or /plugins"})
		return
	}
	// Reject anything that canonicalises differently or contains a
	// traversal segment. Defence-in-depth on top of the prefix check.
	// NOTE: the ".." substring check also rejects legitimate names
	// like "a..b.txt" — deliberate strictness, not an oversight.
	if filepath.Clean(path) != path || strings.Contains(path, "..") {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid path"})
		return
	}
	ctx := c.Request.Context()
	if h.templates.docker == nil {
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "docker unavailable"})
		return
	}
	containerName := h.templates.findContainer(ctx, workspaceID)
	if containerName == "" {
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"})
		return
	}
	// docker cp returns a tar stream containing the requested path.
	// For a regular file that's a single tar entry; we extract and
	// stream the body through.
	reader, _, err := h.templates.docker.CopyFromContainer(ctx, containerName, path)
	if err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "file not found"})
		return
	}
	defer reader.Close()
	tr := tar.NewReader(reader)
	hdr, err := tr.Next()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read archive"})
		return
	}
	if hdr.Typeflag != tar.TypeReg {
		// Directory, symlink, device, etc. — refuse rather than stream
		// something the client cannot treat as plain file bytes.
		c.JSON(http.StatusBadRequest, gin.H{"error": "path is not a regular file"})
		return
	}
	name := filepath.Base(path)
	mt := mime.TypeByExtension(filepath.Ext(name))
	if mt == "" {
		mt = "application/octet-stream"
	}
	c.Header("Content-Type", mt)
	c.Header("Content-Length", fmt.Sprintf("%d", hdr.Size))
	c.Header("Content-Disposition", contentDispositionAttachment(name))
	c.Status(http.StatusOK)
	// Stream exactly hdr.Size bytes. CopyN was chosen over LimitReader
	// because it returns an error when the source is short — that
	// surfaces a bug in the tar extraction path immediately instead
	// of silently truncating. Agents can legitimately produce files
	// larger than the 50 MB upload cap (that's a per-request inbound
	// cap, not a per-artifact one), so we cannot clamp here.
	if _, err := io.CopyN(c.Writer, tr, hdr.Size); err != nil {
		log.Printf("Chat download stream error for %s (%s): %v", workspaceID, path, err)
	}
}

View File

@ -0,0 +1,194 @@
package handlers
// Unit tests for chat_files.go. The Docker-touching paths (Upload
// actually copying into a container, Download actually streaming tar)
// are exercised via integration tests — docker-in-docker is out of
// scope for the unit suite. These tests cover the validation + error
// surfaces that a caller can reach without a running container.
import (
"bytes"
"mime/multipart"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/gin-gonic/gin"
)
// TestSanitizeFilename pins the core sanitization contract: spaces
// and unsafe characters become underscores, directory components are
// stripped, and degenerate inputs collapse to "file".
func TestSanitizeFilename(t *testing.T) {
	// input → expected sanitized output
	expectations := map[string]string{
		"report.pdf":       "report.pdf",
		"my file.pdf":      "my_file.pdf",
		"../../etc/passwd": "passwd",
		"weird;$name`.txt": "weird__name_.txt",
		"":                 "file",
		".":                "file",
		"..":               "file",
	}
	for input, want := range expectations {
		if got := sanitizeFilename(input); got != want {
			t.Errorf("sanitizeFilename(%q) = %q, want %q", input, got, want)
		}
	}
}
// TestSanitizeFilename_LongNamePreservesExtension verifies the
// length cap: a 120-char base + ".pdf" must be truncated to ≤100
// bytes while keeping the extension so content-type inference still
// works.
func TestSanitizeFilename_LongNamePreservesExtension(t *testing.T) {
	overlong := strings.Repeat("a", 120) + ".pdf"
	got := sanitizeFilename(overlong)
	if n := len(got); n > 100 {
		t.Errorf("filename not truncated: len=%d", n)
	}
	if !strings.HasSuffix(got, ".pdf") {
		t.Errorf("extension stripped: %q", got)
	}
}
// TestChatUpload_InvalidWorkspaceID: a malformed workspace ID must be
// rejected with 400 before any form parsing happens.
func TestChatUpload_InvalidWorkspaceID(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	handler := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "not-a-uuid"}}
	ctx.Request = httptest.NewRequest("POST", "/workspaces/not-a-uuid/chat/uploads", nil)
	handler.Upload(ctx)
	if rec.Code != http.StatusBadRequest {
		t.Errorf("expected 400 on invalid workspace id, got %d: %s", rec.Code, rec.Body.String())
	}
}
// TestChatUpload_MissingFiles: a well-formed multipart body that has
// no `files` field must 400 with an error naming the missing field.
func TestChatUpload_MissingFiles(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	handler := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
	// Multipart body with no `files` field — only a text field.
	var body bytes.Buffer
	form := multipart.NewWriter(&body)
	_ = form.WriteField("other", "value")
	form.Close()
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000001"}}
	req := httptest.NewRequest("POST", "/workspaces/00000000-0000-0000-0000-000000000001/chat/uploads", &body)
	req.Header.Set("Content-Type", form.FormDataContentType())
	ctx.Request = req
	handler.Upload(ctx)
	if rec.Code != http.StatusBadRequest {
		t.Errorf("expected 400 when files field missing, got %d: %s", rec.Code, rec.Body.String())
	}
	if !strings.Contains(rec.Body.String(), "files") {
		t.Errorf("expected error to mention files field: %s", rec.Body.String())
	}
}
// TestChatDownload_InvalidPath covers every path-validation rejection
// in Download: missing, relative, outside the allowed roots, and
// traversal — each must 400 with its specific error message.
func TestChatDownload_InvalidPath(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	handler := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
	for _, tc := range []struct {
		name, path, wantSubstr string
	}{
		{"empty", "", "path query required"},
		{"relative", "workspace/foo.txt", "must be absolute"},
		{"wrong root", "/etc/passwd", "must be under"},
		{"traversal", "/workspace/../etc/passwd", "invalid path"},
	} {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			rec := httptest.NewRecorder()
			ctx, _ := gin.CreateTestContext(rec)
			ctx.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000001"}}
			ctx.Request = httptest.NewRequest("GET", "/workspaces/xxx/chat/download?path="+tc.path, nil)
			handler.Download(ctx)
			if rec.Code != http.StatusBadRequest {
				t.Errorf("expected 400 for %s, got %d: %s", tc.name, rec.Code, rec.Body.String())
			}
			if !strings.Contains(rec.Body.String(), tc.wantSubstr) {
				t.Errorf("expected error to contain %q, got: %s", tc.wantSubstr, rec.Body.String())
			}
		})
	}
}
// TestContentDispositionAttachment_Escapes checks the header builder
// against injection and encoding hazards: quotes are escaped, CR/LF
// are dropped, and non-ASCII names get an RFC 5987 filename* form.
func TestContentDispositionAttachment_Escapes(t *testing.T) {
	for _, tc := range []struct {
		name, input, wantSubstr string
	}{
		{
			name:       "plain ASCII passes through",
			input:      "report.pdf",
			wantSubstr: `filename="report.pdf"`,
		},
		{
			name:       "double-quote is backslash-escaped",
			input:      `weird".pdf`,
			wantSubstr: `filename="weird\".pdf"`,
		},
		{
			name:       "CR and LF dropped to prevent header injection",
			input:      "bad\r\nX-Leak: 1\r\n.txt",
			wantSubstr: `filename="badX-Leak: 1.txt"`,
		},
		{
			name:       "non-ASCII emits filename* percent-encoded",
			input:      "résumé.pdf",
			wantSubstr: "filename*=UTF-8''r%C3%A9sum%C3%A9.pdf",
		},
	} {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			header := contentDispositionAttachment(tc.input)
			if !strings.Contains(header, tc.wantSubstr) {
				t.Errorf("contentDispositionAttachment(%q) = %q, missing substring %q", tc.input, header, tc.wantSubstr)
			}
			// Must never contain a bare CR or LF — either would end the header.
			if strings.ContainsAny(header, "\r\n") {
				t.Errorf("header contains CR/LF: %q", header)
			}
		})
	}
}
// TestChatDownload_DockerUnavailable: with no docker client wired in,
// an otherwise-valid download request must 503 rather than panic.
func TestChatDownload_DockerUnavailable(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	handler := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) // docker=nil
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000001"}}
	ctx.Request = httptest.NewRequest("GET", "/workspaces/xxx/chat/download?path=/workspace/report.pdf", nil)
	handler.Download(ctx)
	if rec.Code != http.StatusServiceUnavailable {
		t.Errorf("expected 503 when docker is nil, got %d: %s", rec.Code, rec.Body.String())
	}
}

View File

@ -5,6 +5,7 @@ package handlers
import (
"context"
"encoding/json"
"fmt"
"log"
"net/http"
@ -180,6 +181,108 @@ func NewOrgHandler(wh *WorkspaceHandler, b *events.Broadcaster, p *provisioner.P
}
}
// EnvRequirement is either a single env var name (strict: that exact
// var must be configured) or an any-of group (any one of the listed
// names satisfies the requirement).
//
// YAML shapes accepted:
//
//	required_env:
//	  - GITHUB_TOKEN                                           # single
//	  - any_of: [ANTHROPIC_API_KEY, CLAUDE_CODE_OAUTH_TOKEN]   # OR group
//
// The any-of form exists because some runtimes accept either of two
// credential shapes — Claude Code takes ANTHROPIC_API_KEY or an OAuth
// token interchangeably, and forcing an org template to pick one
// would falsely block the other. For JSON (GET /org/templates),
// the same shapes round-trip: strings stay strings, groups stay
// {any_of: [...]}.
//
// After a successful unmarshal exactly one of Name / AnyOf is
// populated (the Unmarshal* methods enforce this); code may use
// `Name != ""` to distinguish the two forms.
type EnvRequirement struct {
	// Name is non-empty for a single required env var.
	Name string
	// AnyOf is non-empty for an OR group; any one member satisfies.
	AnyOf []string
}
// Members returns every env name this requirement considers —
// [Name] for single, AnyOf for groups. Used by preflight, collect,
// and the name-validation regex gate.
func (e EnvRequirement) Members() []string {
	if e.Name == "" {
		return e.AnyOf
	}
	return []string{e.Name}
}
// IsSatisfied reports whether any member of the requirement is
// present in `configured`. Single: exact-match. AnyOf: at least
// one hit.
func (e EnvRequirement) IsSatisfied(configured map[string]struct{}) bool {
	for _, candidate := range e.Members() {
		if _, present := configured[candidate]; present {
			return true
		}
	}
	return false
}
// UnmarshalYAML accepts either a scalar (string → single) or a map
// with an `any_of` list (→ group).
func (e *EnvRequirement) UnmarshalYAML(value *yaml.Node) error {
	// Scalar node → plain required name.
	if value.Kind == yaml.ScalarNode {
		var single string
		if err := value.Decode(&single); err != nil {
			return err
		}
		e.Name = single
		return nil
	}
	// Anything else must be the {any_of: [...]} mapping form.
	var group struct {
		AnyOf []string `yaml:"any_of"`
	}
	if err := value.Decode(&group); err != nil {
		return fmt.Errorf("env requirement must be a string or {any_of: [...]}: %w", err)
	}
	if len(group.AnyOf) == 0 {
		return fmt.Errorf("env requirement any_of must contain at least one env var")
	}
	e.AnyOf = group.AnyOf
	return nil
}
// MarshalJSON emits the dual shape so GET /org/templates callers get
// {"required_env": ["GITHUB_TOKEN", {"any_of": [...]}]}, matching
// the YAML syntax.
func (e EnvRequirement) MarshalJSON() ([]byte, error) {
	if e.Name == "" {
		type group struct {
			AnyOf []string `json:"any_of"`
		}
		return json.Marshal(group{AnyOf: e.AnyOf})
	}
	return json.Marshal(e.Name)
}
// UnmarshalJSON is the inverse — accepts the same dual shape so
// POST /org/import with an inline `template` body works too.
func (e *EnvRequirement) UnmarshalJSON(data []byte) error {
	// Try the plain-string form first; success means a single name.
	var single string
	if json.Unmarshal(data, &single) == nil {
		e.Name = single
		return nil
	}
	// Otherwise it must be the {any_of: [...]} object form.
	var group struct {
		AnyOf []string `json:"any_of"`
	}
	if err := json.Unmarshal(data, &group); err != nil {
		return fmt.Errorf("env requirement must be a string or {any_of: [...]}: %w", err)
	}
	if len(group.AnyOf) == 0 {
		return fmt.Errorf("env requirement any_of must contain at least one env var")
	}
	e.AnyOf = group.AnyOf
	return nil
}
// OrgTemplate is the YAML structure for an org hierarchy.
type OrgTemplate struct {
Name string `yaml:"name" json:"name"`
@ -189,6 +292,18 @@ type OrgTemplate struct {
// GlobalMemories is a list of org-wide memories seeded as GLOBAL scope
// on the first root workspace (PM) during org import. Issue #1050.
GlobalMemories []models.MemorySeed `yaml:"global_memories" json:"global_memories"`
// RequiredEnv lists env vars that MUST be configured globally (or
// on every workspace in the subtree that needs them) before import
// succeeds. Each entry is either a plain string (strict) or an
// {any_of: [...]} group (at least one member must be set). Declared
// at the org level for shared creds; also extensible per-workspace
// via OrgWorkspace.RequiredEnv for team-scoped credentials.
RequiredEnv []EnvRequirement `yaml:"required_env" json:"required_env"`
// RecommendedEnv is the "nice-to-have" tier — import still succeeds
// without them, but features degrade. Same single|any_of shape as
// RequiredEnv so a recommended OR group reads "set any one of these
// to unlock the feature; all missing = warning".
RecommendedEnv []EnvRequirement `yaml:"recommended_env" json:"recommended_env"`
}
type OrgDefaults struct {
@ -297,7 +412,17 @@ type OrgWorkspace struct {
X float64 `yaml:"x" json:"x"`
Y float64 `yaml:"y" json:"y"`
} `yaml:"canvas" json:"canvas"`
Children []OrgWorkspace `yaml:"children" json:"children"`
// RequiredEnv / RecommendedEnv declared at the workspace level
// narrow down what a specific team needs beyond the org-wide union.
// When GET /org/templates walks the tree, these flow up into
// OrgTemplate.RequiredEnv / RecommendedEnv. A workspace's subtree
// inherits: a parent declaring ANTHROPIC_API_KEY as required
// means every descendant considers it required too (no override
// needed at each leaf). Same single|any_of shape as the org-level
// lists.
RequiredEnv []EnvRequirement `yaml:"required_env" json:"required_env"`
RecommendedEnv []EnvRequirement `yaml:"recommended_env" json:"recommended_env"`
Children []OrgWorkspace `yaml:"children" json:"children"`
}
// ListTemplates handles GET /org/templates — lists available org templates.
@ -356,11 +481,18 @@ func (h *OrgHandler) ListTemplates(c *gin.Context) {
continue
}
count := countWorkspaces(tmpl.Workspaces)
// Walk the tree to collect required + recommended env union.
// Canvas uses these to render a preflight modal BEFORE firing
// the import — saves the user from a 15-workspace import that
// dies one container at a time on missing creds.
required, recommended := collectOrgEnv(&tmpl)
templates = append(templates, map[string]interface{}{
"dir": e.Name(),
"name": tmpl.Name,
"description": tmpl.Description,
"workspaces": count,
"dir": e.Name(),
"name": tmpl.Name,
"description": tmpl.Description,
"workspaces": count,
"required_env": required,
"recommended_env": recommended,
})
}
@ -372,6 +504,13 @@ func (h *OrgHandler) Import(c *gin.Context) {
var body struct {
Dir string `json:"dir"` // org template directory name
Template OrgTemplate `json:"template"` // or inline template
// Force skips the required-env preflight. Used by tooling
// that already computed the preflight client-side and wants
// to proceed despite missing creds (usually because the
// user explicitly acknowledged the tradeoff). Default behavior
// refuses the import with a 412 and the missing-key list so
// the canvas can surface them in its preflight modal.
Force bool `json:"force"`
}
if err := c.ShouldBindJSON(&body); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
@ -417,6 +556,59 @@ func (h *OrgHandler) Import(c *gin.Context) {
return
}
// Required-env preflight — refuses import when any required_env is
// missing from global_secrets (unless `force: true` overrides). The
// canvas runs the same check client-side against GET /org/templates
// output and shows a modal so users set keys before clicking Import;
// this server-side check is the authoritative guard in case a caller
// bypasses the UI (CLI, API clients, etc.). 412 Precondition Failed
// carries the missing-key list so tooling can render the same
// add-key flow.
required, _ := collectOrgEnv(&tmpl)
if body.Force {
// Log the bypass so a post-incident search can find who
// imported an org with missing creds. The common audit flow
// treats log.Printf at INFO as the low-cost trail for
// explicit-override actions — keeps force as a supported
// knob but makes it investigable.
log.Printf("Org import: force=true bypass — template=%q, required_env=%v", tmpl.Name, required)
} else if len(required) > 0 {
ctx := c.Request.Context()
configured, err := loadConfiguredGlobalSecretKeys(ctx)
if err != nil {
// Fail closed. Previously this fell through and imported
// anyway, defeating the preflight for exactly the case
// it's meant to cover. A DB hiccup should look like a
// retryable 500, not a silent green light for an import
// that will fail at container-start time on every node.
log.Printf("Org import preflight: global secrets lookup failed: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{
"error": "could not verify required environment variables; try again or pass force=true to override",
})
return
}
var missing []EnvRequirement
for _, req := range required {
// For a single requirement this is exact-match; for an
// any-of group, any one member satisfies. Groups whose
// alternative is already configured drop out here — the
// user doesn't need to re-configure them.
if !req.IsSatisfied(configured) {
missing = append(missing, req)
}
}
if len(missing) > 0 {
c.JSON(http.StatusPreconditionFailed, gin.H{
"error": "missing required environment variables",
"missing_env": missing,
"required_env": required,
"template": tmpl.Name,
"suggestion": "set these as global secrets (POST /settings/secrets) or pass force=true to override",
})
return
}
}
results := []map[string]interface{}{}
var createErr error
@ -428,7 +620,8 @@ func (h *OrgHandler) Import(c *gin.Context) {
// using subtree-aware grid slots (children that are themselves
// parents get a bigger slot so they don't overflow into siblings).
for _, ws := range tmpl.Workspaces {
if err := h.createWorkspaceTree(ws, nil, ws.Canvas.X, ws.Canvas.Y, tmpl.Defaults, orgBaseDir, &results, provisionSem); err != nil {
// Root: relX/relY == absX/absY (no parent to be relative to).
if err := h.createWorkspaceTree(ws, nil, ws.Canvas.X, ws.Canvas.Y, ws.Canvas.X, ws.Canvas.Y, tmpl.Defaults, orgBaseDir, &results, provisionSem); err != nil {
createErr = err
break
}

View File

@ -10,6 +10,8 @@ import (
"log"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
@ -28,7 +30,13 @@ import (
// parent.abs + childSlotInGrid(index, siblingSizes) computed by the
// caller. Storing already-absolute coords means a child that is itself
// a parent can simply compound the grid without any per-call math.
func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX, absY float64, defaults OrgDefaults, orgBaseDir string, results *[]map[string]interface{}, provisionSem chan struct{}) error {
// relX / relY are THIS workspace's position RELATIVE to its parent's
// absolute origin (i.e. childSlotInGrid output for children; 0,0 for
// roots since a root's absolute IS its relative). The broadcast
// payload ships relative coords so the canvas can drop the node
// straight into the parent's child-coordinate space without doing a
// canvas-wide absolute-position walk.
func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX, absY, relX, relY float64, defaults OrgDefaults, orgBaseDir string, results *[]map[string]interface{}, provisionSem chan struct{}) error {
// Apply defaults
runtime := ws.Runtime
if runtime == "" {
@ -132,10 +140,23 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
}
// Broadcast — include runtime so the canvas pill renders the right
// badge immediately instead of "unknown".
h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISIONING", id, map[string]interface{}{
// badge immediately instead of "unknown". parent_id + x/y let the
// canvas's org-deploy animation spawn the child from the parent's
// current coords and tween into its reserved slot, instead of
// landing in a default grid position first and snapping on the
// next hydrate.
payload := map[string]interface{}{
"name": ws.Name, "tier": tier, "runtime": runtime,
})
// Parent-relative coords — the canvas's React Flow node uses
// these as the node's position when parent_id is set (React
// Flow treats node.position as parent-relative when the node
// has a parentId). For roots, relX/relY == absX/absY.
"x": relX, "y": relY,
}
if parentID != nil {
payload["parent_id"] = *parentID
}
h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISIONING", id, payload)
// Seed initial memories from workspace config or defaults (issue #1050).
// Per-workspace initial_memories override defaults; if workspace has none,
@ -513,7 +534,9 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
slotX, slotY := childSlotInGrid(i, siblingSizes)
childAbsX := absX + slotX
childAbsY := absY + slotY
if err := h.createWorkspaceTree(child, &id, childAbsX, childAbsY, defaults, orgBaseDir, results, provisionSem); err != nil {
// slotX/slotY are already parent-relative — that's
// exactly what childSlotInGrid returns.
if err := h.createWorkspaceTree(child, &id, childAbsX, childAbsY, slotX, slotY, defaults, orgBaseDir, results, provisionSem); err != nil {
return err
}
time.Sleep(workspaceCreatePacingMs * time.Millisecond)
@ -523,6 +546,213 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
return nil
}
// envVarNamePattern guards template-supplied env var names against
// pathological inputs. A malicious template could ship
// required_env: ["'; DROP …"] or whitespace-only entries that would
// flow through collectOrgEnv → into the 412 response body and,
// worse, into the modal's PUT /settings/secrets input. Schema
// already has `key TEXT NOT NULL UNIQUE` and our queries are
// parameterised so SQL injection isn't the threat — the real risks
// are UI rendering weirdness (newlines, NUL bytes, zero-width chars)
// and downstream env-var semantics (POSIX requires uppercase +
// underscore + digit). A strict regex filters both classes of
// problem at a single choke point.
//
// Anchored ^…$: first char must be an uppercase letter, remainder
// uppercase letters / digits / underscore, total length ≤ 128.
var envVarNamePattern = regexp.MustCompile(`^[A-Z][A-Z0-9_]{0,127}$`)
// sanitizeEnvMembers filters a requirement's member list through the
// name-validation regex, logging rejections. Returns the filtered
// list and a boolean indicating whether any valid members remain.
// Used so a group containing one valid + one bogus name is kept
// (valid member carries the group) rather than silently dropped.
func sanitizeEnvMembers(members []string, where string) ([]string, bool) {
	kept := make([]string, 0, len(members))
	for _, name := range members {
		if envVarNamePattern.MatchString(name) {
			kept = append(kept, name)
			continue
		}
		// Empty entries are YAML noise — reject quietly; anything
		// else gets a log line so template authors can find the typo.
		if name != "" {
			log.Printf("collectOrgEnv: rejecting invalid env var name %q from %s (must match %s)", name, where, envVarNamePattern)
		}
	}
	return kept, len(kept) > 0
}
// envRequirementKey canonicalises a requirement for dedup — sorted
// member list joined with NUL so `any_of: [A, B]` and `any_of: [B, A]`
// collapse to the same key. Single requirements are length-1 groups.
// The input slice is never mutated (we sort a copy).
func envRequirementKey(members []string) string {
	sorted := make([]string, len(members))
	copy(sorted, members)
	sort.Strings(sorted)
	return strings.Join(sorted, "\x00")
}
// collectOrgEnv walks the whole template tree and returns the union of
// required_env and recommended_env declared anywhere — at the org
// level, on root workspaces, or on any nested child. Deduplicates by
// group membership (same set of members = same requirement) and
// sorts deterministically so the canvas sees a stable order.
//
// "Required wins" rules:
//
//   - A requirement that appears in BOTH required and recommended
//     (same members) surfaces only as required.
//   - A single-name requirement (e.g. "API_KEY") and a group that
//     contains that same name (e.g. {any_of: [API_KEY, OTHER]}) are
//     NOT deduplicated — they're semantically different (strict vs
//     satisfiable-by-alternative) and the stricter "single" one wins,
//     so the any-of group is dropped when its members overlap with a
//     strict requirement declared elsewhere.
//
// Invalid names fail envVarNamePattern; the filter is applied per
// group so a group with one bogus entry keeps the rest. A group
// whose ALL members are invalid is dropped entirely with a log.
func collectOrgEnv(tmpl *OrgTemplate) (required, recommended []EnvRequirement) {
	reqByKey := map[string]EnvRequirement{}
	recByKey := map[string]EnvRequirement{}
	// Names covered by strict (single) required entries. A group in
	// EITHER tier whose any-of contains ONE of these names is
	// dominated by the strict requirement and gets dropped on the
	// second pass.
	strictRequiredNames := map[string]struct{}{}
	// accept sanitizes members, canonicalises to a dedup key, and
	// inserts first-wins into the given tier map. markStrict is true
	// only for the required tier so strictRequiredNames tracks only
	// strict *required* singles.
	accept := func(into map[string]EnvRequirement, src []EnvRequirement, where string, markStrict bool) {
		for _, req := range src {
			members, ok := sanitizeEnvMembers(req.Members(), where)
			if !ok {
				continue
			}
			key := envRequirementKey(members)
			if _, exists := into[key]; exists {
				continue
			}
			// A single requirement stays single only if its one name
			// survived sanitization; a group whittled to one member
			// remains a (one-member) group — still satisfiable-by-
			// alternative in spirit, and keyed identically either way.
			if req.Name != "" && len(members) == 1 {
				into[key] = EnvRequirement{Name: members[0]}
				if markStrict {
					strictRequiredNames[members[0]] = struct{}{}
				}
			} else {
				into[key] = EnvRequirement{AnyOf: members}
			}
		}
	}
	accept(reqByKey, tmpl.RequiredEnv, "template root", true)
	accept(recByKey, tmpl.RecommendedEnv, "template root", false)
	// Depth-first walk over every workspace in the tree, folding each
	// node's declarations into the same two tier maps.
	var walk func([]OrgWorkspace)
	walk = func(ws []OrgWorkspace) {
		for _, w := range ws {
			accept(reqByKey, w.RequiredEnv, "workspace "+w.Name, true)
			accept(recByKey, w.RecommendedEnv, "workspace "+w.Name, false)
			walk(w.Children)
		}
	}
	walk(tmpl.Workspaces)
	// Required wins across tiers: any requirement whose members
	// overlap with a strict required name gets dropped from
	// recommended. Keeps the canvas modal from showing the same
	// key in both sections.
	prune := func(from map[string]EnvRequirement) {
		for k, r := range from {
			for _, m := range r.Members() {
				if _, strict := strictRequiredNames[m]; strict {
					delete(from, k)
					break
				}
			}
		}
	}
	prune(recByKey)
	// Same-tier: a strict required X dominates any-of groups in
	// required that CONTAIN X (a group saying "any of X, Y" is
	// automatically satisfied when X is required anyway, so it's
	// redundant). Same logic applied to recommended.
	pruneSameTier := func(tier map[string]EnvRequirement) {
		strictInTier := map[string]struct{}{}
		for _, r := range tier {
			if r.Name != "" {
				strictInTier[r.Name] = struct{}{}
			}
		}
		for k, r := range tier {
			if len(r.AnyOf) == 0 {
				continue
			}
			for _, m := range r.AnyOf {
				if _, strict := strictInTier[m]; strict {
					delete(tier, k)
					break
				}
			}
		}
	}
	pruneSameTier(reqByKey)
	pruneSameTier(recByKey)
	required = flattenAndSortRequirements(reqByKey)
	recommended = flattenAndSortRequirements(recByKey)
	return required, recommended
}
// flattenAndSortRequirements converts a dedup map into a slice with a
// deterministic order: strict single-name requirements first (sorted
// alphabetically by name), any-of groups after (sorted by their
// canonical joined-member key). The same template therefore always
// produces the same modal layout on the canvas.
func flattenAndSortRequirements(by map[string]EnvRequirement) []EnvRequirement {
	out := make([]EnvRequirement, 0, len(by))
	for _, req := range by {
		out = append(out, req)
	}
	sort.Slice(out, func(a, b int) bool {
		left, right := out[a], out[b]
		leftStrict := left.Name != ""
		rightStrict := right.Name != ""
		switch {
		case leftStrict != rightStrict:
			// Exactly one side is strict: strict entries sort first.
			return leftStrict
		case leftStrict:
			// Both strict: alphabetical by name.
			return left.Name < right.Name
		default:
			// Both groups: order by the canonical member key.
			return envRequirementKey(left.AnyOf) < envRequirementKey(right.AnyOf)
		}
	})
	return out
}
// loadConfiguredGlobalSecretKeys returns the set of key names present
// in global_secrets whose encrypted_value is non-empty. The
// octet_length filter deliberately excludes rows upserted with an
// empty payload (historical rows predating the binding:"required"
// guard on SetGlobal, or a future direct SQL path that skips it) —
// counting those as "configured" would let the org-import preflight
// pass while the per-container preflight still fails at start time,
// defeating the whole feature.
//
// The LIMIT is a sanity cap: a no-op at realistic tenant sizes
// (< 1k secrets), but it keeps one pathological table from wedging
// org imports. Hitting it is logged so operators can investigate.
const globalSecretsPreflightLimit = 10000

func loadConfiguredGlobalSecretKeys(ctx context.Context) (map[string]struct{}, error) {
	rows, err := db.DB.QueryContext(ctx,
		`SELECT key FROM global_secrets WHERE octet_length(encrypted_value) > 0 LIMIT $1`,
		globalSecretsPreflightLimit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	keys := map[string]struct{}{}
	for rows.Next() {
		var name string
		// Skip unscannable rows and blank keys; iteration-level errors
		// still surface through rows.Err() below.
		if scanErr := rows.Scan(&name); scanErr != nil || name == "" {
			continue
		}
		keys[name] = struct{}{}
	}
	if len(keys) == globalSecretsPreflightLimit {
		log.Printf("loadConfiguredGlobalSecretKeys: hit LIMIT %d — org-import preflight may be incomplete", globalSecretsPreflightLimit)
	}
	return keys, rows.Err()
}
func countWorkspaces(workspaces []OrgWorkspace) int {
count := len(workspaces)
for _, ws := range workspaces {

View File

@ -1,6 +1,7 @@
package handlers
import (
"sort"
"strings"
"testing"
"time"
@ -650,3 +651,428 @@ func TestOrgImport_ScheduleComputeError(t *testing.T) {
})
}
}
// ============================================================================
// Org env-preflight aggregation (collectOrgEnv)
// ============================================================================
// strictReq builds a slice of single-name EnvRequirements for test
// fixtures — the union-shape equivalent of the old []string literal.
func strictReq(names ...string) []EnvRequirement {
	reqs := make([]EnvRequirement, len(names))
	for i, name := range names {
		reqs[i] = EnvRequirement{Name: name}
	}
	return reqs
}
// anyOfReq builds a single any-of EnvRequirement for test fixtures.
// The member list is copied so fixtures never alias a caller's slice.
func anyOfReq(names ...string) EnvRequirement {
	copied := append([]string(nil), names...)
	return EnvRequirement{AnyOf: copied}
}
// reqNames flattens a slice of EnvRequirements into comparable strings:
// a strict requirement contributes its Name, an any-of group contributes
// "anyOf(A|B|C)" with members sorted. Assertions can then compare a
// single deterministic string form regardless of each entry's kind.
func reqNames(reqs []EnvRequirement) []string {
	names := make([]string, 0, len(reqs))
	for _, req := range reqs {
		if req.Name != "" {
			names = append(names, req.Name)
			continue
		}
		sortedMembers := append([]string(nil), req.AnyOf...)
		sort.Strings(sortedMembers)
		names = append(names, "anyOf("+strings.Join(sortedMembers, "|")+")")
	}
	return names
}
func TestCollectOrgEnv_UnionAcrossLevels(t *testing.T) {
	// Requirements declared at the org level, on a root workspace, and
	// on a nested child must all surface in the aggregated output.
	tmpl := &OrgTemplate{
		RequiredEnv:    strictReq("ANTHROPIC_API_KEY"),
		RecommendedEnv: strictReq("SLACK_WEBHOOK_URL"),
		Workspaces: []OrgWorkspace{
			{
				Name:        "Root",
				RequiredEnv: strictReq("GITHUB_TOKEN"),
				Children: []OrgWorkspace{
					{
						Name:           "Leaf",
						RequiredEnv:    strictReq("OPENROUTER_API_KEY"),
						RecommendedEnv: strictReq("DISCORD_WEBHOOK_URL"),
					},
				},
			},
		},
	}
	gotReq, gotRec := collectOrgEnv(tmpl)
	wantReq := []string{"ANTHROPIC_API_KEY", "GITHUB_TOKEN", "OPENROUTER_API_KEY"}
	if !stringSlicesEqual(reqNames(gotReq), wantReq) {
		t.Errorf("required mismatch: got %v, want %v", reqNames(gotReq), wantReq)
	}
	wantRec := []string{"DISCORD_WEBHOOK_URL", "SLACK_WEBHOOK_URL"}
	if !stringSlicesEqual(reqNames(gotRec), wantRec) {
		t.Errorf("recommended mismatch: got %v, want %v", reqNames(gotRec), wantRec)
	}
}
func TestCollectOrgEnv_RequiredWinsOverRecommended(t *testing.T) {
	// A key declared recommended at one layer and required at another
	// must surface only on the required side — required is strictly
	// stricter, and listing the same key in both tiers would confuse
	// the preflight modal.
	tmpl := &OrgTemplate{
		RecommendedEnv: strictReq("API_KEY"),
		Workspaces: []OrgWorkspace{
			{Name: "X", RequiredEnv: strictReq("API_KEY")},
		},
	}
	required, recommended := collectOrgEnv(tmpl)
	if len(required) != 1 || required[0].Name != "API_KEY" {
		t.Errorf("required should contain API_KEY, got %v", reqNames(required))
	}
	for _, entry := range recommended {
		if entry.Name == "API_KEY" {
			t.Errorf("API_KEY must not appear in recommended once required elsewhere")
		}
	}
}
func TestCollectOrgEnv_Dedup(t *testing.T) {
	// The same key declared five times across four tree positions
	// (twice at the org level, on two roots, and on a nested child)
	// must collapse to a single entry.
	tmpl := &OrgTemplate{
		RequiredEnv: strictReq("K", "K"),
		Workspaces: []OrgWorkspace{
			{Name: "A", RequiredEnv: strictReq("K")},
			{Name: "B", RequiredEnv: strictReq("K"), Children: []OrgWorkspace{
				{Name: "C", RequiredEnv: strictReq("K")},
			}},
		},
	}
	required, _ := collectOrgEnv(tmpl)
	if len(required) != 1 || required[0].Name != "K" {
		t.Errorf("dedup failed: got %v, want [K]", reqNames(required))
	}
}
func TestCollectOrgEnv_Empty(t *testing.T) {
tmpl := &OrgTemplate{}
req, rec := collectOrgEnv(tmpl)
if len(req) != 0 || len(rec) != 0 {
t.Errorf("empty template should produce empty slices, got req=%v rec=%v", reqNames(req), reqNames(rec))
}
}
// stringSlicesEqual reports ordered, element-wise equality of two
// string slices — collectOrgEnv sorts its output, so callers can
// compare deterministically.
func stringSlicesEqual(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if v != b[i] {
			return false
		}
	}
	return true
}
func TestCollectOrgEnv_RequiredWinsOnSameStruct(t *testing.T) {
	// Rare but legal to parse: the same key declared required AND
	// recommended on a single workspace node must still end up
	// required-only.
	tmpl := &OrgTemplate{
		Workspaces: []OrgWorkspace{
			{
				Name:           "X",
				RequiredEnv:    strictReq("API_KEY"),
				RecommendedEnv: strictReq("API_KEY"),
			},
		},
	}
	required, recommended := collectOrgEnv(tmpl)
	if len(required) != 1 || required[0].Name != "API_KEY" {
		t.Errorf("required should contain API_KEY once, got %v", reqNames(required))
	}
	for _, entry := range recommended {
		if entry.Name == "API_KEY" {
			t.Errorf("API_KEY must not appear in recommended when also required on same struct")
		}
	}
}
func TestCollectOrgEnv_RejectsInvalidNames(t *testing.T) {
	// Every name that fails envVarNamePattern (lowercase, path
	// traversal, whitespace, shell metacharacters, dash, empty) must be
	// dropped. The log line is not asserted; the surviving output is
	// proof enough that the filter fired.
	tmpl := &OrgTemplate{
		RequiredEnv: strictReq(
			"VALID_ONE",
			"lowercase_bad",
			"../../etc/passwd",
			"name with spaces",
			"WITH-DASH",
			"'; DROP TABLE users;--",
			"",
			"A", // single character — still valid per the regex
		),
	}
	required, _ := collectOrgEnv(tmpl)
	if !stringSlicesEqual(reqNames(required), []string{"A", "VALID_ONE"}) {
		t.Errorf("expected only valid names, got %v", reqNames(required))
	}
}
// TestOrgTemplate_ClaudeAnyOfAuthPreflight exercises the shape the
// ux-ab-lab template ships with: a single any-of group at the org
// level covering ANTHROPIC_API_KEY vs. CLAUDE_CODE_OAUTH_TOKEN, plus
// two strict recommended entries (SERPER_API_KEY, VERCEL_TOKEN).
// Proves the end-to-end YAML → OrgTemplate → collectOrgEnv → IsSatisfied
// pipeline works for the canonical "Claude sub OR API key" pattern
// without depending on the on-disk template file (org-templates/ is
// populated by the clone-manifest, not tracked in this monorepo).
func TestOrgTemplate_ClaudeAnyOfAuthPreflight(t *testing.T) {
	src := `
name: UX A/B Lab
required_env:
  - any_of:
      - ANTHROPIC_API_KEY
      - CLAUDE_CODE_OAUTH_TOKEN
recommended_env:
  - SERPER_API_KEY
  - VERCEL_TOKEN
workspaces:
  - name: Design Director
    children:
      - name: UX Researcher
      - name: Visual Designer
      - name: React Engineer
      - name: Deploy Engineer
      - name: A11y + SEO Auditor
      - name: Perf Auditor
`
	var tmpl OrgTemplate
	if err := yaml.Unmarshal([]byte(src), &tmpl); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	// Shape sanity: the YAML above declares exactly one root workspace
	// with six children.
	if len(tmpl.Workspaces) != 1 || len(tmpl.Workspaces[0].Children) != 6 {
		t.Fatalf("expected 1 root with 6 children, got shape %+v", tmpl.Workspaces)
	}
	// Aggregation: one required any-of group, two strict recommended.
	required, recommended := collectOrgEnv(&tmpl)
	if len(required) != 1 {
		t.Fatalf("expected 1 required requirement (the any-of group), got %d: %v", len(required), reqNames(required))
	}
	if required[0].Name != "" {
		t.Errorf("expected any-of group, got strict name %q", required[0].Name)
	}
	wantMembers := []string{"ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"}
	// Copy before sorting so the assertion doesn't mutate the result.
	got := append([]string(nil), required[0].AnyOf...)
	sort.Strings(got)
	if !stringSlicesEqual(got, wantMembers) {
		t.Errorf("any-of members mismatch: got %v, want %v", got, wantMembers)
	}
	// Either member should independently satisfy the group.
	if !required[0].IsSatisfied(map[string]struct{}{"ANTHROPIC_API_KEY": {}}) {
		t.Errorf("ANTHROPIC_API_KEY alone should satisfy the group")
	}
	if !required[0].IsSatisfied(map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}}) {
		t.Errorf("CLAUDE_CODE_OAUTH_TOKEN alone should satisfy the group")
	}
	if required[0].IsSatisfied(map[string]struct{}{"OPENAI_API_KEY": {}}) {
		t.Errorf("unrelated key should NOT satisfy the group")
	}
	wantRec := []string{"SERPER_API_KEY", "VERCEL_TOKEN"}
	if !stringSlicesEqual(reqNames(recommended), wantRec) {
		t.Errorf("recommended mismatch: got %v, want %v", reqNames(recommended), wantRec)
	}
}
// TestEnvRequirement_UnmarshalYAML proves both on-disk YAML spellings
// — a bare scalar and an `{any_of: [...]}` block — decode into
// EnvRequirement. User-authored org.yaml files flow through this
// path; a regression here would silently drop requirements.
func TestEnvRequirement_UnmarshalYAML(t *testing.T) {
	src := `
required_env:
  - GITHUB_TOKEN
  - any_of:
      - ANTHROPIC_API_KEY
      - CLAUDE_CODE_OAUTH_TOKEN
`
	var doc struct {
		RequiredEnv []EnvRequirement `yaml:"required_env"`
	}
	if err := yaml.Unmarshal([]byte(src), &doc); err != nil {
		t.Fatalf("unmarshal failed: %v", err)
	}
	if len(doc.RequiredEnv) != 2 {
		t.Fatalf("want 2 requirements, got %d", len(doc.RequiredEnv))
	}
	if doc.RequiredEnv[0].Name != "GITHUB_TOKEN" {
		t.Errorf("first should be strict GITHUB_TOKEN, got %+v", doc.RequiredEnv[0])
	}
	if doc.RequiredEnv[1].Name != "" || len(doc.RequiredEnv[1].AnyOf) != 2 {
		t.Errorf("second should be any-of group, got %+v", doc.RequiredEnv[1])
	}
}
// TestEnvRequirement_UnmarshalYAML_RejectsEmptyAnyOf guards against a
// template shipping `any_of: []`: an empty group can never be
// satisfied, so the parser must fail loudly instead of passing an
// impossible requirement through the preflight.
func TestEnvRequirement_UnmarshalYAML_RejectsEmptyAnyOf(t *testing.T) {
	src := `
required_env:
  - any_of: []
`
	var doc struct {
		RequiredEnv []EnvRequirement `yaml:"required_env"`
	}
	if err := yaml.Unmarshal([]byte(src), &doc); err == nil {
		t.Errorf("expected error for empty any_of, got nil: %+v", doc)
	}
}
// ---------------------------------------------------------------------
// any_of group tests — the EnvRequirement union shape lets a single
// requirement be satisfied by any one of several members (e.g.
// ANTHROPIC_API_KEY OR CLAUDE_CODE_OAUTH_TOKEN). collectOrgEnv and
// IsSatisfied together must handle this correctly.
// ---------------------------------------------------------------------
func TestEnvRequirement_IsSatisfied(t *testing.T) {
	configured := map[string]struct{}{
		"ANTHROPIC_API_KEY": {},
		"GITHUB_TOKEN":      {},
	}
	cases := []struct {
		name string
		req  EnvRequirement
		want bool
	}{
		{"strict present", EnvRequirement{Name: "ANTHROPIC_API_KEY"}, true},
		{"strict absent", EnvRequirement{Name: "MISSING_KEY"}, false},
		{"any-of first member present", anyOfReq("ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"), true},
		{"any-of second member present", anyOfReq("CLAUDE_CODE_OAUTH_TOKEN", "ANTHROPIC_API_KEY"), true},
		{"any-of none present", anyOfReq("OPENAI_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"), false},
		{"any-of single member present", anyOfReq("GITHUB_TOKEN"), true},
	}
	for _, tc := range cases {
		if got := tc.req.IsSatisfied(configured); got != tc.want {
			t.Errorf("%s: got %v, want %v", tc.name, got, tc.want)
		}
	}
}
func TestCollectOrgEnv_AnyOfGroupPreserved(t *testing.T) {
	// An org-level group with two alternatives must survive aggregation
	// as one EnvRequirement carrying both members.
	tmpl := &OrgTemplate{
		RequiredEnv: []EnvRequirement{
			anyOfReq("ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"),
		},
	}
	required, _ := collectOrgEnv(tmpl)
	if len(required) != 1 {
		t.Fatalf("expected 1 requirement, got %d: %v", len(required), reqNames(required))
	}
	if required[0].Name != "" {
		t.Errorf("expected any-of group, got strict name %q", required[0].Name)
	}
	wantMembers := []string{"ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"}
	gotMembers := append([]string(nil), required[0].AnyOf...)
	sort.Strings(gotMembers)
	if !stringSlicesEqual(gotMembers, wantMembers) {
		t.Errorf("any-of members mismatch: got %v, want %v", gotMembers, wantMembers)
	}
}
func TestCollectOrgEnv_AnyOfGroupDedup(t *testing.T) {
	// Two groups with identical member sets — listed in different
	// order, declared at different tree levels — must collapse to one.
	tmpl := &OrgTemplate{
		RequiredEnv: []EnvRequirement{
			anyOfReq("ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"),
		},
		Workspaces: []OrgWorkspace{
			{
				Name: "Root",
				RequiredEnv: []EnvRequirement{
					anyOfReq("CLAUDE_CODE_OAUTH_TOKEN", "ANTHROPIC_API_KEY"),
				},
			},
		},
	}
	required, _ := collectOrgEnv(tmpl)
	if len(required) != 1 {
		t.Errorf("expected 1 requirement after dedup, got %d: %v", len(required), reqNames(required))
	}
}
func TestCollectOrgEnv_StrictDominatesGroup(t *testing.T) {
	// A strict requirement on X makes any-of groups that CONTAIN X
	// redundant — X will be configured regardless, which satisfies any
	// group mentioning it — so same-tier pruning drops the group.
	tmpl := &OrgTemplate{
		RequiredEnv: []EnvRequirement{
			{Name: "ANTHROPIC_API_KEY"},
			anyOfReq("ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"),
		},
	}
	required, _ := collectOrgEnv(tmpl)
	if len(required) != 1 || required[0].Name != "ANTHROPIC_API_KEY" {
		t.Errorf("strict should dominate group, got %v", reqNames(required))
	}
}
func TestCollectOrgEnv_StrictRequiredDominatesRecommendedGroup(t *testing.T) {
	// Cross-tier domination: a strict required X prunes recommended
	// any-of groups mentioning X, while unrelated recommended entries
	// survive.
	tmpl := &OrgTemplate{
		RequiredEnv: strictReq("ANTHROPIC_API_KEY"),
		RecommendedEnv: []EnvRequirement{
			anyOfReq("ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN"),
			{Name: "SLACK_WEBHOOK_URL"},
		},
	}
	required, recommended := collectOrgEnv(tmpl)
	if len(required) != 1 || required[0].Name != "ANTHROPIC_API_KEY" {
		t.Errorf("required mismatch: got %v", reqNames(required))
	}
	if len(recommended) != 1 || recommended[0].Name != "SLACK_WEBHOOK_URL" {
		t.Errorf("recommended mismatch: got %v, want [SLACK_WEBHOOK_URL]", reqNames(recommended))
	}
}
func TestCollectOrgEnv_AnyOfWithInvalidMemberKeepsValidOnes(t *testing.T) {
	// Per-member sanitization: one valid + one invalid member keeps the
	// group alive via the surviving member, while a group whose members
	// are ALL invalid disappears entirely.
	tmpl := &OrgTemplate{
		RequiredEnv: []EnvRequirement{
			anyOfReq("VALID_ONE", "lowercase_bad"),
			anyOfReq("'; DROP TABLE;--", ""),
		},
	}
	required, _ := collectOrgEnv(tmpl)
	if len(required) != 1 {
		t.Fatalf("expected 1 requirement, got %d: %v", len(required), reqNames(required))
	}
	// A group reduced to one member may surface as either a single-name
	// requirement or a one-member group; accept both spellings.
	if required[0].Name != "VALID_ONE" && !stringSlicesEqual(required[0].AnyOf, []string{"VALID_ONE"}) {
		t.Errorf("expected VALID_ONE to survive, got %v", reqNames(required))
	}
}

View File

@ -454,6 +454,29 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
return
}
// Self-reported runtime wedge: takes precedence over the error_rate
// path. The heartbeat task lives in its own asyncio task and keeps
// firing 200s even after claude_agent_sdk locks up on
// `Control request timeout: initialize` — so error_rate stays at 0
// (no calls have been recorded as errors yet) while every actual
// /a2a POST hangs. The workspace tells us about that case via
// runtime_state="wedged"; we honor it directly. Sample_error from
// the heartbeat carries the human-readable reason ("SDK init
// timeout — restart workspace"), which the canvas surfaces in the
// degraded card without the operator scraping container logs.
if payload.RuntimeState == "wedged" && currentStatus == "online" {
_, err := db.DB.ExecContext(ctx,
`UPDATE workspaces SET status = 'degraded', updated_at = now() WHERE id = $1 AND status = 'online'`,
payload.WorkspaceID)
if err != nil {
log.Printf("Heartbeat: failed to mark %s degraded (wedged): %v", payload.WorkspaceID, err)
}
h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_DEGRADED", payload.WorkspaceID, map[string]interface{}{
"runtime_state": "wedged",
"sample_error": payload.SampleError,
})
}
if currentStatus == "online" && payload.ErrorRate >= 0.5 {
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = 'degraded', updated_at = now() WHERE id = $1`, payload.WorkspaceID); err != nil {
log.Printf("Heartbeat: failed to mark %s degraded: %v", payload.WorkspaceID, err)
@ -464,7 +487,13 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
})
}
if currentStatus == "degraded" && payload.ErrorRate < 0.1 {
// Recovery from degraded → online when BOTH the error rate has
// fallen back AND the workspace is no longer reporting a wedge.
// The wedge condition is sticky for the process lifetime
// (claude_sdk_executor only clears it on restart), so when the
// container restarts and starts heartbeating fresh — RuntimeState
// is empty, error_rate is 0 — this branch flips us back to online.
if currentStatus == "degraded" && payload.ErrorRate < 0.1 && payload.RuntimeState == "" {
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = 'online', updated_at = now() WHERE id = $1`, payload.WorkspaceID); err != nil {
log.Printf("Heartbeat: failed to recover %s to online: %v", payload.WorkspaceID, err)
}

View File

@ -298,6 +298,163 @@ func TestHeartbeatHandler_OnlineStaysOnline(t *testing.T) {
}
}
// ==================== Heartbeat — runtime wedge (claude_agent_sdk init timeout) ====================

// TestHeartbeatHandler_RuntimeWedged_FlipsOnlineToDegraded verifies the
// runtime_state="wedged" path. Heartbeat task in the workspace lives in
// its own asyncio task and keeps reporting online while the Claude SDK
// is wedged on Control request timeout; the workspace tells us about
// the wedge via this field, and we honor it by flipping status →
// degraded with the wedge reason in last_sample_error.
//
// NOTE: sqlmock expectations are ordered — the sequence below mirrors
// the handler's exact query order and must not be rearranged.
func TestHeartbeatHandler_RuntimeWedged_FlipsOnlineToDegraded(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewRegistryHandler(broadcaster)
	wedgeMsg := "claude_agent_sdk wedge: Control request timeout: initialize — restart workspace to recover"
	// 1) current-task lookup the handler performs before the heartbeat
	// UPDATE.
	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs("ws-wedged").
		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
	// 2) Heartbeat UPDATE — sample_error carries the wedge reason from the
	// workspace's _runtime_state_payload() helper.
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs("ws-wedged", 0.0, wedgeMsg, 0, 600, "").
		WillReturnResult(sqlmock.NewResult(0, 1))
	// 3) evaluateStatus: currentStatus = online
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
		WithArgs("ws-wedged").
		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))
	// 4) The wedge-handling branch fires the degraded UPDATE with the
	// `AND status = 'online'` guard (race-safe against concurrent
	// removal). Match the SQL with the guard included.
	mock.ExpectExec("UPDATE workspaces SET status = 'degraded'.*status = 'online'").
		WithArgs("ws-wedged").
		WillReturnResult(sqlmock.NewResult(0, 1))
	// 5) RecordAndBroadcast for WORKSPACE_DEGRADED
	mock.ExpectExec("INSERT INTO structure_events").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"workspace_id":"ws-wedged","error_rate":0.0,"sample_error":"` + wedgeMsg + `","active_tasks":0,"uptime_seconds":600,"runtime_state":"wedged"}`
	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Heartbeat(c)
	if w.Code != http.StatusOK {
		t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestHeartbeatHandler_DegradedRecoversOnlyAfterWedgeClears verifies that
// the degraded → online recovery path requires BOTH error_rate < 0.1
// AND runtime_state cleared. A workspace still reporting wedged stays
// degraded even when error_rate happens to be 0 (no calls have been
// recorded as errors yet — the wedge is captured as a runtime state,
// not an error count).
func TestHeartbeatHandler_DegradedRecoversOnlyAfterWedgeClears(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewRegistryHandler(broadcaster)
	// Ordered sqlmock expectations: current-task lookup, heartbeat
	// UPDATE, then the status read feeding evaluateStatus.
	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs("ws-still-wedged").
		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs("ws-still-wedged", 0.0, "still broken", 0, 800, "").
		WillReturnResult(sqlmock.NewResult(0, 1))
	// currentStatus = degraded
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
		WithArgs("ws-still-wedged").
		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("degraded"))
	// No additional UPDATE expected — the recovery branch's
	// `runtime_state == ""` guard blocks the flip back to online.
	// (sqlmock fails the test if any unmocked Exec runs.)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"workspace_id":"ws-still-wedged","error_rate":0.0,"sample_error":"still broken","active_tasks":0,"uptime_seconds":800,"runtime_state":"wedged"}`
	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Heartbeat(c)
	if w.Code != http.StatusOK {
		t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestHeartbeatHandler_DegradedToOnline_AfterWedgeClears verifies the
// happy-path recovery: a workspace previously marked degraded is
// post-restart, error_rate is back to 0, and runtime_state is empty
// (the new process re-imported claude_sdk_executor with the flag
// fresh). Status flips back to online and a WORKSPACE_ONLINE event
// fires.
func TestHeartbeatHandler_DegradedToOnline_AfterWedgeClears(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewRegistryHandler(broadcaster)
	// Ordered sqlmock expectations mirroring the handler's query order.
	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs("ws-recovered").
		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs("ws-recovered", 0.0, "", 0, 30, "").
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
		WithArgs("ws-recovered").
		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("degraded"))
	// Recovery UPDATE fires (degraded → online).
	mock.ExpectExec("UPDATE workspaces SET status = 'online'").
		WithArgs("ws-recovered").
		WillReturnResult(sqlmock.NewResult(0, 1))
	// RecordAndBroadcast for the recovery event.
	mock.ExpectExec("INSERT INTO structure_events").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	// runtime_state intentionally absent (== ""); error_rate = 0; this
	// is exactly what a freshly-restarted workspace's first heartbeat
	// looks like.
	body := `{"workspace_id":"ws-recovered","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":30}`
	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Heartbeat(c)
	if w.Code != http.StatusOK {
		t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// ==================== UpdateCard ====================
func TestUpdateCard_Success(t *testing.T) {

View File

@ -466,3 +466,70 @@ func (h *SecretsHandler) GetModel(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"model": string(decrypted), "source": "workspace_secrets"})
}
// SetModel handles PUT /workspaces/:id/model — persists the model slug
// into workspace_secrets under the MODEL_PROVIDER key (the same key
// GetModel reads back). For hermes the slug is hermes-native (e.g.
// "minimax/MiniMax-M2.7"); for langgraph it's the legacy
// "provider:model" form — either way it's an opaque string the runtime
// interprets on its next start.
//
// An empty model string clears the override instead. Both paths kick
// off an async restart so the new env (HERMES_DEFAULT_MODEL etc.)
// takes effect immediately — without this the user clicks
// Save+Restart, the canvas PUT lands, but the already-restarting
// container misses the window and boots with the old value.
func (h *SecretsHandler) SetModel(c *gin.Context) {
	workspaceID := c.Param("id")
	if !uuidRegex.MatchString(workspaceID) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
		return
	}
	var payload struct {
		Model string `json:"model"`
	}
	if err := c.ShouldBindJSON(&payload); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
		return
	}
	ctx := c.Request.Context()
	// Empty slug ⇒ clear the override row, restart, and report cleared.
	if payload.Model == "" {
		if _, err := db.DB.ExecContext(ctx,
			`DELETE FROM workspace_secrets WHERE workspace_id = $1 AND key = 'MODEL_PROVIDER'`,
			workspaceID); err != nil {
			log.Printf("SetModel delete error: %v", err)
			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to clear model"})
			return
		}
		if h.restartFunc != nil {
			go h.restartFunc(workspaceID)
		}
		c.JSON(http.StatusOK, gin.H{"status": "cleared"})
		return
	}
	// Non-empty slug ⇒ encrypt, then upsert keyed on (workspace, key).
	encrypted, err := crypto.Encrypt([]byte(payload.Model))
	if err != nil {
		log.Printf("SetModel encrypt error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encrypt model"})
		return
	}
	version := crypto.CurrentEncryptionVersion()
	if _, err := db.DB.ExecContext(ctx, `
	INSERT INTO workspace_secrets (workspace_id, key, encrypted_value, encryption_version)
	VALUES ($1, 'MODEL_PROVIDER', $2, $3)
	ON CONFLICT (workspace_id, key) DO UPDATE
	SET encrypted_value = $2, encryption_version = $3, updated_at = now()
	`, workspaceID, encrypted, version); err != nil {
		log.Printf("SetModel upsert error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save model"})
		return
	}
	if h.restartFunc != nil {
		go h.restartFunc(workspaceID)
	}
	c.JSON(http.StatusOK, gin.H{"status": "saved", "model": payload.Model})
}

View File

@ -6,6 +6,7 @@ import (
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
@ -535,6 +536,88 @@ func TestSecretsGetModel_DBError(t *testing.T) {
}
}
// ==================== SetModel ====================

// TestSecretsSetModel_Upsert: a non-empty model slug takes the
// encrypt-and-upsert path and triggers the async restart callback.
func TestSecretsSetModel_Upsert(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// Buffered channel so the fire-and-forget restart goroutine can
	// send without blocking.
	restartCalled := make(chan string, 1)
	handler := NewSecretsHandler(func(id string) { restartCalled <- id })
	// The value and version args are ciphertext/derived, so only the
	// workspace ID is matched exactly.
	mock.ExpectExec(`INSERT INTO workspace_secrets`).
		WithArgs("00000000-0000-0000-0000-000000000001", sqlmock.AnyArg(), sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(1, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000001"}}
	c.Request = httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000001/model",
		strings.NewReader(`{"model":"minimax/MiniMax-M2.7"}`))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.SetModel(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	// Restart fires in a goroutine; wait briefly rather than assume it
	// already ran.
	select {
	case id := <-restartCalled:
		if id != "00000000-0000-0000-0000-000000000001" {
			t.Errorf("restart called with wrong id: %s", id)
		}
	case <-time.After(500 * time.Millisecond):
		t.Error("restart was not triggered")
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestSecretsSetModel_EmptyClears: an empty model slug takes the
// DELETE path (clearing the MODEL_PROVIDER override) instead of the
// upsert.
func TestSecretsSetModel_EmptyClears(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewSecretsHandler(func(string) {})
	// Only a DELETE is expected; an INSERT here would fail the mock.
	mock.ExpectExec(`DELETE FROM workspace_secrets`).
		WithArgs("00000000-0000-0000-0000-000000000002").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000002"}}
	c.Request = httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000002/model",
		strings.NewReader(`{"model":""}`))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.SetModel(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
func TestSecretsSetModel_InvalidID(t *testing.T) {
	// A non-UUID path param must be rejected before any DB work; the
	// restart callback is nil on purpose — it must never be reached.
	setupTestDB(t)
	setupTestRedis(t)
	handler := NewSecretsHandler(nil)
	rec := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(rec)
	c.Params = gin.Params{{Key: "id", Value: "not-a-uuid"}}
	c.Request = httptest.NewRequest("PUT", "/workspaces/not-a-uuid/model",
		strings.NewReader(`{"model":"x"}`))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.SetModel(c)
	if rec.Code != http.StatusBadRequest {
		t.Errorf("expected 400 for bad UUID, got %d", rec.Code)
	}
}
// ==================== Values — Phase 30.2 decrypted pull ====================
// These tests target the secrets.Values handler (GET /workspaces/:id/secrets/values)

View File

@ -5,6 +5,7 @@ package handlers
// Delete (cascade + purge), and input validation helpers.
import (
"context"
"database/sql"
"errors"
"fmt"
@ -12,6 +13,7 @@ import (
"net/http"
"path/filepath"
"strings"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
@ -390,44 +392,69 @@ func (h *WorkspaceHandler) Delete(c *gin.Context) {
// Any concurrent heartbeat / registration / liveness-triggered restart
// will see status='removed' and bail out early.
//
// #1843: Stop() errors used to be silently swallowed. On the CP/EC2
// backend, Stop() calls the control plane's DELETE workspaces endpoint
// to terminate the EC2; if that errors (CP transient 5xx, network),
// the EC2 stays running with no DB row to track it — the
// "14 orphan workspace EC2s on a 0-customer account" scenario.
// Aggregate Stop failures and surface them as 500 so the client can
// retry. The retry replays Stop with the same instance_id (still
// readable from the row even after status='removed') — idempotent on
// the CP side. RemoveVolume errors stay log-and-continue: those are
// local cleanup of /var/data, not infra-leak class.
// Combines two concerns:
//
// 1. Detach cleanup from the request ctx via WithoutCancel + a 30s
// timeout, so when the canvas's `api.del` resolves on our 200
// (and gin cancels c.Request.Context()), in-flight Docker
// stop/remove calls don't get cancelled mid-operation. The
// previous shape leaked containers every time the canvas hung
// up promptly: Stop returned "context canceled", the container
// stayed up, and the next RemoveVolume failed with
// "volume in use". 30s is generous for Docker daemon round-
// trips (typical: <2s) and bounds a stuck daemon.
//
// 2. #1843: aggregate Stop() failures into stopErrs so the
// post-deletion block surfaces them as 500. On the CP/EC2
// backend, Stop() calls control plane's DELETE endpoint to
// terminate the EC2; if that errors (transient 5xx, network),
// the EC2 stays running with no DB row to track it (the
// "orphan EC2 on a 0-customer account" scenario). Loud-fail
// instead of silent-leak — clients retry, Stop's instance_id
// lookup is idempotent against status='removed'. RemoveVolume
// errors stay log-and-continue (local cleanup, not infra-leak).
cleanupCtx, cleanupCancel := context.WithTimeout(
context.WithoutCancel(ctx), 30*time.Second)
defer cleanupCancel()
var stopErrs []error
stopAndRemove := func(wsID string) {
if h.provisioner == nil {
return
}
// Check Stop's error before attempting RemoveVolume — the
// previous code discarded it and immediately tried the
// volume remove, which always fails with "volume in use"
// when Stop didn't actually kill the container. The orphan
// sweeper (registry/orphan_sweeper.go) catches what we
// skip here on the next reconcile pass.
if err := h.provisioner.Stop(cleanupCtx, wsID); err != nil {
log.Printf("Delete %s container stop failed: %v — leaving volume for orphan sweeper", wsID, err)
stopErrs = append(stopErrs, fmt.Errorf("stop %s: %w", wsID, err))
return
}
if err := h.provisioner.RemoveVolume(cleanupCtx, wsID); err != nil {
log.Printf("Delete %s volume removal warning: %v", wsID, err)
}
}
for _, descID := range descendantIDs {
if h.provisioner != nil {
if err := h.provisioner.Stop(ctx, descID); err != nil {
log.Printf("Delete descendant %s stop error: %v", descID, err)
stopErrs = append(stopErrs, fmt.Errorf("stop descendant %s: %w", descID, err))
}
if err := h.provisioner.RemoveVolume(ctx, descID); err != nil {
log.Printf("Delete descendant %s volume removal warning: %v", descID, err)
}
}
db.ClearWorkspaceKeys(ctx, descID)
h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_REMOVED", descID, map[string]interface{}{})
stopAndRemove(descID)
db.ClearWorkspaceKeys(cleanupCtx, descID)
// Detach broadcaster ctx for the same reason as the cleanup
// above — RecordAndBroadcast does an INSERT INTO
// structure_events + Redis Publish. If the canvas hangs up,
// a request-ctx-bound INSERT can be cancelled mid-write,
// leaving other WS clients ignorant of the cascade. The DB
// row is already 'removed' so it's recoverable, but the
// inconsistency is avoidable.
h.broadcaster.RecordAndBroadcast(cleanupCtx, "WORKSPACE_REMOVED", descID, map[string]interface{}{})
}
// Stop + remove volume for the workspace itself
if h.provisioner != nil {
if err := h.provisioner.Stop(ctx, id); err != nil {
log.Printf("Delete %s stop error: %v", id, err)
stopErrs = append(stopErrs, fmt.Errorf("stop %s: %w", id, err))
}
if err := h.provisioner.RemoveVolume(ctx, id); err != nil {
log.Printf("Delete %s volume removal warning: %v", id, err)
}
}
db.ClearWorkspaceKeys(ctx, id)
stopAndRemove(id)
db.ClearWorkspaceKeys(cleanupCtx, id)
h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_REMOVED", id, map[string]interface{}{
h.broadcaster.RecordAndBroadcast(cleanupCtx, "WORKSPACE_REMOVED", id, map[string]interface{}{
"cascade_deleted": len(descendantIDs),
})

View File

@ -176,20 +176,33 @@ func (h *WorkspaceHandler) provisionWorkspaceOpts(workspaceID, templatePath stri
// Try to recover by applying the runtime-default template. payload.Runtime
// is populated by the caller (Restart handler / Create handler) from the
// DB row — same source of truth the apply_template=true path uses.
// Try `<runtime>-default` first (historical naming), then plain
// `<runtime>` (current naming in workspace-configs-templates/).
// Only claude-code has the `-default` suffix; every other
// runtime directory uses the bare name. Without the bare-name
// fallback, recovery only worked for claude-code and blank
// workspaces on every other runtime bricked on first start.
recovered := false
if payload.Runtime != "" {
runtimeTemplate := filepath.Join(h.configsDir, payload.Runtime+"-default")
if _, statErr := os.Stat(runtimeTemplate); statErr == nil {
log.Printf("Provisioner: auto-recover for %s — config volume empty, applying %s-default template (#1858)",
workspaceID, payload.Runtime)
templatePath = runtimeTemplate
// Rebuild cfg with the recovered template path so Start() sees it.
cfg = h.buildProvisionerConfig(workspaceID, templatePath, configFiles, payload, envVars, pluginsPath, awarenessNamespace)
cfg.ResetClaudeSession = resetClaudeSession
recovered = true
} else {
log.Printf("Provisioner: auto-recover for %s — runtime template %s not found: %v",
workspaceID, runtimeTemplate, statErr)
candidates := []string{
filepath.Join(h.configsDir, payload.Runtime+"-default"),
filepath.Join(h.configsDir, payload.Runtime),
}
for _, runtimeTemplate := range candidates {
if _, statErr := os.Stat(runtimeTemplate); statErr == nil {
log.Printf("Provisioner: auto-recover for %s — config volume empty, applying %s template (#1858)",
workspaceID, filepath.Base(runtimeTemplate))
templatePath = runtimeTemplate
// Rebuild cfg with the recovered template path so Start() sees it.
cfg = h.buildProvisionerConfig(workspaceID, templatePath, configFiles, payload, envVars, pluginsPath, awarenessNamespace)
cfg.ResetClaudeSession = resetClaudeSession
recovered = true
break
}
}
if !recovered {
log.Printf("Provisioner: auto-recover for %s — no template found under %s for runtime=%s",
workspaceID, h.configsDir, payload.Runtime)
}
}
@ -616,6 +629,17 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
// payload.Model at boot), this is a no-op — no harm in the switch
// being empty for those cases.
func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
// Fall back to the MODEL_PROVIDER workspace secret when the caller
// didn't pass one explicitly. This is the path that "Save+Restart"
// hits — Restart builds its payload from the workspaces row (no model
// column there) so payload.Model is always empty, but the user's
// canvas selection was stored as MODEL_PROVIDER via PUT /model and
// is already loaded into envVars here. Without this fallback hermes
// silently boots with the template default and errors "No LLM
// provider configured" even though the user picked a valid model.
if model == "" {
model = envVars["MODEL_PROVIDER"]
}
if model == "" {
return
}

View File

@ -14,6 +14,30 @@ import (
"github.com/gin-gonic/gin"
)
// abortAuthLookupError is the one response shape for "auth middleware
// hit a datastore failure while validating a token". It answers 503
// rather than 500 because the semantic is "platform infrastructure
// unavailable, retry shortly" — not a bug in our application logic.
// The structured `code` field lets the canvas tell this apart from a
// generic 5xx and show a dedicated diagnostic ("Postgres/Redis
// unreachable — check local services") instead of a vague
// `auth check failed` toast.
//
// `where` names the call site (WorkspaceAuth vs AdminAuth, the
// HasAnyLiveTokenGlobal probe vs orgtoken.Validate) so operators can
// grep the log for the origin. The underlying error is logged but
// deliberately kept OUT of the response body — it could carry DB
// hostnames, connection-string fragments, or internal code paths.
func abortAuthLookupError(c *gin.Context, where string, err error) {
	// Full detail goes to the log only.
	log.Printf("wsauth: %s: datastore lookup failed (returning 503): %v", where, err)

	// Client-facing body: generic message + machine-readable code.
	body := gin.H{
		"error": "platform datastore unavailable — retry shortly",
		"code":  "platform_unavailable",
	}
	c.AbortWithStatusJSON(http.StatusServiceUnavailable, body)
}
// WorkspaceAuth returns a Gin middleware that enforces per-workspace bearer-token
// authentication on /workspaces/:id/* sub-routes.
//
@ -73,8 +97,7 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc {
c.Next()
return
} else if !errors.Is(err, orgtoken.ErrInvalidToken) {
log.Printf("wsauth: WorkspaceAuth: orgtoken.Validate: %v", err)
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "auth check failed"})
abortAuthLookupError(c, "WorkspaceAuth: orgtoken.Validate", err)
return
}
// Per-workspace token — narrowest scope, bound to this :id.
@ -136,8 +159,7 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
hasLive, err := wsauth.HasAnyLiveTokenGlobal(ctx, database)
if err != nil {
log.Printf("wsauth: AdminAuth: HasAnyLiveTokenGlobal failed: %v", err)
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "auth check failed"})
abortAuthLookupError(c, "AdminAuth: HasAnyLiveTokenGlobal", err)
return
}
if !hasLive {
@ -214,8 +236,7 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
return
} else if !errors.Is(err, orgtoken.ErrInvalidToken) {
// DB error — fail closed and log. Don't expose DB text.
log.Printf("wsauth: AdminAuth: orgtoken.Validate: %v", err)
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "auth check failed"})
abortAuthLookupError(c, "AdminAuth: orgtoken.Validate", err)
return
}

View File

@ -2,8 +2,11 @@ package middleware
import (
"crypto/sha256"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/DATA-DOG/go-sqlmock"
@ -1699,3 +1702,57 @@ func TestAdminAuth_684_SpecificRoutes_NoBearer_Returns401(t *testing.T) {
})
}
}
// ==================== platform-unavailable classification ====================
//
// abortAuthLookupError replaces the prior opaque
// `500 {"error":"auth check failed"}` with a 503 + structured code so
// the canvas can render a dedicated diagnostic instead of a confusing
// toast. Pin both the status code and the body shape against
// regression — this is the contract the canvas's
// PlatformUnavailableError classifier reads at api.ts.
// Pin the full contract abortAuthLookupError emits when AdminAuth's
// datastore lookup fails: HTTP 503, a JSON body with
// code=platform_unavailable, a human-readable error string, and no
// leakage of the raw DB error text. The canvas-side classifier reads
// this exact shape.
func TestAdminAuth_DatastoreError_Returns503PlatformUnavailable(t *testing.T) {
	mockDB, mock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock.New: %v", err)
	}
	defer mockDB.Close()

	// Simulate Postgres being down: the HasAnyLiveTokenGlobal COUNT
	// query fails with a connection error.
	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
		WillReturnError(errors.New("dial tcp [::1]:5432: connect: connection refused"))

	router := gin.New()
	router.GET("/workspaces", AdminAuth(mockDB), func(c *gin.Context) {
		c.JSON(http.StatusOK, gin.H{"ok": true})
	})

	rec := httptest.NewRecorder()
	req, _ := http.NewRequest(http.MethodGet, "/workspaces", nil)
	router.ServeHTTP(rec, req)

	if rec.Code != http.StatusServiceUnavailable {
		t.Errorf("expected 503, got %d: %s", rec.Code, rec.Body.String())
	}

	var body map[string]interface{}
	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
		t.Fatalf("response body must be JSON: %v (body=%s)", err, rec.Body.String())
	}
	if body["code"] != "platform_unavailable" {
		t.Errorf("response code = %v, want platform_unavailable (canvas reads this for the dedicated diagnostic)", body["code"])
	}
	if _, ok := body["error"].(string); !ok {
		t.Errorf("response must include human-readable error string, got %v", body["error"])
	}
	// The body must NOT echo the raw DB error — production hostnames
	// or connection-string fragments could land in a toast otherwise.
	if msg, _ := body["error"].(string); strings.Contains(msg, "dial tcp") {
		t.Errorf("response leaks underlying DB error: %q", msg)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}

View File

@ -57,6 +57,19 @@ type HeartbeatPayload struct {
// a previously-reported spend value. Any non-zero value is clamped to
// [0, maxMonthlySpend] before the DB write. (#615)
MonthlySpend int64 `json:"monthly_spend"`
// RuntimeState is a self-reported runtime health flag separate from
// "is the heartbeat task firing at all". The heartbeat task lives in
// its own asyncio task and keeps pinging even when the agent runtime
// is wedged (e.g. claude_agent_sdk's `Control request timeout:
// initialize` leaves the SDK in a permanent error state for the
// process lifetime). RuntimeState is how the workspace tells the
// platform "I'm alive but my Claude runtime is broken — flip me to
// degraded so the canvas can show a Restart hint."
//
// Empty string = healthy / no signal. The only currently-recognised
// non-empty value is "wedged"; future values can extend this without
// migration.
RuntimeState string `json:"runtime_state"`
}
type UpdateCardPayload struct {

View File

@ -17,6 +17,7 @@ import (
"time"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
dockerimage "github.com/docker/docker/api/types/image"
"github.com/docker/docker/api/types/network"
"github.com/docker/docker/api/types/volume"
@ -143,6 +144,62 @@ func ContainerName(workspaceID string) string {
return fmt.Sprintf("ws-%s", id)
}
// containerNamePrefix is the shared prefix every workspace container
// name carries (`ws-`). ListWorkspaceContainerIDPrefixes uses it both
// as the Docker name-filter value and as the prefix it strips when
// recovering workspace-ID prefixes; that is how the orphan sweeper
// recognises our own containers vs. anything else on the host.
const containerNamePrefix = "ws-"
// ListWorkspaceContainerIDPrefixes returns the 12-char workspace ID
// prefixes of every ws-* container the Docker daemon knows about
// (running or stopped). The 12-char form matches ContainerName's
// truncation, so the orphan sweeper can intersect this set against
// `SELECT substring(id::text, 1, 12) FROM workspaces WHERE status =
// 'removed'` without an extra round-trip per row.
//
// A nil receiver or nil Docker client yields (nil, nil); a Docker API
// error is returned as-is (the sweeper treats it as "skip this round"
// — better than a partial scan that misses leaks).
func (p *Provisioner) ListWorkspaceContainerIDPrefixes(ctx context.Context) ([]string, error) {
	if p == nil || p.cli == nil {
		return nil, nil
	}

	opts := container.ListOptions{
		// All=true also catches stopped-but-not-removed containers —
		// those still hold volume references and would block
		// RemoveVolume exactly like a running container.
		All:     true,
		Filters: filters.NewArgs(filters.Arg("name", containerNamePrefix)),
	}
	listed, err := p.cli.ContainerList(ctx, opts)
	if err != nil {
		return nil, err
	}

	out := make([]string, 0, len(listed))
	for _, ctr := range listed {
		// API names carry a leading slash ("/ws-abc123def456"); strip
		// it plus our prefix to recover the 12-char workspace ID.
		//
		// Docker's name filter is a SUBSTRING match, not a prefix
		// match, so names like "my-ws-thing" come back too. The
		// HasPrefix check is load-bearing: without it those false
		// positives would flow into the orphan sweeper's DB query as
		// bogus LIKE patterns.
		for _, raw := range ctr.Names {
			trimmed := strings.TrimPrefix(raw, "/")
			if !strings.HasPrefix(trimmed, containerNamePrefix) {
				continue
			}
			if id := strings.TrimPrefix(trimmed, containerNamePrefix); id != "" {
				out = append(out, id)
				break // first matching name wins; aliases would dup
			}
		}
	}
	return out, nil
}
// InternalURL returns the Docker-internal URL for a workspace container.
func InternalURL(workspaceID string) string {
return fmt.Sprintf("http://%s:%s", ContainerName(workspaceID), DefaultPort)
@ -832,6 +889,14 @@ func (p *Provisioner) RemoveVolume(ctx context.Context, workspaceID string) erro
// restart policy: if we ContainerStop first, the restart policy can
// respawn the container before ContainerRemove runs, leaving a zombie
// that re-registers via heartbeat after deletion.
//
// Returns nil on success AND on "container does not exist" (the cleanup
// goal is achieved either way). Returns the underlying Docker error
// only when the daemon actually failed to remove a live container —
// callers that follow Stop with RemoveVolume MUST check the return
// and skip volume removal on a real error, otherwise the volume
// removal will fail with "volume in use" because the container is
// still alive.
func (p *Provisioner) Stop(ctx context.Context, workspaceID string) error {
if p == nil || p.cli == nil {
return ErrNoBackend
@ -839,15 +904,23 @@ func (p *Provisioner) Stop(ctx context.Context, workspaceID string) error {
name := ContainerName(workspaceID)
// Force-remove kills and removes in one atomic operation, bypassing
// the restart policy entirely. If the container doesn't exist, the
// error is harmless.
if err := p.cli.ContainerRemove(ctx, name, container.RemoveOptions{Force: true}); err != nil {
// Container may already be gone — log but don't fail.
log.Printf("Provisioner: force-remove warning for %s: %v", name, err)
// the restart policy entirely.
err := p.cli.ContainerRemove(ctx, name, container.RemoveOptions{Force: true})
if err == nil {
log.Printf("Provisioner: stopped and removed container %s", name)
return nil
}
log.Printf("Provisioner: stopped and removed container %s", name)
return nil
if isContainerNotFound(err) {
// Container was already gone — the post-condition we want is
// satisfied. Don't surface as an error.
log.Printf("Provisioner: container %s already gone (no-op)", name)
return nil
}
// Real failure: daemon timeout, socket EOF, ctx cancellation, etc.
// Caller (workspace_crud.stopAndRemove, orphan_sweeper.sweepOnce)
// must propagate this so they can skip the follow-up RemoveVolume.
log.Printf("Provisioner: force-remove failed for %s: %v", name, err)
return fmt.Errorf("force-remove %s: %w", name, err)
}
// IsRunning checks if a workspace container is currently running.

View File

@ -0,0 +1,186 @@
package registry
// orphan_sweeper.go — periodic reconcile pass that cleans up Docker
// containers whose corresponding workspace row in Postgres has
// status='removed'. Defence in depth on top of the inline cleanup
// in handlers/workspace_crud.go.
//
// Why this exists: the inline cleanup is one-shot — if Docker hiccups
// (daemon restart, host load, transient API error), the container
// silently stays alive while the DB row is already 'removed'. Without
// a reconcile pass those leaks accumulate forever. With one, every
// missed cleanup heals on the next sweep.
//
// Cost: O(running containers) per cycle, not O(historical removed
// rows). The Docker name filter trims the candidate set to ws-* only
// (typically the same handful as ContainerList without filter on a
// dev host); the DB lookup is one indexed query against the
// idx_workspaces_status btree.
import (
"context"
"log"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/lib/pq"
)
// OrphanReaper is the narrow slice of the provisioner the sweeper
// depends on: enumerate ws-* containers, stop one, remove its volume.
// Extracted as an interface so the sweeper is unit-testable without
// a real Docker daemon — matches the ContainerChecker pattern in
// healthsweep.go. *provisioner.Provisioner satisfies this naturally.
type OrphanReaper interface {
	ListWorkspaceContainerIDPrefixes(ctx context.Context) ([]string, error)
	Stop(ctx context.Context, workspaceID string) error
	RemoveVolume(ctx context.Context, workspaceID string) error
}
// isLikelyWorkspaceID reports whether s is shaped like (a prefix of)
// a workspace UUID: non-empty, hex digits and `-` only. Workspace IDs
// are full UUIDs and the container-name truncation keeps the hex
// prefix intact, so any container name failing this check is by
// definition not one of ours and gets skipped. It also doubles as a
// SQL LIKE wildcard guard — `_` and `%` are outside the alphabet.
func isLikelyWorkspaceID(s string) bool {
	if len(s) == 0 {
		return false
	}
	for _, c := range s {
		hexDigit := (c >= '0' && c <= '9') ||
			(c >= 'a' && c <= 'f') ||
			(c >= 'A' && c <= 'F')
		if !hexDigit && c != '-' {
			return false
		}
	}
	return true
}
// OrphanSweepInterval is the cadence of the reconcile loop. 60s
// matches the heartbeat cadence (30s) × 2 — a single missed cleanup
// surfaces within ~90s end-to-end (canvas delete → next sweep tick →
// container gone). Faster cycles would just pay Docker API cost for
// no UX win; slower would let leaks linger long enough to compound
// CPU pressure on dev hosts.
const OrphanSweepInterval = 60 * time.Second

// orphanSweepDeadline bounds a single sweep cycle (applied via
// context.WithTimeout in sweepOnce). A daemon at the edge of timing
// out shouldn't accumulate goroutines. 30s is generous for a dev host
// with dozens of containers and a busy daemon.
const orphanSweepDeadline = 30 * time.Second
// StartOrphanSweeper runs the reconcile loop until ctx is cancelled.
// Passing a nil reaper disables the sweeper entirely (logged and
// returned immediately) — mirrors the handlers' nil-provisioner
// tolerance, since some test harnesses run without Docker available.
func StartOrphanSweeper(ctx context.Context, reaper OrphanReaper) {
	if reaper == nil {
		log.Println("Orphan sweeper: reaper is nil — sweeper disabled")
		return
	}
	log.Printf("Orphan sweeper started — reconciling every %s", OrphanSweepInterval)

	tick := time.NewTicker(OrphanSweepInterval)
	defer tick.Stop()

	// First pass runs immediately so a platform restart heals any
	// containers leaked while we were down, instead of making the
	// user wait a full interval for the first reconcile.
	sweepOnce(ctx, reaper)

	for {
		select {
		case <-tick.C:
			sweepOnce(ctx, reaper)
		case <-ctx.Done():
			log.Println("Orphan sweeper: shutdown")
			return
		}
	}
}
// sweepOnce performs one reconcile pass: enumerate ws-* containers,
// resolve which of them belong to workspaces whose DB row is already
// status='removed', and stop + remove the volume of each such leak.
// Every step fails soft — a Docker or DB error just skips this cycle
// and the next tick retries.
func sweepOnce(parent context.Context, reaper OrphanReaper) {
	// Bound the whole cycle so a wedged daemon can't pile up work.
	ctx, cancel := context.WithTimeout(parent, orphanSweepDeadline)
	defer cancel()

	prefixes, err := reaper.ListWorkspaceContainerIDPrefixes(ctx)
	if err != nil {
		log.Printf("Orphan sweeper: ListWorkspaceContainerIDPrefixes failed: %v — skipping cycle", err)
		return
	}
	if len(prefixes) == 0 {
		return
	}

	// Build one LIKE pattern per container-name prefix. Workspace IDs
	// are full UUIDs while container names truncate to 12 chars, so a
	// prefix match recovers the full id; first-12-char UUID collisions
	// across live rows are statistically negligible, and a single
	// LIKE ANY query keeps this at one round-trip regardless of leak
	// count.
	//
	// Defence: isLikelyWorkspaceID drops anything outside the
	// hex-and-dash UUID alphabet — non-workspace containers that
	// slipped past Docker's substring name filter, malformed entries,
	// and the SQL LIKE wildcards `_` / `%` (even though Docker's own
	// container-name validation would already reject those upstream).
	likes := make([]string, 0, len(prefixes))
	for _, prefix := range prefixes {
		if isLikelyWorkspaceID(prefix) {
			likes = append(likes, prefix+"%")
		}
	}
	if len(likes) == 0 {
		return
	}

	rows, err := db.DB.QueryContext(ctx, `
		SELECT id::text
		FROM workspaces
		WHERE status = 'removed'
		AND id::text LIKE ANY($1::text[])
	`, pq.Array(likes))
	if err != nil {
		log.Printf("Orphan sweeper: DB query failed: %v — skipping cycle", err)
		return
	}
	defer rows.Close()

	var orphanIDs []string
	for rows.Next() {
		var wsID string
		if scanErr := rows.Scan(&wsID); scanErr != nil {
			log.Printf("Orphan sweeper: row scan failed: %v", scanErr)
			continue
		}
		orphanIDs = append(orphanIDs, wsID)
	}
	if iterErr := rows.Err(); iterErr != nil {
		log.Printf("Orphan sweeper: rows iteration failed: %v", iterErr)
		return
	}

	for _, wsID := range orphanIDs {
		log.Printf("Orphan sweeper: stopping leaked container for removed workspace %s", wsID)
		if stopErr := reaper.Stop(ctx, wsID); stopErr != nil {
			// Stop treats "container not found" as success, so a
			// non-nil error here means the container is genuinely
			// still alive (daemon timeout, ctx cancellation,
			// transient socket EOF). Skip RemoveVolume — removing a
			// volume a live container holds just errors with
			// "volume in use". The next cycle retries Stop.
			log.Printf("Orphan sweeper: Stop failed for %s: %v — leaving volume for next cycle", wsID, stopErr)
			continue
		}
		if rmErr := reaper.RemoveVolume(ctx, wsID); rmErr != nil {
			log.Printf("Orphan sweeper: RemoveVolume warning for %s: %v", wsID, rmErr)
		}
	}
}

View File

@ -0,0 +1,255 @@
package registry
import (
"context"
"errors"
"sync"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
)
// fakeReaper is a hand-rolled OrphanReaper for the sweeper tests.
// Records every Stop / RemoveVolume call so tests can assert which
// workspace IDs got reconciled.
type fakeReaper struct {
mu sync.Mutex
listResponse []string
listErr error
stopErr map[string]error
removeVolErr map[string]error
stopCalls []string
removeVolCalls []string
}
func (f *fakeReaper) ListWorkspaceContainerIDPrefixes(_ context.Context) ([]string, error) {
if f.listErr != nil {
return nil, f.listErr
}
return f.listResponse, nil
}
func (f *fakeReaper) Stop(_ context.Context, wsID string) error {
f.mu.Lock()
defer f.mu.Unlock()
f.stopCalls = append(f.stopCalls, wsID)
return f.stopErr[wsID]
}
func (f *fakeReaper) RemoveVolume(_ context.Context, wsID string) error {
f.mu.Lock()
defer f.mu.Unlock()
f.removeVolCalls = append(f.removeVolCalls, wsID)
return f.removeVolErr[wsID]
}
// TestSweepOnce_ReconcilesRunningRemovedRows pins the core reconcile
// behavior: a container still running for a workspace whose DB row is
// status='removed' gets Stop + RemoveVolume, while a container whose
// row the query does not return is left alone.
func TestSweepOnce_ReconcilesRunningRemovedRows(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)

	// Docker reports two ws-* containers; the mocked query returns
	// only the first as a removed row — that one is the leak, the
	// other must NOT be reaped.
	reaper := &fakeReaper{
		listResponse: []string{"abc123def456", "xyz789ghi012"},
	}
	const leaked = "abc123def456-0000-0000-0000-000000000000"
	mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`).
		WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow(leaked))

	sweepOnce(context.Background(), reaper)

	if len(reaper.stopCalls) != 1 || reaper.stopCalls[0] != leaked {
		t.Errorf("Stop calls = %v, want exactly the leaked id", reaper.stopCalls)
	}
	if len(reaper.removeVolCalls) != 1 || reaper.removeVolCalls[0] != leaked {
		t.Errorf("RemoveVolume calls = %v, want exactly the leaked id", reaper.removeVolCalls)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestSweepOnce_NoRunningContainers — with zero ws-* containers the
// sweeper must short-circuit before touching the DB; no leak is
// possible if nothing is running. sqlmock fails the test on any
// unexpected query, so no query expectation is registered here.
func TestSweepOnce_NoRunningContainers(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)

	reaper := &fakeReaper{listResponse: nil}
	sweepOnce(context.Background(), reaper)

	if got := reaper.stopCalls; len(got) != 0 {
		t.Errorf("Stop should not fire when no containers exist; got %v", got)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestSweepOnce_DockerListErrorSkipsCycle — a Docker daemon hiccup
// must abort the cycle before the DB query runs; reaping based on
// stale container information would be worse than waiting one tick.
func TestSweepOnce_DockerListErrorSkipsCycle(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)

	reaper := &fakeReaper{listErr: errors.New("daemon unreachable")}
	sweepOnce(context.Background(), reaper)

	if got := reaper.stopCalls; len(got) != 0 {
		t.Errorf("Stop must not fire when Docker list failed; got %v", got)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestSweepOnce_StopFailureLeavesVolume — if Stop fails, RemoveVolume
// MUST NOT fire. This is the same trap that motivated the sweeper:
// removing a volume held by a still-running container always errors
// with "volume in use", and we'd accumulate noise in the log without
// actually fixing anything. Leave the volume for the next sweep
// (which will retry Stop).
func TestSweepOnce_StopFailureLeavesVolume(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	reaper := &fakeReaper{
		listResponse: []string{"abc123def456"},
		stopErr: map[string]error{
			"abc123def456-0000-0000-0000-000000000000": errors.New("docker daemon timeout"),
		},
	}
	mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`).
		WillReturnRows(sqlmock.NewRows([]string{"id"}).
			AddRow("abc123def456-0000-0000-0000-000000000000"))
	sweepOnce(context.Background(), reaper)
	if len(reaper.stopCalls) != 1 {
		t.Errorf("Stop should have been attempted exactly once, got %v", reaper.stopCalls)
	}
	if len(reaper.removeVolCalls) != 0 {
		t.Errorf("RemoveVolume must not fire when Stop failed; got %v", reaper.removeVolCalls)
	}
	// Consistency with the sibling sweeper tests: verify the mocked
	// query was actually consumed. This check was previously missing
	// here, so a regression that skipped the DB query entirely (and
	// therefore never found the orphan) would still have passed.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestSweepOnce_VolumeRemoveErrorIsNonFatal — RemoveVolume failures
// are logged but don't prevent processing other orphans in the same
// cycle. Belt + braces against a transient daemon issue mid-loop.
func TestSweepOnce_VolumeRemoveErrorIsNonFatal(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	reaper := &fakeReaper{
		listResponse: []string{"aaa111bbb222", "ccc333ddd444"},
		removeVolErr: map[string]error{
			"aaa111bbb222-0000-0000-0000-000000000000": errors.New("volume not found"),
		},
	}
	mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`).
		WillReturnRows(sqlmock.NewRows([]string{"id"}).
			AddRow("aaa111bbb222-0000-0000-0000-000000000000").
			AddRow("ccc333ddd444-0000-0000-0000-000000000000"))
	sweepOnce(context.Background(), reaper)
	if len(reaper.stopCalls) != 2 {
		t.Errorf("both orphans should have been Stopped; got %v", reaper.stopCalls)
	}
	if len(reaper.removeVolCalls) != 2 {
		t.Errorf("both orphans should have had RemoveVolume attempted; got %v", reaper.removeVolCalls)
	}
	// Consistency with the sibling sweeper tests: verify the mocked
	// query was actually consumed (this check was previously missing
	// in this test).
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestSweepOnce_FiltersNonWorkspacePrefixes — Docker's name filter is
// a SUBSTRING match, so names like "my-ws-thing" can slip through.
// The provisioner's HasPrefix check trims those, but the in-sweeper
// isLikelyWorkspaceID guard is the second line of defence: anything
// outside the UUID alphabet (hex + dashes) must be rejected before it
// becomes a SQL LIKE pattern. Locks in that NO DB query fires when
// every prefix is filtered out.
func TestSweepOnce_FiltersNonWorkspacePrefixes(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)

	// Every entry fails the hex-and-dash check, so the sweeper must
	// short-circuit before building a query (sqlmock fails the test
	// on any unexpected query — no expectation is registered).
	reaper := &fakeReaper{
		listResponse: []string{
			"not_a_uuid_at_all",            // underscore not in UUID alphabet
			"contains%wildcard",            // SQL LIKE multi-char wildcard
			"contains_wildcard",            // SQL LIKE single-char wildcard
			"",                             // empty
			"valid-but-non-workspace-name", // dashes plus non-hex letters
		},
	}
	sweepOnce(context.Background(), reaper)

	if got := reaper.stopCalls; len(got) != 0 {
		t.Errorf("Stop must not fire when all prefixes filtered; got %v", got)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestIsLikelyWorkspaceID — pin the alphabet directly. This is the
// guard that prevents SQL LIKE wildcards (`%`, `_`) from reaching
// the sweeper's query.
func TestIsLikelyWorkspaceID(t *testing.T) {
	cases := map[string]bool{
		"abc123def456":                       true,
		"abcdef-1234-5678-90ab-cdef00112233": true,
		"ABC123DEF456":                       true,  // uppercase hex still allowed
		"":                                   false, // empty
		"abc_123":                            false, // underscore (SQL LIKE single-char wildcard)
		"abc%123":                            false, // percent (SQL LIKE multi-char wildcard)
		"hello world":                        false, // space, non-hex letters
		"valid-but-not":                      false, // 'l', 't', 'n' aren't hex
		"abc 123":                            false,
		".../escape":                         false,
	}
	for in, want := range cases {
		if got := isLikelyWorkspaceID(in); got != want {
			t.Errorf("isLikelyWorkspaceID(%q) = %v, want %v", in, got, want)
		}
	}
}
// TestStartOrphanSweeper_NilReaperIsNoOp — tolerance for the
// nil-provisioner path some test harnesses use.
func TestStartOrphanSweeper_NilReaperIsNoOp(t *testing.T) {
	// Run in a goroutine and signal completion over a channel so the
	// test can tell "returned immediately" apart from "blocked".
	returned := make(chan struct{})
	go func() {
		defer close(returned)
		StartOrphanSweeper(context.Background(), nil)
	}()

	select {
	case <-returned:
		// Returned promptly without panicking — expected.
	case <-time.After(500 * time.Millisecond):
		t.Fatal("StartOrphanSweeper(nil) blocked instead of returning immediately")
	}
}

View File

@ -308,6 +308,7 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
wsAuth.PUT("/secrets", sech.Set)
wsAuth.DELETE("/secrets/:key", sech.Delete)
wsAuth.GET("/model", sech.GetModel)
wsAuth.PUT("/model", sech.SetModel)
// Token usage metrics — cost transparency (#593).
// WorkspaceAuth middleware (on wsAuth) binds the bearer to :id.
@ -481,6 +482,14 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
wsAuth.PUT("/files/*path", tmplh.WriteFile)
wsAuth.DELETE("/files/*path", tmplh.DeleteFile)
// Chat attachments — file upload (user → agent) and binary-safe
// streaming download (agent → user). Namespaced under /chat/ so
// the security model is obviously distinct from /files/* (which
// handles workspace config/templates and has a different caller).
chatfh := handlers.NewChatFilesHandler(tmplh)
wsAuth.POST("/chat/uploads", chatfh.Upload)
wsAuth.GET("/chat/download", chatfh.Download)
// Plugins
pluginsDir := findPluginsDir(configsDir)
// Runtime lookup lets the plugins handler filter the registry to plugins

View File

@ -0,0 +1,24 @@
-- 043_workspace_status_enum.down.sql
--
-- Reverse 043_workspace_status_enum.up.sql: convert workspaces.status
-- back to plain TEXT and drop the workspace_status enum type.
BEGIN;
-- Symmetric with the up migration: a rollback under the same load
-- that motivated the up-file's 5s lock_timeout would otherwise stall
-- writers indefinitely.
SET LOCAL lock_timeout = '5s';
-- Same two-step DROP DEFAULT / retype / SET DEFAULT dance as the up
-- migration: Postgres rejects ALTER COLUMN TYPE while a DEFAULT of
-- the old type is attached. No INSERT can land between these
-- statements — they share one transaction.
ALTER TABLE workspaces
    ALTER COLUMN status DROP DEFAULT;
ALTER TABLE workspaces
    ALTER COLUMN status TYPE TEXT USING status::TEXT;
ALTER TABLE workspaces
    ALTER COLUMN status SET DEFAULT 'provisioning';
-- Safe only after the column no longer references the type; DROP TYPE
-- errors (and rolls back the transaction) if any other column still
-- uses workspace_status — which is the behavior we want for an
-- unexpected dependent.
DROP TYPE workspace_status;
COMMIT;

View File

@ -0,0 +1,84 @@
-- 043_workspace_status_enum.up.sql
--
-- Convert workspaces.status from free-form TEXT to a real Postgres
-- ENUM type. The previous shape (TEXT DEFAULT 'provisioning' with no
-- CHECK constraint, set by 001_workspaces.sql) let any handler write
-- any string, including typos and stale values from older code paths.
-- Locking the value set forces every writer to use one of the agreed
-- states and lets us add a new state (`degraded`, used by the SDK
-- wedge detector landing in this same change) without losing type
-- safety on the column.
--
-- Value set covers every status the production codebase actually writes:
--
--   provisioning — workspace row exists, container is being created
--                  (initial INSERT default)
--   online       — heartbeat fresh + last response was successful
--   offline      — Redis liveness key expired (ws-side dead) or
--                  the proxy detected an unreachable upstream
--   degraded     — runtime is alive but reporting trouble (heartbeat
--                  error_rate >= 0.5, OR new in this change:
--                  workspace explicitly reported runtime_state="wedged")
--   failed       — provisioning never completed, or workspace marked
--                  itself failed via bundle import / runtime crash
--   removed      — soft-delete tombstone; the row stays so foreign-
--                  key references survive but no operations target it
--   paused       — operator-initiated suspend via workspace_restart's
--                  pause path (workspace_restart.go:406)
--   hibernated   — auto-suspended after idle threshold; container
--                  stopped but row preserved (workspace_restart.go:283,
--                  introduced by migration 029_workspace_hibernation)
--
-- Sweep of every `UPDATE workspaces SET status = 'X'` in the
-- workspace-server/internal/ tree (excluding tests) verified the
-- value set. Adding a new state in the future requires both updating
-- this enum (a separate `ALTER TYPE … ADD VALUE` migration) AND any
-- writers — the enum will reject unknown strings at insert/update
-- time, which is the exact failure mode this migration is meant to
-- give us.
--
-- Deployment: `ALTER TABLE … ALTER COLUMN TYPE` takes ACCESS
-- EXCLUSIVE on workspaces. A long-running SELECT against the table
-- will block the migration; the migration will then block every
-- writer behind it. `SET lock_timeout` aborts the migration in 5s
-- if it can't acquire the lock — preferable to stalling the whole
-- workspace fleet behind one slow query.
BEGIN;
-- SET LOCAL scopes the timeout to this transaction only; the
-- session's normal lock_timeout is untouched after COMMIT/ROLLBACK.
SET LOCAL lock_timeout = '5s';
CREATE TYPE workspace_status AS ENUM (
    'provisioning',
    'online',
    'offline',
    'degraded',
    'failed',
    'removed',
    'paused',
    'hibernated'
);
-- The two-step ALTER (DROP DEFAULT then change type then SET DEFAULT)
-- is required because Postgres rejects an ALTER COLUMN TYPE on a
-- column that has a DEFAULT whose expression doesn't match the new
-- type. The intermediate moment with no default is fine — no INSERT
-- happens between these statements inside the same transaction.
--
-- The `USING status::workspace_status` cast is the type-conversion
-- expression Postgres needs when the source and target types aren't
-- assignment-compatible. If any existing row has a status value
-- outside the enum's set, this statement aborts the transaction and
-- the migration leaves the table untouched — that's the correct
-- behavior (we'd want to know about the rogue value before locking
-- the type).
ALTER TABLE workspaces
    ALTER COLUMN status DROP DEFAULT;
ALTER TABLE workspaces
    ALTER COLUMN status TYPE workspace_status USING status::workspace_status;
-- The new default carries an explicit ::workspace_status cast so the
-- stored default expression is enum-typed, not TEXT.
ALTER TABLE workspaces
    ALTER COLUMN status SET DEFAULT 'provisioning'::workspace_status;
COMMIT;

View File

@ -10,7 +10,7 @@ import uuid
import httpx
from platform_auth import auth_headers
from platform_auth import auth_headers, self_source_headers
logger = logging.getLogger(__name__)
@ -56,9 +56,15 @@ async def send_a2a_message(target_url: str, message: str) -> str:
timeout=httpx.Timeout(connect=30.0, read=300.0, write=30.0, pool=30.0)
) as client:
try:
# self_source_headers() includes X-Workspace-ID so the
# platform's a2a_receive logger records source_id =
# WORKSPACE_ID. Otherwise peer-A2A messages — including
# the case where target_url resolves to this workspace's
# own /a2a — get logged with source_id=NULL and surface
# in the recipient's My Chat tab as user-typed input.
resp = await client.post(
target_url,
headers=auth_headers(),
headers=self_source_headers(WORKSPACE_ID),
json={
"jsonrpc": "2.0",
"id": str(uuid.uuid4()),
@ -81,10 +87,40 @@ async def send_a2a_message(target_url: str, message: str) -> str:
return f"{_A2A_ERROR_PREFIX}{text}"
return text
elif "error" in data:
return f"{_A2A_ERROR_PREFIX}{data['error'].get('message', 'unknown')}"
return str(data)
err = data["error"]
msg = (err.get("message") or "").strip()
code = err.get("code")
if msg and code is not None:
detail = f"{msg} (code={code})"
elif msg:
detail = msg
elif code is not None:
detail = f"JSON-RPC error with no message (code={code})"
else:
detail = "JSON-RPC error with no message"
return f"{_A2A_ERROR_PREFIX}{detail} [target={target_url}]"
return f"{_A2A_ERROR_PREFIX}unexpected response shape (no result, no error): {str(data)[:200]} [target={target_url}]"
except Exception as e:
return f"{_A2A_ERROR_PREFIX}{e}"
# Some httpx exceptions stringify to empty (RemoteProtocolError,
# ConnectionReset variants) — the canvas would then render
# "[A2A_ERROR] " with no detail and the operator has no signal
# to act on. Always include the exception class name and the
# target URL so the activity log + Agent Comms panel have
# actionable information without a trip through container logs.
msg = str(e).strip()
type_name = type(e).__name__
if not msg:
detail = f"{type_name} (no message — likely connection reset or silent timeout)"
elif msg.startswith(f"{type_name}:") or msg.startswith(f"{type_name} "):
# Already prefixed with the type — don't double-prefix.
# Prefix-anchored check (not substring) so a message that
# happens to mention some OTHER class name mid-string
# (e.g. "got OSError on read") doesn't suppress our own
# type prefix and lose the diagnostic signal.
detail = msg
else:
detail = f"{type_name}: {msg}"
return f"{_A2A_ERROR_PREFIX}{detail} [target={target_url}]"
async def get_peers() -> list[dict]:

View File

@ -48,6 +48,10 @@ from shared_runtime import (
brief_task,
set_current_task,
)
from executor_helpers import (
collect_outbound_files,
extract_attached_files,
)
from builtin_tools.telemetry import (
A2A_TASK_ID,
GEN_AI_OPERATION_NAME,
@ -211,6 +215,18 @@ class LangGraphA2AExecutor(AgentExecutor):
3. Message(final_text) terminal event
"""
user_input = extract_message_text(context)
# Pull attached files from A2A message parts (kind: "file") and
# append a manifest to the prompt so the agent knows they exist.
# LangGraph tools (filesystem, bash, skills) can then open the
# files by path — without this the agent silently ignores the
# attachments and replies "I'm not sure what you're referring to".
_attached_files = extract_attached_files(getattr(context, "message", None))
if _attached_files:
_manifest = "\n\nAttached files:\n" + "\n".join(
f"- {f['name']} ({f['mime_type'] or 'unknown type'}) at {f['path']}"
for f in _attached_files
)
user_input = (user_input + _manifest) if user_input else _manifest.lstrip()
if not user_input:
parts = getattr(getattr(context, "message", None), "parts", None)
logger.warning("A2A execute: no text content in message parts: %s", parts)
@ -415,7 +431,38 @@ class LangGraphA2AExecutor(AgentExecutor):
# Non-streaming: ResultAggregator.consume_all() returns this
# immediately as the response (a2a_client.py reads .parts[0].text).
# Streaming: yielded as the last SSE event in the stream.
msg = new_agent_text_message(final_text, task_id=task_id, context_id=context_id)
#
# If the reply mentions /workspace/... paths, stage each one
# and emit as FileParts alongside the text so the canvas can
# render a download button. Same contract the hermes executor
# uses — every runtime going through this code path (langgraph,
# deepagents, future ReAct variants) inherits it.
_outbound = collect_outbound_files(final_text)
if _outbound:
# NOTE: do NOT re-import `Part` here. It is already imported
# at module scope (line 42). A function-scope `from a2a.types
# import ... Part ...` would mark `Part` as a local name
# throughout this function under Python's scoping rules,
# making the earlier `Part(text=text)` call (line ~358, inside
# the astream_events loop) raise UnboundLocalError because
# the local binding is not yet in scope at that point.
from a2a.types import FilePart, FileWithUri, Message, Role, TextPart
_parts: list[Part] = [Part(root=TextPart(text=final_text))] if final_text else []
for f in _outbound:
_parts.append(Part(root=FilePart(file=FileWithUri(
uri="workspace:" + f["path"],
name=f["name"],
mimeType=f["mime_type"],
))))
msg = Message(
messageId=uuid.uuid4().hex,
role=Role.agent,
parts=_parts,
taskId=task_id,
contextId=context_id,
)
else:
msg = new_agent_text_message(final_text, task_id=task_id, context_id=context_id)
# Attach tool_trace via metadata when supported. Guarded with
# hasattr because some test mocks return a plain string here.
if tool_trace and hasattr(msg, "metadata"):

View File

@ -112,7 +112,7 @@ def _auth_headers_for_heartbeat() -> dict[str, str]:
async def report_activity(
activity_type: str, target_id: str = "", summary: str = "", status: str = "ok",
task_text: str = "", response_text: str = "",
task_text: str = "", response_text: str = "", error_detail: str = "",
):
"""Report activity to the platform for live progress tracking."""
try:
@ -129,6 +129,13 @@ async def report_activity(
payload["request_body"] = {"task": task_text}
if response_text:
payload["response_body"] = {"result": response_text}
if error_detail:
# error_detail is a top-level activity row column on the
# platform (handlers/activity.go). Surfacing the cleaned
# exception string here lets the Activity tab render a
# red error chip + the cause without forcing the user
# to scroll into the raw response_body JSON.
payload["error_detail"] = error_detail
await client.post(
f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/activity",
json=payload,
@ -178,11 +185,23 @@ async def tool_delegate_task(workspace_id: str, task: str) -> str:
# Detect delegation failures — wrap them clearly so the calling agent
# can decide to retry, use another peer, or handle the task itself.
is_error = result.startswith(_A2A_ERROR_PREFIX)
# Strip the sentinel prefix so error_detail is the human-readable
# cause directly. The Activity tab's red error chip surfaces this
# without the user having to scroll into the raw response JSON.
#
# Cap at 4096 chars before sending — the platform's
# activity_logs.error_detail column is unbounded TEXT and a
# malicious or buggy peer could otherwise stream an arbitrarily
# large error message into the caller's activity log. 4096 is
# comfortably above any real exception traceback we've seen and
# well below an obvious-DoS threshold.
error_detail = result[len(_A2A_ERROR_PREFIX):].strip()[:4096] if is_error else ""
await report_activity(
"a2a_receive", workspace_id,
f"{peer_name} responded ({len(result)} chars)" if not is_error else f"{peer_name} failed",
f"{peer_name} responded ({len(result)} chars)" if not is_error else f"{peer_name} failed: {error_detail[:120]}",
task_text=task, response_text=result,
status="error" if is_error else "ok",
error_detail=error_detail,
)
if is_error:
return (

View File

@ -42,10 +42,15 @@ async def delegate_task(workspace_id: str, task: str) -> str:
except Exception as e:
return f"Error discovering workspace: {e}"
# Send A2A message
# Send A2A message. X-Workspace-ID identifies us as the source —
# without it the platform's a2a_receive logger writes
# source_id=NULL and the recipient's My Chat tab renders the
# delegation as if a human user typed it. Same hazard fixed
# in heartbeat.py / a2a_client.py / main.py initial+idle flows.
try:
a2a_resp = await client.post(
target_url,
headers={"X-Workspace-ID": WORKSPACE_ID},
json={
"jsonrpc": "2.0",
"id": str(uuid.uuid4()),

View File

@ -29,7 +29,7 @@ import asyncio
import logging
import os
import sys
from collections.abc import AsyncIterator
from collections.abc import AsyncIterator, Callable
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
@ -47,7 +47,9 @@ from executor_helpers import (
WORKSPACE_MOUNT,
auto_push_hook,
brief_summary,
collect_outbound_files,
commit_memory,
extract_attached_files,
extract_message_text,
get_a2a_instructions,
get_hma_instructions,
@ -85,6 +87,180 @@ _RETRYABLE_PATTERNS = (
"try again",
)
# Module-level SDK-wedge flag. When claude_agent_sdk's `query.initialize()`
# raises `Control request timeout: initialize`, the SDK's internal client-
# process state is corrupted for the rest of the Python process — every
# subsequent `_run_query()` call hits the same wedge and re-throws. The
# executor itself can't auto-recover (the underlying CLI subprocess and
# its read pipe are in an unrecoverable state); only a workspace restart
# clears it.
#
# The heartbeat task reads these helpers and reports
# `runtime_state="wedged"` to the platform, which flips the workspace to
# `degraded` so the canvas surfaces a Restart hint instead of leaving
# the user staring at a green dot while every chat hangs.
#
# Module scope (not instance scope) is deliberate: the wedge is a
# property of the Python process, not the executor. A future per-org
# multi-executor design could move this to a shared registry, but with
# one executor per workspace process today the simplest lock-free
# read+write fits.
_sdk_wedged_reason: str | None = None
def is_wedged() -> bool:
"""True if the Claude SDK has hit a non-recoverable init wedge in
this process. Sticky until process restart."""
return _sdk_wedged_reason is not None
def wedge_reason() -> str:
"""Human-readable description of the wedge cause, or empty string
when not wedged. Surfaced to the canvas via heartbeat sample_error."""
return _sdk_wedged_reason or ""
def _mark_sdk_wedged(reason: str) -> None:
"""Internal — flag the SDK as wedged. Only the first call wins
(subsequent identical wedges shouldn't overwrite a more specific
reason). Tests use `_reset_sdk_wedge_for_test()` to clear."""
global _sdk_wedged_reason
if _sdk_wedged_reason is None:
_sdk_wedged_reason = reason
logger.error("SDK wedge detected: %s — workspace will report degraded until a successful query clears it", reason)
def _clear_sdk_wedge_on_success() -> None:
"""Auto-recovery — called from _run_query after a successful
completion. The original wedge could be transient (a single network
blip during the SDK's first-message handshake), and a sticky-only
flag would lock the workspace into degraded forever even after the
SDK started working again. Clearing on observed success means the
next heartbeat after a working query reports `runtime_state` empty
and the platform flips status back to online.
No-op when not wedged (the common case)."""
global _sdk_wedged_reason
if _sdk_wedged_reason is not None:
logger.info("SDK wedge cleared after successful query — workspace will recover to online on next heartbeat")
_sdk_wedged_reason = None
def _reset_sdk_wedge_for_test() -> None:
"""Test-only escape hatch. Production code clears the wedge via
`_clear_sdk_wedge_on_success` when a query succeeds; this helper
is for unit tests that need to reset between cases."""
global _sdk_wedged_reason
_sdk_wedged_reason = None
# Per-tool-use summarizers. Reads the most-useful argument from each
# tool's input dict so the canvas progress feed shows
# `🛠 Read /tmp/foo` instead of the bare tool name. Anything not in the
# table falls through to a generic "🛠 <tool>(…)" line. Order keys by
# tool frequency so a future contributor can see the high-traffic
# tools first.
_TOOL_USE_SUMMARIZERS: dict[str, Callable[[dict], str]] = {
"Read": lambda i: f"📄 Read {i.get('file_path', '?')}",
"Write": lambda i: f"✍️ Write {i.get('file_path', '?')}",
"Edit": lambda i: f"✏️ Edit {i.get('file_path', '?')}",
"Bash": lambda i: f"⚡ Bash: {(i.get('command') or '')[:80]}",
"Glob": lambda i: f"🔍 Glob {i.get('pattern', '?')}",
"Grep": lambda i: f"🔍 Grep {i.get('pattern', '?')}",
"WebFetch": lambda i: f"🌐 WebFetch {i.get('url', '?')}",
"WebSearch": lambda i: f"🌐 WebSearch {i.get('query', '?')}",
"Task": lambda i: f"🤖 Task: {(i.get('description') or '')[:60]}",
"TodoWrite": lambda _i: "📝 TodoWrite",
}
def _summarize_tool_use(tool_name: str, tool_input: dict) -> str:
summarizer = _TOOL_USE_SUMMARIZERS.get(tool_name)
if summarizer:
try:
return summarizer(tool_input or {})[:200]
except Exception:
pass
# Generic fallback. Truncated so a tool with a giant input dict
# doesn't write a 10kB activity row per call.
return f"🛠 {tool_name}(…)"[:200]
async def _report_tool_use(block: Any) -> None:
    """Fire-and-forget one `agent_log` activity row per tool the SDK
    invoked, so the canvas's MyChat live-progress feed can render each
    step Claude takes instead of a single spinner.

    Posts directly to /workspaces/:id/activity rather than through
    a2a_tools.report_activity — that helper also pushes a current_task
    heartbeat, which would duplicate as a TASK_UPDATED line in the
    chat feed. The workspace card's current_task is already set once
    per turn by the executor's set_current_task(brief_summary) call,
    so the per-tool telemetry stays a chat-only signal.

    Best-effort: any failure (network blip, unreachable platform, a
    block missing the expected attrs) is swallowed silently. The tool
    still executes regardless; only the progress telemetry is lost.
    Deliberately never raises — a malformed block must not abort the
    message-stream iteration in `_run_query`.
    """
    try:
        # Lazy imports keep this helper non-essential — the executor
        # must still run when the workspace's network/auth plumbing
        # isn't fully set up (e.g. unit tests).
        import httpx
        from a2a_client import PLATFORM_URL, WORKSPACE_ID
        from platform_auth import auth_headers
    except Exception:
        return
    try:
        name = getattr(block, "name", "") or ""
        if not name:
            return
        payload = {
            "activity_type": "agent_log",
            "source_id": WORKSPACE_ID,
            # target_id == source for self-actions. Matches the
            # convention other self-logged activity rows use
            # (a2a_receive when the workspace logs its own outbound
            # reply) so DB consumers joining on target_id see a
            # well-defined value.
            "target_id": WORKSPACE_ID,
            "summary": _summarize_tool_use(name, getattr(block, "input", {}) or {}),
            "status": "ok",
            "method": name,
        }
        # 5s budget — long enough to absorb a single platform GC
        # pause, short enough that a wedged platform doesn't slow the
        # tool-iteration cadence beyond noticeable.
        async with httpx.AsyncClient(timeout=5.0) as client:
            await client.post(
                f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/activity",
                json=payload,
                headers=auth_headers(),
            )
    except Exception:
        # Telemetry failures must not break the conversation.
        return
# Substring patterns that classify an exception as the specific
# claude_agent_sdk init-timeout wedge (vs. a rate-limit, transient
# subprocess crash, etc.). Match is case-insensitive on the formatted
# error string (the caller lowercases `formatted` and checks `pat in`
# — containment, not suffix). Adding a new pattern here MUST come
# with a test in tests/test_claude_sdk_executor.py — false-positives
# lock the workspace into degraded until the next successful query
# clears it.
#
# The pattern text deliberately ends in ": initialize" — the SDK can
# theoretically time out on later control messages (in-flight tool
# callbacks), but those don't leave the SDK in the unrecoverable
# post-init state we're trying to detect. Limit the pattern to the
# specific wedge.
_WEDGE_ERROR_PATTERNS = (
    "control request timeout: initialize",
)

# NOTE(review): presumably matched against formatted SDK errors whose
# stderr was swallowed — the use site is outside this chunk; confirm
# before relying on the exact wording.
_SWALLOWED_STDERR_MARKER = "Check stderr output for details"
@ -344,6 +520,14 @@ class ClaudeSDKExecutor(AgentExecutor):
for block in message.content:
if isinstance(block, sdk.TextBlock):
assistant_chunks.append(block.text)
else:
# ToolUseBlock / ServerToolUseBlock are present
# on the real SDK but not on the conftest stub —
# check by class name to avoid an isinstance()
# against a class the stub doesn't define.
cls = type(block).__name__
if cls in ("ToolUseBlock", "ServerToolUseBlock"):
await _report_tool_use(block)
elif isinstance(message, sdk.ResultMessage):
sid = getattr(message, "session_id", None)
if sid:
@ -352,6 +536,20 @@ class ClaudeSDKExecutor(AgentExecutor):
finally:
self._active_stream = None
text = result_text if result_text is not None else "".join(assistant_chunks)
# Auto-recover the wedge flag — if a previous query() left this
# process in `_sdk_wedged` and THIS query just completed
# cleanly, the SDK clearly works again. Clear so the next
# heartbeat reports runtime_state empty and the platform flips
# status degraded → online without a manual restart.
#
# Gate on actual content from the stream so a degenerate
# "iterator returned without raising but emitted nothing"
# case (possible from a partial stream or a stub SDK) doesn't
# falsely advertise recovery. A real successful query yields
# at least a ResultMessage (sets result_text) or one
# AssistantMessage TextBlock (populates assistant_chunks).
if result_text is not None or assistant_chunks:
_clear_sdk_wedge_on_success()
return QueryResult(text=text, session_id=session_id)
# ------------------------------------------------------------------
@ -365,6 +563,18 @@ class ClaudeSDKExecutor(AgentExecutor):
workspace queue rather than racing on `_session_id` / `_active_stream`.
"""
user_input = extract_message_text(context.message)
# Surface attached files to claude-code via a manifest in the prompt.
# Claude Code reads files through its own Read/Glob tools by path —
# as long as the prompt names the path, the CLI will open them on
# demand. Same contract every platform runtime uses so the UX is
# identical across hermes / langgraph / claude-code.
attached = extract_attached_files(context.message)
if attached:
manifest = "\n\nAttached files:\n" + "\n".join(
f"- {f['name']} ({f['mime_type'] or 'unknown type'}) at {f['path']}"
for f in attached
)
user_input = (user_input + manifest) if user_input else manifest.lstrip()
if not user_input:
await event_queue.enqueue_event(new_agent_text_message(_NO_TEXT_MSG))
return
@ -375,7 +585,26 @@ class ClaudeSDKExecutor(AgentExecutor):
# Enqueue outside the lock so the next queued turn can start
# preparing its prompt while this turn's response ships. Event
# ordering is preserved per-queue by the A2A server, so no races.
await event_queue.enqueue_event(new_agent_text_message(response_text))
# If the response mentions /workspace/... files, stage each and
# emit FileParts alongside the text so the canvas can download.
outbound = collect_outbound_files(response_text)
if outbound:
from a2a.types import FilePart, FileWithUri, Message, Part, Role, TextPart
import uuid as _uuid
parts: list = [Part(root=TextPart(text=response_text))] if response_text else []
for f in outbound:
parts.append(Part(root=FilePart(file=FileWithUri(
uri="workspace:" + f["path"],
name=f["name"],
mimeType=f["mime_type"],
))))
await event_queue.enqueue_event(Message(
messageId=_uuid.uuid4().hex,
role=Role.agent,
parts=parts,
))
else:
await event_queue.enqueue_event(new_agent_text_message(response_text))
@staticmethod
def _is_retryable(exc: BaseException) -> bool:
@ -473,6 +702,19 @@ class ClaudeSDKExecutor(AgentExecutor):
# subprocess died.
logger.error("SDK agent error [claude-code]: %s", formatted)
logger.exception("SDK agent error [claude-code] — full traceback follows")
# Detect the specific claude_agent_sdk init-wedge case
# so the heartbeat task can flip the workspace to
# `degraded`. Match on the lowercased formatted error;
# `formatted` is whatever _format_process_error built,
# which already includes both the message and the
# exception class name.
formatted_lc = formatted.lower()
for pat in _WEDGE_ERROR_PATTERNS:
if pat in formatted_lc:
_mark_sdk_wedged(
f"claude_agent_sdk wedge: {formatted[:200]} — restart workspace to recover"
)
break
response_text = sanitize_agent_error(exc)
break
finally:

View File

@ -10,16 +10,22 @@ Provides:
- Brief task summary extraction (markdown-aware)
- Error message sanitization (exception classes and subprocess categories)
- Shared workspace path constants and the MCP server path resolver
- Attached-file extraction and outbound-file staging (platform-wide chat
attachments every runtime routes through these helpers so the
drag-dropped image / returned report experience is identical)
"""
from __future__ import annotations
import asyncio
import base64
import json
import logging
import mimetypes
import os
import re
import subprocess
import uuid as _uuid
from pathlib import Path
from typing import TYPE_CHECKING, Any
@ -582,3 +588,276 @@ async def auto_push_hook(cwd: str | None = None) -> None:
await asyncio.to_thread(_auto_push_and_pr_sync, cwd)
except Exception:
logger.exception("auto_push_hook: failed (non-fatal)")
# ========================================================================
# Chat attachments — platform-level support for drag-drop uploads and
# agent-returned files. Every runtime executor routes inbound file parts
# through ``extract_attached_files`` + ``build_user_content_with_files``
# and post-processes replies through ``collect_outbound_files`` so a file
# attached in the canvas shows up correctly across hermes, claude-code,
# langgraph, CLI runtimes, etc. Living here (not in any one executor)
# keeps the attachment contract in one place — match canvas/ChatTab.tsx
# and workspace-server/internal/handlers/chat_files.go, and every runtime
# benefits at once.
# ========================================================================
# Matches CHAT_UPLOAD_DIR in workspace-server/internal/handlers/chat_files.go.
# The canvas uploads files here; outbound files get staged here so the
# download endpoint (which whitelists this directory) can serve them.
# Evaluated once at import time from WORKSPACE_MOUNT — rebinding the
# mount constant later does not move this directory.
CHAT_UPLOADS_DIR = f"{WORKSPACE_MOUNT}/.molecule/chat-uploads"
def ensure_workspace_writable() -> None:
    """Best-effort chmod so /workspace (and the chat-uploads dir) is
    writable by whichever user the agent runs as.

    Docker's default for a new named volume is root-owned 755 — that
    bricks the agentuser "write a file, hand it to the user" flow for
    every template whose agent runs under a non-root user (hermes uses
    `agent`; most others use some dedicated UID too). Each Dockerfile
    solving this individually was the anti-pattern; the helper lives
    in the platform so every runtime picks up the fix by calling into
    ``molecule_runtime`` during boot.

    Runs best-effort: if molecule-runtime itself started as non-root
    (rare, but possible in some CP configurations) the chmod silently
    no-ops — the template's own start.sh is expected to have handled
    perms already in that case. Silent degradation beats a hard boot
    failure: misconfigured perms are recoverable (the user gets a
    clear "permission denied" from the agent), while an uncatchable
    exception here would wedge the whole workspace in `provisioning`.
    """
    # 0o777 matches the intent: one container, one tenant — anyone in
    # the container may read/write workspace files. Cross-tenant
    # isolation happens at the Docker boundary, not inside the volume.
    for target in (WORKSPACE_MOUNT, CHAT_UPLOADS_DIR):
        try:
            os.makedirs(target, exist_ok=True)
            os.chmod(target, 0o777)
        except PermissionError:
            # Must stay ahead of the OSError clause — PermissionError
            # is a subclass of OSError.
            logger.info(
                "ensure_workspace_writable: lacking root (non-fatal) for %s", target
            )
        except OSError as exc:
            logger.warning(
                "ensure_workspace_writable: %s for %s", exc, target
            )
# Cap image inlining so a 25MB PNG doesn't blow past provider context
# limits. Images larger than this fall back to a path mention only —
# the agent can still read them via file_read / bash tools.
MAX_INLINE_ATTACHMENT_BYTES = 8 * 1024 * 1024

# Absolute /workspace/... paths the agent may mention in its reply.
# Leading boundary prevents matching the middle of URLs like
# https://example.com/workspace/foo while allowing markdown emphasis
# wrappers (**, *, _, `, (, [) so "**/workspace/x.pdf**" still matches.
# Trailing '.' is stripped post-capture (see collect_outbound_files).
_WORKSPACE_PATH_RE = re.compile(
    r"(?:^|[\s`\"'*_(\[])(/workspace/[A-Za-z0-9_./\-]+)"
)

# Any character outside [A-Za-z0-9._-]. NOTE(review): presumably used
# to scrub attachment filenames before staging — the use site is
# outside this chunk; confirm against the outbound-file helper.
_UNSAFE_NAME_RE = re.compile(r"[^A-Za-z0-9._\-]")
def resolve_attachment_uri(uri: str) -> str | None:
    """Resolve a canvas-issued attachment URI to an in-container path.

    Accepted shapes (matches canvas uploads.ts + chat_files.go):
    - ``workspace:/workspace/.molecule/chat-uploads/<name>`` (canonical)
    - ``file:///workspace/...`` (legacy)
    - ``/workspace/...`` (bare)

    Returns None for anything else, and for any path that escapes
    ``/workspace``: ``Path.resolve`` collapses ``..`` segments first, so
    a crafted ``workspace:/workspace/../etc/passwd`` is refused instead
    of leaking the real filesystem (CWE-22).
    """
    if not uri:
        return None
    # Strip a known scheme prefix; unknown schemes (https://, data:, ...)
    # leave candidate unset and are rejected below.
    candidate: str | None = None
    for prefix in ("workspace:", "file://"):
        if uri.startswith(prefix):
            candidate = uri[len(prefix):]
            break
    else:
        if uri.startswith("/"):
            candidate = uri
    if not candidate:
        return None
    try:
        real = str(Path(candidate).resolve())
    except (OSError, RuntimeError):
        return None
    # Containment: exactly the mount root, or strictly beneath it. The
    # "/" in the join keeps a sibling like /workspace-evil from passing.
    if real != WORKSPACE_MOUNT and not real.startswith(WORKSPACE_MOUNT + "/"):
        return None
    return real
def extract_attached_files(message: Any) -> list[dict[str, str]]:
    """Pull ``{name, mime_type, path}`` dicts out of an A2A message.

    Supports both part shapes seen in the wild: the discriminated-union
    ``part.root.file`` that a2a-sdk builds via Pydantic RootModel, and
    the flat ``part.file`` some hand-rolled callers emit. Non-file parts
    and files whose URIs don't resolve to a real in-workspace file are
    dropped, so the caller never sees a mix of valid and broken entries.
    """
    if message is None:
        return []
    attached: list[dict[str, str]] = []
    for part in getattr(message, "parts", None) or []:
        # a2a-sdk wraps the payload in .root; hand-built parts are flat.
        inner = getattr(part, "root", part)
        if getattr(inner, "kind", None) != "file":
            continue
        file_obj = getattr(inner, "file", None)
        if file_obj is None:
            continue
        uri = getattr(file_obj, "uri", "") or ""
        resolved = resolve_attachment_uri(uri)
        if not resolved or not os.path.isfile(resolved):
            logger.warning("skipping attached file with unresolvable uri=%r", uri)
            continue
        attached.append({
            "name": getattr(file_obj, "name", "") or "",
            # a2a-sdk uses camelCase mimeType; older callers snake_case.
            "mime_type": getattr(file_obj, "mimeType", None)
            or getattr(file_obj, "mime_type", None)
            or "",
            "path": resolved,
        })
    return attached
def _read_as_data_url(path: str, mime_type: str) -> str | None:
    """Encode *path* as ``data:<mime>;base64,<...>``; None if too large
    or unreadable."""
    try:
        nbytes = os.path.getsize(path)
    except OSError:
        return None
    # Oversized attachments are skipped, not truncated — the agent can
    # still reach them via its file tools.
    if nbytes > MAX_INLINE_ATTACHMENT_BYTES:
        logger.info(
            "attachment %s too large to inline (%d bytes > cap)", path, nbytes
        )
        return None
    try:
        with open(path, "rb") as fobj:
            raw = fobj.read()
    except OSError as exc:
        logger.warning("failed to read attachment %s: %s", path, exc)
        return None
    encoded = base64.b64encode(raw).decode("ascii")
    return f"data:{mime_type or 'application/octet-stream'};base64,{encoded}"
def build_user_content_with_files(
    user_text: str, attached: list[dict[str, str]]
) -> Any:
    """Combine text + attachments into an OpenAI-compat ``content`` field.

    Returns:
    - a plain string when nothing is attached (non-vision models keep the
      simple shape they have always seen);
    - a string with an "Attached files:" manifest appended when every
      attachment is non-image, or when image inlining is disabled via the
      ``MOLECULE_DISABLE_IMAGE_INLINING`` env switch — some provider/model
      combos (e.g. MiniMax's hermes-agent adapter as of 2026-04) claim
      vision support but hang indefinitely on image payloads;
    - an OpenAI-style list of parts (text + ``image_url`` data URLs) when
      at least one image attachment is present and inlining is enabled.

    The manifest always carries the absolute in-container path, so the
    agent can fall back to its file_read / bash tools regardless of the
    shape chosen. This is the platform's one-line fix for "agent didn't
    know I attached a file".
    """
    if not attached:
        return user_text
    manifest = "Attached files:\n" + "\n".join(
        f"- {f['name']} ({f['mime_type'] or 'unknown type'}) at {f['path']}"
        for f in attached
    )
    combined = manifest if not user_text else f"{user_text}\n\n{manifest}"
    inlining_off = os.environ.get("MOLECULE_DISABLE_IMAGE_INLINING", "").lower() in (
        "1", "true", "yes", "on",
    )
    has_image = any(
        (f["mime_type"] or "").startswith("image/") for f in attached
    )
    if inlining_off or not has_image:
        return combined
    parts: list[dict[str, Any]] = [{"type": "text", "text": combined}]
    for f in attached:
        mime = f["mime_type"] or ""
        if not mime.startswith("image/"):
            continue
        data_url = _read_as_data_url(f["path"], mime)
        # A None data URL (oversized/unreadable) silently drops the image
        # part; the manifest line above still points at the file.
        if data_url is not None:
            parts.append({"type": "image_url", "image_url": {"url": data_url}})
    return parts
def _sanitize_attachment_name(name: str) -> str:
    """Replace path-/shell-unfriendly characters with '_' and cap the
    result at 100 chars; an empty input becomes "file"."""
    safe = _UNSAFE_NAME_RE.sub("_", name)
    if not safe:
        safe = "file"
    return safe[:100]
def _guess_mime(path: str) -> str:
mt, _ = mimetypes.guess_type(path)
return mt or "application/octet-stream"
def stage_outbound_file(src_path: str) -> dict[str, str] | None:
    """Copy ``src_path`` into ``CHAT_UPLOADS_DIR`` (unless already there)
    and return ``{name, mime_type, path}`` so the caller can attach it to
    the A2A reply.

    Files already in the chat-uploads directory are attached as-is;
    anything elsewhere under /workspace gets a uuid-prefixed copy so
    basenames can't collide with existing uploads and the original
    workspace layout stays untouched. Returns None on I/O failure.
    """
    import shutil  # local: only the copy path below needs it

    try:
        os.makedirs(CHAT_UPLOADS_DIR, exist_ok=True)
    except OSError as exc:
        logger.warning("cannot ensure chat-uploads dir: %s", exc)
        return None
    name = os.path.basename(src_path)
    mime = _guess_mime(src_path)
    # Already staged: serve in place, no second copy.
    if os.path.dirname(src_path) == CHAT_UPLOADS_DIR:
        return {"name": name, "mime_type": mime, "path": src_path}
    try:
        stored = f"{_uuid.uuid4().hex[:16]}-{_sanitize_attachment_name(name)}"
        dst = os.path.join(CHAT_UPLOADS_DIR, stored)
        # shutil.copyfile streams in chunks. The previous
        # fin.read()/fout.write pair buffered the ENTIRE file in memory —
        # unbounded for outbound workspace artifacts, since no size cap
        # applies on this path (MAX_INLINE_ATTACHMENT_BYTES only limits
        # inlined images).
        shutil.copyfile(src_path, dst)
    except OSError as exc:
        logger.warning("failed to stage %s → chat-uploads: %s", src_path, exc)
        return None
    return {"name": name, "mime_type": mime, "path": dst}
def collect_outbound_files(reply_text: str) -> list[dict[str, str]]:
    """Find /workspace/... paths mentioned in the agent's reply and stage
    each one so it can be returned to the canvas as a file part.

    Every unique, readable file is routed through ``stage_outbound_file``
    — the download endpoint only serves whitelisted directories, so a
    reply that merely names /workspace/private/secret.pem still can't be
    exfiltrated via the chat download link unless we've explicitly copied
    it under the chat-uploads dir.
    """
    if not reply_text:
        return []
    staged_paths: set[str] = set()
    results: list[dict[str, str]] = []
    for m in _WORKSPACE_PATH_RE.finditer(reply_text):
        # The regex's character class admits '.', so trailing sentence
        # punctuation rides along — "wrote /workspace/x.txt." would
        # otherwise resolve to "x.txt." which doesn't exist.
        candidate = m.group(1).rstrip(".")
        path = resolve_attachment_uri(candidate)
        if not path or path in staged_paths or not os.path.isfile(path):
            continue
        staged_paths.add(path)
        entry = stage_outbound_file(path)
        if entry is not None:
            results.append(entry)
    return results

View File

@ -17,7 +17,31 @@ from pathlib import Path
import httpx
from platform_auth import auth_headers, refresh_cache
from platform_auth import auth_headers, refresh_cache, self_source_headers
def _runtime_state_payload() -> dict:
"""Build the {runtime_state, sample_error} portion of the heartbeat
body when the Claude SDK has hit a wedge. Returns an empty dict
when the runtime is healthy so the heartbeat payload doesn't grow
fields the platform doesn't need.
Imported lazily so workspaces running non-Claude runtimes (where
`claude_sdk_executor` may not be importable at all) keep working
a missing import means "no Claude wedge possible here, healthy."
"""
try:
from claude_sdk_executor import is_wedged, wedge_reason
except Exception:
return {}
if not is_wedged():
return {}
return {
"runtime_state": "wedged",
# sample_error doubles as the human-readable banner text on the
# canvas's degraded card — keep it short and actionable.
"sample_error": wedge_reason(),
}
logger = logging.getLogger(__name__)
@ -85,16 +109,23 @@ class HeartbeatLoop:
while True:
# 1. Send heartbeat (Phase 30.1: include auth header if token known)
try:
body = {
"workspace_id": self.workspace_id,
"error_rate": self.error_rate,
"sample_error": self.sample_error,
"active_tasks": self.active_tasks,
"current_task": self.current_task,
"uptime_seconds": int(time.time() - self.start_time),
}
# Layer the runtime-wedge fields on top so a
# non-empty sample_error from the wedge wins
# over the (typically empty) heartbeat
# sample_error field. The platform reads
# runtime_state to flip status → degraded.
body.update(_runtime_state_payload())
await client.post(
f"{self.platform_url}/registry/heartbeat",
json={
"workspace_id": self.workspace_id,
"error_rate": self.error_rate,
"sample_error": self.sample_error,
"active_tasks": self.active_tasks,
"current_task": self.current_task,
"uptime_seconds": int(time.time() - self.start_time),
},
json=body,
headers=auth_headers(),
)
self.error_count = 0
@ -113,16 +144,18 @@ class HeartbeatLoop:
logger.warning("Heartbeat 401 for %s — refreshing token cache and retrying once", self.workspace_id)
refresh_cache()
try:
retry_body = {
"workspace_id": self.workspace_id,
"error_rate": self.error_rate,
"sample_error": self.sample_error,
"active_tasks": self.active_tasks,
"current_task": self.current_task,
"uptime_seconds": int(time.time() - self.start_time),
}
retry_body.update(_runtime_state_payload())
await client.post(
f"{self.platform_url}/registry/heartbeat",
json={
"workspace_id": self.workspace_id,
"error_rate": self.error_rate,
"sample_error": self.sample_error,
"active_tasks": self.active_tasks,
"current_task": self.current_task,
"uptime_seconds": int(time.time() - self.start_time),
},
json=retry_body,
headers=auth_headers(),
)
self._consecutive_failures = 0
@ -284,6 +317,9 @@ class HeartbeatLoop:
else:
self._last_self_message_time = now
try:
# self_source_headers() adds X-Workspace-ID so the
# platform tags this row source=agent, not canvas
# — see platform_auth.py for the full rationale.
await client.post(
f"{self.platform_url}/workspaces/{self.workspace_id}/a2a",
json={
@ -295,7 +331,7 @@ class HeartbeatLoop:
},
},
},
headers=auth_headers(),
headers=self_source_headers(self.workspace_id),
timeout=120.0,
)
logger.info("Heartbeat: self-message sent to process delegation results")

View File

@ -33,7 +33,7 @@ from initial_prompt import (
mark_initial_prompt_attempted,
resolve_initial_prompt_marker,
)
from platform_auth import auth_headers
from platform_auth import auth_headers, self_source_headers
def get_machine_ip() -> str: # pragma: no cover
@ -69,6 +69,15 @@ async def main(): # pragma: no cover
# 0. Initialise OpenTelemetry (no-op if packages not installed)
setup_telemetry(service_name=workspace_id)
# 0a. Fix /workspace perms before any agent code runs. Docker ships
# named volumes as root:root 755 — without this the non-root agent
# user can't write files the user asked it to produce, and the
# "agent → file → user downloads" flow dead-ends at a bash "permission
# denied". Best-effort: no-ops silently if molecule-runtime itself
# isn't root (template's own start.sh should have handled it there).
from executor_helpers import ensure_workspace_writable
ensure_workspace_writable()
# 1. Load config
config = load_config(config_path)
port = config.a2a.port
@ -430,7 +439,15 @@ async def main(): # pragma: no cover
# silently rejected once any workspace has a live token on
# file. Without this, initial_prompt 401s in multi-tenant
# mode exactly like /registry/register did in #215.
headers = {"Content-Type": "application/json", **auth_headers()}
# X-Workspace-ID via self_source_headers() so the platform
# tags the row source=agent — without it the canvas's
# My Chat tab renders the initial_prompt as if the user
# had typed it. See platform_auth.py for the full
# explanation.
headers = {
"Content-Type": "application/json",
**self_source_headers(workspace_id),
}
# Retry with backoff — the platform proxy may not be able to
# reach us yet (container networking takes a moment to settle).
@ -522,7 +539,13 @@ async def main(): # pragma: no cover
# actual outcome instead of a bare "post failed" line.
# #220: include auth_headers() on every idle fire. Without
# this, the idle loop 401s in multi-tenant mode.
headers = {"Content-Type": "application/json", **auth_headers()}
# self_source_headers() adds X-Workspace-ID so the
# platform classifies the idle fire as source=agent
# rather than user-typed canvas input.
headers = {
"Content-Type": "application/json",
**self_source_headers(workspace_id),
}
try:
req = _urlreq.Request(
f"{platform_url}/workspaces/{workspace_id}/a2a",

View File

@ -98,6 +98,26 @@ def auth_headers() -> dict[str, str]:
return {"Authorization": f"Bearer {tok}"}
def self_source_headers(workspace_id: str) -> dict[str, str]:
    """Auth headers plus an X-Workspace-ID header naming this workspace
    as the request's source.

    Use for every POST the workspace's own runtime fires at the
    platform's A2A endpoints — heartbeat self-messages, initial_prompt,
    idle-loop fires, peer-to-peer A2A from runtime tools. Omitting
    X-Workspace-ID makes the platform's a2a_receive logger write
    source_id=NULL, which the canvas's My Chat tab treats as a
    user-typed message and renders the internal prompt to the user.
    Server-side classification rule:
    workspace-server/internal/handlers/a2a_proxy.go:184.

    Centralised here so a future system header (e.g. a per-fire
    correlation ID) touches one place — and so any workspace→A2A POST
    NOT using this helper stands out in review as a probable bug."""
    headers = dict(auth_headers())
    headers["X-Workspace-ID"] = workspace_id
    return headers
def clear_cache() -> None:
"""Reset the in-memory cache. Used by tests that write fresh token
files between cases."""

View File

@ -199,10 +199,34 @@ class TestSendA2AMessage:
result = await a2a_client.send_a2a_message("http://target/a2a", "task")
assert result.startswith(a2a_client._A2A_ERROR_PREFIX)
assert "unknown" in result
# The error includes the JSON-RPC code so the operator can look it
# up; "no message" surfaces the missing-message condition explicitly
# instead of the previous opaque "unknown".
assert "code=-32600" in result
assert "no message" in result.lower()
# Target URL is included so chained delegations are traceable.
assert "target=http://target/a2a" in result
async def test_neither_result_nor_error_returns_str_of_data(self):
"""Response with neither 'result' nor 'error' → str(data)."""
async def test_jsonrpc_error_with_code_zero_includes_code_in_detail(self):
    """JSON-RPC error code=0 is technically not valid in the spec,
    but a malformed peer can still send it — make sure the code is
    preserved in the detail rather than collapsing into the
    no-code path. Locks in the `code is not None` semantics over
    the truthy-check shortcut."""
    import a2a_client

    # code=0 is falsy — a naive `if code:` gate would drop it.
    resp = _make_response(200, {"error": {"code": 0, "message": "weird"}})
    mock_client = _make_mock_client(post_resp=resp)
    with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
        result = await a2a_client.send_a2a_message("http://target/a2a", "task")
    assert result.startswith(a2a_client._A2A_ERROR_PREFIX)
    assert "code=0" in result
    assert "weird" in result
async def test_neither_result_nor_error_returns_a2a_error_with_payload(self):
"""Response with neither 'result' nor 'error' → A2A_ERROR + payload context."""
import a2a_client
payload = {"jsonrpc": "2.0", "id": "abc123"}
@ -212,7 +236,14 @@ class TestSendA2AMessage:
with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
result = await a2a_client.send_a2a_message("http://target/a2a", "task")
assert result == str(payload)
# Pre-fix this returned bare str(payload) which the canvas
# rendered as a confusing "looks like a successful response"
# block. Now it's tagged so downstream UI / delegate_task
# routes it through the error path.
assert result.startswith(a2a_client._A2A_ERROR_PREFIX)
assert "unexpected response shape" in result
assert "abc123" in result # snippet of payload included for context
assert "target=http://target/a2a" in result
async def test_exception_returns_error_prefix_and_message(self):
"""Network exception → returns _A2A_ERROR_PREFIX + exception text."""
@ -225,6 +256,39 @@ class TestSendA2AMessage:
assert result.startswith(a2a_client._A2A_ERROR_PREFIX)
assert "connection refused" in result
# Exception class name is prepended when the message doesn't
# already include it — gives the operator a typed handle to
# search for in container logs.
assert "ConnectionError" in result
assert "target=http://target/a2a" in result
async def test_empty_stringifying_exception_falls_back_to_class_name(self):
    """The user's reported bug: httpx.RemoteProtocolError and similar
    exceptions can stringify to "" — pre-fix the canvas rendered
    "[A2A_ERROR] " with no detail. Verify the empty path now
    produces an actionable message including the exception type
    and the target URL."""
    import a2a_client

    # Subclass Exception with __str__ → "" to simulate the
    # silent-exception variants without depending on a specific
    # httpx version's behavior.
    class _SilentRemoteProtocolError(Exception):
        def __str__(self) -> str:
            return ""

    mock_client = _make_mock_client(post_exc=_SilentRemoteProtocolError())
    with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
        result = await a2a_client.send_a2a_message("http://target/a2a", "task")
    # Must NOT be just the bare prefix — that's the regression.
    assert result != a2a_client._A2A_ERROR_PREFIX.strip()
    assert result != f"{a2a_client._A2A_ERROR_PREFIX}"
    # Must include the class name + something explanatory.
    assert "_SilentRemoteProtocolError" in result
    assert "no message" in result.lower()
    assert "target=http://target/a2a" in result
async def test_result_text_part_missing_text_key_returns_empty(self):
"""Part dict without 'text' key → falls back to '' (empty string returned)."""

View File

@ -114,11 +114,11 @@ class TestDelegateTask:
async def __aexit__(self, *a): pass
async def get(self, url, headers=None):
calls.append(("get", url))
calls.append(("get", url, headers))
return _FakeResponse(200, {"url": "http://target.test/a2a"})
async def post(self, url, json=None):
calls.append(("post", url))
async def post(self, url, json=None, headers=None):
calls.append(("post", url, headers))
return _FakeResponse(200, {
"result": {
"parts": [{"kind": "text", "text": "Task done!"}]
@ -130,7 +130,17 @@ class TestDelegateTask:
result = await mod.delegate_task("ws-target", "do something")
assert result == "Task done!"
assert any(c[0] == "get" for c in calls)
assert any(c[0] == "post" for c in calls)
post_calls = [c for c in calls if c[0] == "post"]
assert post_calls, "delegate_task must POST to the target's /a2a endpoint"
# Regression: peer A2A POSTs MUST include X-Workspace-ID so
# the platform's a2a_receive logger writes source_id correctly
# — without it the recipient's My Chat tab would render the
# delegation as user-typed input. Same hazard fixed in
# heartbeat.py / a2a_client.py / main.py initial+idle flows.
post_headers = post_calls[0][2] or {}
assert post_headers.get("X-Workspace-ID"), (
f"delegate_task POST must include X-Workspace-ID; got headers={post_headers!r}"
)
async def test_delegate_task_success_empty_parts(self, monkeypatch):
"""Result with empty parts list falls back to str(result)."""
@ -144,7 +154,7 @@ class TestDelegateTask:
async def get(self, url, headers=None):
return _FakeResponse(200, {"url": "http://target.test/a2a"})
async def post(self, url, json=None):
async def post(self, url, json=None, headers=None):
return _FakeResponse(200, {"result": {"parts": []}})
monkeypatch.setattr(mod.httpx, "AsyncClient", FakeClient)
@ -217,7 +227,7 @@ class TestDelegateTask:
async def get(self, url, headers=None):
return _FakeResponse(200, {"url": "http://target.test/a2a"})
async def post(self, url, json=None):
async def post(self, url, json=None, headers=None):
return _FakeResponse(200, {
"error": {"code": -32603, "message": "Internal error"}
})
@ -240,7 +250,7 @@ class TestDelegateTask:
async def get(self, url, headers=None):
return _FakeResponse(200, {"url": "http://target.test/a2a"})
async def post(self, url, json=None):
async def post(self, url, json=None, headers=None):
return _FakeResponse(200, {"jsonrpc": "2.0", "id": "123"})
monkeypatch.setattr(mod.httpx, "AsyncClient", FakeClient)
@ -262,7 +272,7 @@ class TestDelegateTask:
async def get(self, url, headers=None):
return _FakeResponse(200, {"url": "http://target.test/a2a"})
async def post(self, url, json=None):
async def post(self, url, json=None, headers=None):
call_count["n"] += 1
raise ConnectionError("target down")

View File

@ -21,7 +21,25 @@ _FakeTextBlock = _sdk_stub.TextBlock
_FakeAssistantMessage = _sdk_stub.AssistantMessage
_FakeResultMessage = _sdk_stub.ResultMessage
from claude_sdk_executor import ClaudeSDKExecutor, QueryResult # noqa: E402
from claude_sdk_executor import ( # noqa: E402
ClaudeSDKExecutor,
QueryResult,
_mark_sdk_wedged,
_reset_sdk_wedge_for_test,
is_wedged,
wedge_reason,
)
# Module alias used by the wedge tests below — they read
# `_executor_mod.<helper>` to make the module-state vs function-state
# distinction explicit at the call site, separate from the names
# imported above. Hoisted to the top-of-file imports because the late
# binding (originally at line ~1248) was invisible to @pytest.mark.asyncio
# wrappers under coverage instrumentation (--cov, added by #1817):
# sys.settrace + the asyncio wrapper combination caused a
# `NameError: name '_executor_mod' is not defined` on every async wedge
# test. Hoisting the alias fixes that scope-resolution issue.
import claude_sdk_executor as _executor_mod # noqa: E402
# ---------- Helpers ----------
@ -1221,3 +1239,170 @@ def test_load_config_dict_empty_file_returns_empty(tmp_path):
e = ClaudeSDKExecutor(system_prompt=None, config_path=str(tmp_path), heartbeat=None)
result = e._load_config_dict()
assert result == {}
# ==================== SDK wedge detector ====================
#
# Exercises the module-level _sdk_wedged_reason flag set when the
# claude_agent_sdk init handshake times out. The flag is sticky — the
# heartbeat task reads it via is_wedged() / wedge_reason() and reports
# runtime_state="wedged" so the platform flips status → degraded.
def test_wedge_helpers_default_clean():
    """Fresh module: no wedge."""
    # Reset first — the wedge flag is sticky module-global state, so a
    # previous test in this file could otherwise leak into us.
    _reset_sdk_wedge_for_test()
    assert is_wedged() is False
    assert wedge_reason() == ""
def test_mark_sdk_wedged_sets_flag_and_reason():
    """First mark wins and sets both is_wedged() and the reason text."""
    _reset_sdk_wedge_for_test()
    _mark_sdk_wedged("init timeout — restart")
    try:
        assert is_wedged() is True
        assert "init timeout" in wedge_reason()
    finally:
        # Always clear — leaking the sticky flag would poison every
        # later test in this module.
        _reset_sdk_wedge_for_test()
def test_mark_sdk_wedged_sticky_first_wins():
    """A second wedge call with a different reason does NOT overwrite
    the first. The first cause is the one the user needs to see; later
    knock-on errors from the same wedge would otherwise mask it."""
    _reset_sdk_wedge_for_test()
    _mark_sdk_wedged("first cause — Control request timeout")
    # This second mark must be a no-op: first-wins semantics.
    _mark_sdk_wedged("noise from a downstream symptom")
    try:
        assert wedge_reason() == "first cause — Control request timeout"
    finally:
        _reset_sdk_wedge_for_test()
@pytest.mark.asyncio
async def test_execute_marks_wedge_on_control_request_timeout():
    """End-to-end: when _run_query raises an exception whose formatted
    error contains 'Control request timeout' (case-insensitive), the
    executor's catch block flags the SDK as wedged. Subsequent
    is_wedged() reads return True until process restart (or the
    test-only reset)."""
    _executor_mod._reset_sdk_wedge_for_test()
    e = _make_executor()
    ctx = _make_context(["test prompt"])
    eq = _make_event_queue()

    # Async generator that raises before its first yield — the shape
    # claude_agent_sdk.query presents when the init handshake wedges.
    async def boom(prompt, options):
        # Match the literal exception claude_agent_sdk raises in the
        # observed wedge path.
        raise Exception("Control request timeout: initialize")
        yield  # pragma: no cover — make this an async generator

    with patch("claude_sdk_executor.recall_memories", new=AsyncMock(return_value="")), \
         patch("claude_sdk_executor.read_delegation_results", return_value=""), \
         patch("claude_sdk_executor.commit_memory", new=AsyncMock()), \
         patch("claude_sdk_executor.set_current_task", new=AsyncMock()), \
         patch("claude_agent_sdk.query", new=boom):
        try:
            await e.execute(ctx, eq)
            assert _executor_mod.is_wedged() is True, "wedge flag must be set"
            assert "Control request timeout" in _executor_mod.wedge_reason()
        finally:
            # Reset even on assertion failure: the flag is process-global.
            _executor_mod._reset_sdk_wedge_for_test()
@pytest.mark.asyncio
async def test_execute_does_not_mark_wedge_on_unrelated_error():
    """Sanity: a generic non-wedge exception (e.g. ValueError) MUST
    NOT trigger the wedge flag. False-positives lock the workspace
    into degraded for the whole process lifetime."""
    _executor_mod._reset_sdk_wedge_for_test()
    e = _make_executor()
    ctx = _make_context(["test prompt"])
    eq = _make_event_queue()

    # Raises before the first yield — an ordinary in-stream failure,
    # deliberately NOT matching the wedge signature.
    async def boom(prompt, options):
        raise ValueError("ordinary tool failure, not a wedge")
        yield  # pragma: no cover

    with patch("claude_sdk_executor.recall_memories", new=AsyncMock(return_value="")), \
         patch("claude_sdk_executor.read_delegation_results", return_value=""), \
         patch("claude_sdk_executor.commit_memory", new=AsyncMock()), \
         patch("claude_sdk_executor.set_current_task", new=AsyncMock()), \
         patch("claude_agent_sdk.query", new=boom):
        try:
            await e.execute(ctx, eq)
            assert _executor_mod.is_wedged() is False, "non-wedge error must not flip the flag"
        finally:
            _executor_mod._reset_sdk_wedge_for_test()
@pytest.mark.asyncio
async def test_execute_clears_wedge_on_successful_query():
    """Auto-recovery: a process that previously hit a wedge should be
    able to recover when the SDK starts working again. _run_query
    calls _clear_sdk_wedge_on_success at the end of a clean
    completion; the flag flips back to None and the next heartbeat
    reports runtime_state empty so the platform recovers status →
    online without forcing the user to restart the workspace."""
    # Pre-set the wedge as if a prior call had tripped it.
    _executor_mod._reset_sdk_wedge_for_test()
    _executor_mod._mark_sdk_wedged("transient: Control request timeout: initialize")
    assert _executor_mod.is_wedged() is True
    e = _make_executor()
    ctx = _make_context(["test prompt"])
    eq = _make_event_queue()

    async def good_query(prompt, options):
        # Working SDK — yield one normal assistant message + result.
        yield _FakeAssistantMessage([_FakeTextBlock("hello back")])
        yield _FakeResultMessage(session_id="recovered-sess")

    with patch("claude_sdk_executor.recall_memories", new=AsyncMock(return_value="")), \
         patch("claude_sdk_executor.read_delegation_results", return_value=""), \
         patch("claude_sdk_executor.commit_memory", new=AsyncMock()), \
         patch("claude_sdk_executor.set_current_task", new=AsyncMock()), \
         patch("claude_agent_sdk.query", new=good_query):
        try:
            await e.execute(ctx, eq)
            assert _executor_mod.is_wedged() is False, "wedge flag must clear after a successful query"
            assert _executor_mod.wedge_reason() == ""
        finally:
            _executor_mod._reset_sdk_wedge_for_test()
@pytest.mark.asyncio
async def test_execute_does_not_clear_wedge_on_empty_stream():
    """Regression for the gate added in 3c4eef49: a stream that
    iterates without raising but emits NEITHER an AssistantMessage
    NOR a ResultMessage (degenerate or stub-driven shape) must NOT
    clear the wedge flag. A real successful query yields at least
    one of those; treating an empty stream as "recovered" would
    falsely flip the workspace back to online without any evidence
    the SDK is actually working."""
    _executor_mod._reset_sdk_wedge_for_test()
    _executor_mod._mark_sdk_wedged("pre-existing wedge — must not clear on empty stream")
    assert _executor_mod.is_wedged() is True
    e = _make_executor()
    ctx = _make_context(["test prompt"])
    eq = _make_event_queue()

    async def empty_query(prompt, options):
        # Iterator returns without yielding — the degenerate case.
        if False:
            yield  # pragma: no cover

    with patch("claude_sdk_executor.recall_memories", new=AsyncMock(return_value="")), \
         patch("claude_sdk_executor.read_delegation_results", return_value=""), \
         patch("claude_sdk_executor.commit_memory", new=AsyncMock()), \
         patch("claude_sdk_executor.set_current_task", new=AsyncMock()), \
         patch("claude_agent_sdk.query", new=empty_query):
        try:
            await e.execute(ctx, eq)
            assert _executor_mod.is_wedged() is True, \
                "wedge must persist when the stream emitted no content"
        finally:
            _executor_mod._reset_sdk_wedge_for_test()

View File

@ -654,3 +654,255 @@ def test_classify_subprocess_error_generic_fallback():
assert classify_subprocess_error("generic unknown failure", None) == "subprocess_error"
# exit_code=0 with no keyword match also lands here
assert classify_subprocess_error("mysterious but zero exit", 0) == "subprocess_error"
# ============================================================================
# Chat attachment helpers (drag-drop file + agent-returned file)
# ============================================================================
def test_resolve_attachment_uri_all_schemes(tmp_path, monkeypatch):
    """All three canvas-issued URI shapes resolve to the same container path.

    The canvas mints ``workspace:`` but the download endpoint used to accept
    ``file:///`` and bare ``/workspace/`` for legacy agents — the helper has
    to handle all three so agents don't have to normalize before calling us.
    """
    # NOTE(review): tmp_path/monkeypatch fixtures appear unused here —
    # confirm whether they're leftovers from an earlier revision.
    from executor_helpers import resolve_attachment_uri, WORKSPACE_MOUNT

    # Use a real path that starts with WORKSPACE_MOUNT. resolve() enforces
    # the containment check — anything outside /workspace/ must return None.
    ws_path = f"{WORKSPACE_MOUNT}/foo.txt"
    assert resolve_attachment_uri(f"workspace:{ws_path}") == ws_path
    assert resolve_attachment_uri(f"file://{ws_path}") == ws_path
    assert resolve_attachment_uri(ws_path) == ws_path

    # Out-of-tree is refused even when the raw path shape looks right.
    # CWE-22 regression: a crafted "workspace:/workspace/../etc/passwd"
    # must NOT return "/etc/passwd" just because resolve() normalizes it.
    assert resolve_attachment_uri("/etc/passwd") is None
    assert resolve_attachment_uri("workspace:/workspace/../etc/passwd") is None
    assert resolve_attachment_uri("") is None
    assert resolve_attachment_uri("https://example.com/x") is None
def test_extract_attached_files_skips_unresolvable():
    """Files with URIs that don't resolve to an existing file are dropped.

    A crafted A2A message can include any uri it wants; we must not hand
    non-existent or out-of-tree paths to downstream code as if they were
    real attachments.
    """
    from types import SimpleNamespace
    from executor_helpers import extract_attached_files

    msg = SimpleNamespace(parts=[
        # Flat shape, out-of-tree target — refused by containment check.
        SimpleNamespace(kind="file", file=SimpleNamespace(
            uri="workspace:/etc/passwd", name="x", mimeType="text/plain"
        )),
        # RootModel shape, in-tree but nonexistent — refused by isfile().
        SimpleNamespace(root=SimpleNamespace(kind="file", file=SimpleNamespace(
            uri="/workspace/does-not-exist", name="y", mimeType="text/plain"
        ))),
        # Non-file part — filtered by the kind check.
        SimpleNamespace(kind="text", text="ignored"),
    ])
    assert extract_attached_files(msg) == []
def test_extract_attached_files_accepts_both_shapes(tmp_path, monkeypatch):
    """a2a-sdk emits ``part.root.file`` via RootModel; some callers still
    build ``part.file`` directly. Both shapes have to yield the same
    dict structure — runtimes can pick either without surprise."""
    from types import SimpleNamespace
    from executor_helpers import extract_attached_files

    # Stage two real files under a fake /workspace for the resolver.
    real_a = tmp_path / "a.txt"
    real_b = tmp_path / "b.txt"
    real_a.write_text("A")
    real_b.write_text("B")
    # Point the helper's containment check at tmp_path instead of /workspace
    monkeypatch.setattr("executor_helpers.WORKSPACE_MOUNT", str(tmp_path))
    msg = SimpleNamespace(parts=[
        # Flat shape.
        SimpleNamespace(kind="file", file=SimpleNamespace(
            uri=f"workspace:{real_a}", name="a.txt", mimeType="text/plain"
        )),
        # RootModel shape.
        SimpleNamespace(root=SimpleNamespace(kind="file", file=SimpleNamespace(
            uri=f"workspace:{real_b}", name="b.txt", mimeType="text/plain"
        ))),
    ])
    out = extract_attached_files(msg)
    assert len(out) == 2
    assert {f["name"] for f in out} == {"a.txt", "b.txt"}
def test_build_user_content_with_files_no_attachments_is_string():
    """With zero attachments the payload must stay a bare string.

    Models without multi-modal support (most non-vision LLMs) expect
    ``content`` to be a plain string; changing the shape here would
    break every runtime built on that assumption.
    """
    from executor_helpers import build_user_content_with_files

    assert build_user_content_with_files("hello", []) == "hello"
def test_build_user_content_with_files_non_image_is_string_with_manifest():
    """Non-image attachments must surface a manifest line in the text.

    The agent only learns an attachment exists from the appended
    filename/absolute-path line — see canvas/src/components/tabs/
    ChatTab.tsx and the "I'm not sure what you're referring to" report.
    """
    from executor_helpers import build_user_content_with_files

    attachment = {
        "name": "app.log", "mime_type": "text/plain", "path": "/workspace/app.log",
    }
    content = build_user_content_with_files("read this", [attachment])

    assert isinstance(content, str)
    assert "app.log" in content
    assert "/workspace/app.log" in content
    assert "read this" in content
def test_build_user_content_with_files_image_is_multimodal(tmp_path):
    """Image attachments become the OpenAI-compatible parts list.

    Vision models need the actual bytes inline; asserting on the data
    URL prefix guards the common regression where an empty or missing
    file made the image part vanish silently.
    """
    from executor_helpers import build_user_content_with_files

    # Smallest valid PNG: a single 1x1 RGBA pixel.
    png_path = tmp_path / "x.png"
    png_path.write_bytes(bytes.fromhex(
        "89504e470d0a1a0a0000000d49484452000000010000000108060000001f"
        "15c4890000000a49444154789c6300010000000500010d0a2db40000000049454e44ae426082"
    ))

    parts = build_user_content_with_files("describe", [
        {"name": "x.png", "mime_type": "image/png", "path": str(png_path)},
    ])

    assert isinstance(parts, list)
    assert len(parts) == 2
    text_part, image_part = parts
    assert text_part["type"] == "text"
    assert image_part["type"] == "image_url"
    assert image_part["image_url"]["url"].startswith("data:image/png;base64,")
def test_build_user_content_with_files_large_image_skipped(tmp_path, monkeypatch):
    """Images over the inline cap don't break the request — the manifest
    still carries the path so the agent can read via its file_read tool
    without blowing past provider context limits with a 50MB base64 blob."""
    from executor_helpers import build_user_content_with_files

    # Shrink the inline cap so a 100-byte "image" counts as oversized.
    monkeypatch.setattr("executor_helpers.MAX_INLINE_ATTACHMENT_BYTES", 10)
    big = tmp_path / "big.png"
    big.write_bytes(b"x" * 100)

    content = build_user_content_with_files("describe", [
        {"name": "big.png", "mime_type": "image/png", "path": str(big)},
    ])

    # Image too large → no image_url entry, but the text manifest still mentions it
    assert isinstance(content, list)
    # Only the text part — the image_url was skipped
    assert all(c["type"] == "text" for c in content)
    # Enforce the docstring's second half: the skipped image must still
    # be discoverable by name in the text manifest (previously this test
    # claimed it in a comment but never asserted it).
    assert any("big.png" in c["text"] for c in content)
def test_collect_outbound_files_stages_workspace_paths(tmp_path, monkeypatch):
    """Workspace paths mentioned in a reply become staged attachments.

    Each unique existing /workspace/… file referenced by the agent's
    reply is copied under chat-uploads; a crafted reply naming a file
    outside the workspace (e.g. /etc/passwd) must never escape.
    """
    from pathlib import Path as _Path
    from executor_helpers import collect_outbound_files

    # Sandbox both the workspace root and the chat-uploads dir under a
    # tmp dir. resolve() matters on macOS: /var → /private/var, and the
    # helper resolve()s its side too, so prefixes must match exactly.
    sandbox_root = _Path(str(tmp_path / "workspace"))
    sandbox_root.mkdir()
    sandbox_root = sandbox_root.resolve()
    staging_dir = sandbox_root / ".molecule" / "chat-uploads"
    staging_dir.mkdir(parents=True)
    monkeypatch.setattr("executor_helpers.WORKSPACE_MOUNT", str(sandbox_root))
    monkeypatch.setattr("executor_helpers.CHAT_UPLOADS_DIR", str(staging_dir))

    # The module caches its path regex at import time — rebuild it
    # against the overridden mount.
    import re as _re
    monkeypatch.setattr(
        "executor_helpers._WORKSPACE_PATH_RE",
        _re.compile(rf"(?:^|[\s`(\[])({sandbox_root}/[A-Za-z0-9_./\-]+)"),
    )

    # One legitimate file inside the sandboxed workspace…
    inside = sandbox_root / "report.txt"
    inside.write_text("data")
    # …and a decoy outside it that the reply also mentions.
    (tmp_path / "secret.txt").write_text("leaked")

    reply = f"Saved to {inside} — also see {tmp_path}/secret.txt for extras."
    attachments = collect_outbound_files(reply)

    assert len(attachments) == 1
    assert attachments[0]["name"] == "report.txt"
    # The staged copy must live under chat-uploads — the download
    # endpoint whitelists only that directory.
    assert attachments[0]["path"].startswith(str(staging_dir))
def test_ensure_workspace_writable_chmods_777(tmp_path, monkeypatch):
    """Boot hook creates chat-uploads and opens both dirs to 0o777.

    Centralizing the chmod here means agents running as any non-root
    user can write files the user will download — without every
    template's Dockerfile needing its own permission fix.
    """
    import stat
    from executor_helpers import ensure_workspace_writable

    workspace = tmp_path / "workspace"
    workspace.mkdir(mode=0o755)
    uploads_dir = workspace / ".molecule" / "chat-uploads"
    # Deliberately NOT pre-created — the helper must makedirs it.
    monkeypatch.setattr("executor_helpers.WORKSPACE_MOUNT", str(workspace))
    monkeypatch.setattr("executor_helpers.CHAT_UPLOADS_DIR", str(uploads_dir))

    ensure_workspace_writable()

    assert uploads_dir.is_dir(), "chat-uploads dir should be created"
    for opened in (workspace, uploads_dir):
        assert stat.S_IMODE(opened.stat().st_mode) == 0o777
def test_ensure_workspace_writable_tolerates_non_root(tmp_path, monkeypatch, caplog):
    """A failing chmod must degrade to a no-op, never crash boot.

    On rare CP configurations molecule-runtime isn't root. A wrong
    permission is recoverable later; a SystemExit here would wedge the
    workspace in provisioning forever.
    """
    import logging
    from executor_helpers import ensure_workspace_writable

    workspace = tmp_path / "workspace"
    workspace.mkdir()
    monkeypatch.setattr("executor_helpers.WORKSPACE_MOUNT", str(workspace))
    monkeypatch.setattr("executor_helpers.CHAT_UPLOADS_DIR", str(workspace / "x"))

    def _deny(*_args, **_kwargs):
        raise PermissionError("Operation not permitted")

    # Simulate an unprivileged process: every chmod attempt is refused.
    monkeypatch.setattr("executor_helpers.os.chmod", _deny)

    with caplog.at_level(logging.INFO, logger="executor_helpers"):
        ensure_workspace_writable()  # must not raise
def test_collect_outbound_files_deduplicates(tmp_path, monkeypatch):
    """The same path mentioned twice yields exactly one attachment."""
    from pathlib import Path as _Path
    from executor_helpers import collect_outbound_files

    # Sandboxed workspace + staging dir, mirroring the staging test.
    root = _Path(str(tmp_path / "workspace"))
    root.mkdir()
    root = root.resolve()
    staging = root / ".molecule" / "chat-uploads"
    staging.mkdir(parents=True)
    monkeypatch.setattr("executor_helpers.WORKSPACE_MOUNT", str(root))
    monkeypatch.setattr("executor_helpers.CHAT_UPLOADS_DIR", str(staging))

    # Rebuild the module's cached path regex against the fake mount.
    import re as _re
    monkeypatch.setattr(
        "executor_helpers._WORKSPACE_PATH_RE",
        _re.compile(rf"(?:^|[\s`(\[])({root}/[A-Za-z0-9_./\-]+)"),
    )

    report_path = root / "report.txt"
    report_path.write_text("data")

    reply = f"Wrote {report_path}. Again at {report_path}."
    assert len(collect_outbound_files(reply)) == 1

Some files were not shown because too many files have changed in this diff Show More