fix(gate-1): resolve merge conflicts with main

Three add/add + content conflicts, all mechanical:
- WorkspaceUsage.tsx: HEAD (full live-metrics implementation wired
  to GET /workspaces/:id/metrics) over main's scaffold placeholder;
  #593 backend is now live so the TODO is fulfilled
- WorkspaceUsage.test.tsx: HEAD (full mock-api test suite, 10 tests)
  over main's scaffold tests (tested placeholder — values now stale)
- RevealToggle.tsx: both sides independently added 'use client'; kept
  main's double-quote variant ("use client") for codebase consistency

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Molecule AI · triage-operator 2026-04-17 06:16:36 +00:00
commit bb18f79343
68 changed files with 7458 additions and 177 deletions

View File

@ -1,39 +1,25 @@
name: publish-platform-image
# Builds and pushes the tenant-platform Docker image to GHCR whenever a
# commit lands on main. The private molecule-controlplane provisioner sets
# TENANT_IMAGE=ghcr.io/molecule-ai/platform:<tag> to spawn tenant Fly
# Machines from this image. See molecule-controlplane README for the pairing.
# Builds and pushes the platform Docker images to GHCR whenever a commit
# lands on main. EC2 tenant instances pull the tenant image from GHCR.
on:
push:
branches: [main]
paths:
# Only rebuild when something platform-relevant changes — saves GHA
# minutes on docs-only / canvas-only / MCP-only PRs.
- 'platform/**'
- 'canvas/**'
- 'manifest.json'
- '.github/workflows/publish-platform-image.yml'
# Templates now live in standalone repos — template changes no longer
# trigger a platform rebuild. Use workflow_dispatch to manually rebuild
# if a template repo update needs to be baked into the image.
# Manual trigger for re-publishing a tag after a non-platform merge.
workflow_dispatch:
permissions:
contents: read
packages: write # required to push to ghcr.io/${{ github.repository_owner }}/*
packages: write
env:
# GHCR accepts mixed-case, but most tooling lowercases — keep us consistent.
IMAGE_NAME: ghcr.io/molecule-ai/platform
# Fly registry mirror — tenant machines provisioned by the private
# `molecule-controlplane` pull from here (private GHCR image can't be
# pulled by Fly machines without auth plumbing we don't want to add).
# Fly auto-authenticates same-org machines against registry.fly.io, so
# mirroring keeps GHCR private while tenants still boot.
FLY_IMAGE_NAME: registry.fly.io/molecule-tenant
TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant
jobs:
build-and-push:
@ -42,83 +28,33 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Configure registry auth (write auths map; do NOT call docker login)
# `docker login` on macOS unconditionally writes credentials to the
# osxkeychain credential helper, even when DOCKER_CONFIG/config.json
# declares `credsStore: ""` and even when invoked with `--config`.
# Verified locally 2026-04-16 — after a successful login, Docker
# rewrites the same config file to:
# { "auths": { "ghcr.io": {} }, "credsStore": "osxkeychain" }
# i.e. the auth lives in the Keychain, not the config file. The
# Mac mini runner is a launchd user agent with a locked Keychain,
# so storage fails with `User interaction is not allowed (-25308)`.
#
# Six prior PRs (#273, #319, #322, #341, #484, #486) all kept calling
# `docker login` and tried to coerce credsStore — none worked.
# The only reliable fix is to skip `docker login` entirely and write
# the auth strings directly. `docker/build-push-action@v5` and the
# daemon honor the `auths` map for push without needing login.
#
# Fly registry username MUST be literal "x" (verified 2026-04-15) —
# any other value returns 401. FLY_API_TOKEN lives in GitHub Actions
# secrets AND in `fly secrets` on molecule-cp; see
# docs/runbooks/saas-secrets.md before rotating.
- name: Configure GHCR auth
shell: bash
env:
GHCR_USER: ${{ github.actor }}
GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
FLY_TOKEN: ${{ secrets.FLY_API_TOKEN }}
run: |
set -eu
mkdir -p "${RUNNER_TEMP}/docker-config"
GHCR_AUTH=$(printf '%s:%s' "${GHCR_USER}" "${GHCR_TOKEN}" | base64)
FLY_AUTH=$(printf '%s:%s' 'x' "${FLY_TOKEN}" | base64)
umask 077
cat > "${RUNNER_TEMP}/docker-config/config.json" <<JSON
{
"auths": {
"ghcr.io": { "auth": "${GHCR_AUTH}" },
"registry.fly.io": { "auth": "${FLY_AUTH}" }
}
}
JSON
printf '{"auths":{"ghcr.io":{"auth":"%s"}}}' "${GHCR_AUTH}" > "${RUNNER_TEMP}/docker-config/config.json"
echo "DOCKER_CONFIG=${RUNNER_TEMP}/docker-config" >> "${GITHUB_ENV}"
# Diagnostics that don't leak the tokens.
echo "=== docker ==="
command -v docker || echo "(docker not in PATH)"
docker --version 2>&1 || true
ls -la /usr/local/bin/docker /opt/homebrew/bin/docker 2>&1 || true
echo "=== auths registries (no values) ==="
grep -o '"[a-zA-Z0-9.-]*\.io"' "${RUNNER_TEMP}/docker-config/config.json" || true
- name: Set up QEMU
# Required on the Apple-silicon self-hosted runner — Fly tenant machines
# pull linux/amd64, and buildx needs binfmt handlers in Docker Desktop's
# VM to emulate amd64 during the build.
uses: docker/setup-qemu-action@v3
with:
platforms: linux/amd64
- name: Set up Docker Buildx
# Buildx enables cache-from/cache-to via GHA cache and multi-arch
# builds without local docker daemon wrangling.
uses: docker/setup-buildx-action@v3
- name: Compute tags
id: tags
# Emit two tags per build: `latest` (floating, always the main tip)
# and the short commit SHA (immutable, pin-friendly). Control plane
# can deploy `latest` today and pin to :sha in Phase H hardening.
run: |
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
- name: Build & push to GHCR
# Split from the Fly mirror so a registry.fly.io outage doesn't block
# GHCR (or vice versa) — each registry's failure mode is isolated.
# GHA cache is shared because both steps re-use the same Dockerfile
# context + build args.
# Explicit linux/amd64 target: the runner is Apple-silicon (arm64),
# but Fly tenant machines are amd64. QEMU handles the emulation.
- name: Build & push platform image to GHCR
uses: docker/build-push-action@v5
with:
context: .
@ -133,13 +69,9 @@ jobs:
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.description=Molecule AI tenant platform (one instance per org)
org.opencontainers.image.description=Molecule AI platform (Go API server)
- name: Build & push tenant image to Fly registry
# Tenant image = Go platform + Canvas (Next.js) in one container.
# Uses Dockerfile.tenant which includes the canvas build + reverse proxy.
# Continues even if GHCR push failed.
if: always()
- name: Build & push tenant image to GHCR
uses: docker/build-push-action@v5
with:
context: .
@ -147,31 +79,11 @@ jobs:
platforms: linux/amd64
push: true
tags: |
${{ env.FLY_IMAGE_NAME }}:latest
${{ env.FLY_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
${{ env.TENANT_IMAGE_NAME }}:latest
${{ env.TENANT_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
cache-from: type=gha
cache-to: type=gha,mode=max
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.description=Molecule AI tenant platform + canvas (one instance per org)
- name: Install flyctl
uses: superfly/flyctl-actions/setup-flyctl@master
- name: Deploy to Fly tenant machines
env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
run: |
MACHINES=$(flyctl machines list -a molecule-tenant --json | jq -r '.[] | select(.state == "started" or .state == "stopped") | .id')
if [ -z "$MACHINES" ]; then
echo "No tenant machines found — skipping deploy (control plane provisions on demand)"
exit 0
fi
for id in $MACHINES; do
echo "Updating machine $id to sha-${{ steps.tags.outputs.sha }}..."
flyctl machines update "$id" \
--image "${{ env.FLY_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}" \
-a molecule-tenant \
--yes
done
echo "All tenant machines updated to sha-${{ steps.tags.outputs.sha }}"
org.opencontainers.image.description=Molecule AI tenant platform + canvas (one EC2 instance per org)

4
.gitignore vendored
View File

@ -44,6 +44,10 @@ venv/
*.egg-info/
.pytest_cache/
# Brand monitor runtime state (never commit)
brand-monitor/.surge_state.json
brand-monitor/.monitor_state.json
# Docker
*.log

View File

@ -1,15 +1,20 @@
"use client";
import { useEffect } from "react";
import { useEffect, useState } from "react";
import { Canvas } from "@/components/Canvas";
import { Legend } from "@/components/Legend";
import { CommunicationOverlay } from "@/components/CommunicationOverlay";
import { Spinner } from "@/components/Spinner";
import { connectSocket, disconnectSocket } from "@/store/socket";
import { useCanvasStore } from "@/store/canvas";
import { api } from "@/lib/api";
import type { WorkspaceData } from "@/store/socket";
export default function Home() {
const hydrationError = useCanvasStore((s) => s.hydrationError);
const setHydrationError = useCanvasStore((s) => s.setHydrationError);
const [hydrating, setHydrating] = useState(true);
useEffect(() => {
connectSocket();
@ -23,8 +28,13 @@ export default function Home() {
useCanvasStore.getState().setViewport(viewport);
}
}).catch((err) => {
// Initial hydration failed — socket reconnect will retry
// Initial hydration failed — show error banner to user
console.error("Canvas: initial hydration failed", err);
useCanvasStore.getState().setHydrationError(
err instanceof Error && err.message ? err.message : "Failed to load canvas"
);
}).finally(() => {
setHydrating(false);
});
return () => {
@ -32,11 +42,39 @@ export default function Home() {
};
}, []);
if (hydrating) {
return (
<div className="fixed inset-0 flex items-center justify-center bg-zinc-950">
<div className="flex flex-col items-center gap-3">
<Spinner size="lg" />
<span className="text-xs text-zinc-500">Loading canvas...</span>
</div>
</div>
);
}
return (
<>
<Canvas />
<Legend />
<CommunicationOverlay />
{hydrationError && (
<div
role="alert"
className="fixed inset-0 flex flex-col items-center justify-center bg-zinc-950 text-zinc-300 gap-4 z-[9999]"
>
<p className="text-zinc-400 text-sm">{hydrationError}</p>
<button
onClick={() => {
setHydrationError(null);
window.location.reload();
}}
className="px-4 py-2 bg-blue-600 hover:bg-blue-500 text-white rounded-md text-sm"
>
Retry
</button>
</div>
)}
</>
);
}

View File

@ -235,6 +235,14 @@ export function ContextMenu() {
closeContextMenu();
}, [contextMenu, nestNode, closeContextMenu]);
const handleZoomToTeam = useCallback(() => {
if (!contextMenu) return;
window.dispatchEvent(
new CustomEvent("molecule:zoom-to-team", { detail: { nodeId: contextMenu.nodeId } })
);
closeContextMenu();
}, [contextMenu, closeContextMenu]);
if (!contextMenu) return null;
const isOfflineOrFailed = contextMenu.nodeData.status === "offline" || contextMenu.nodeData.status === "failed";
@ -253,7 +261,10 @@ export function ContextMenu() {
? [{ label: "Extract from Team", icon: "⤴", action: handleRemoveFromTeam }]
: []),
...(hasChildren
? [{ label: "Collapse Team", icon: "◁", action: handleCollapse }]
? [
{ label: "Collapse Team", icon: "◁", action: handleCollapse },
{ label: "Zoom to Team", icon: "⊕", action: handleZoomToTeam },
]
: [{ label: "Expand to Team", icon: "▷", action: handleExpand }]),
{ label: "", icon: "", action: () => {}, divider: true },
...(isPaused

View File

@ -1,6 +1,6 @@
"use client";
import { useState, useEffect } from "react";
import { useState, useEffect, useRef, useCallback, useId } from "react";
import * as Dialog from "@radix-ui/react-dialog";
import { api } from "@/lib/api";
@ -42,6 +42,7 @@ export function CreateWorkspaceButton() {
const [tier, setTier] = useState(1);
const [template, setTemplate] = useState("");
const [parentId, setParentId] = useState("");
const [budgetLimit, setBudgetLimit] = useState("");
const [creating, setCreating] = useState(false);
const [error, setError] = useState<string | null>(null);
const [workspaces, setWorkspaces] = useState<WorkspaceOption[]>([]);
@ -50,6 +51,33 @@ export function CreateWorkspaceButton() {
const [hermesProvider, setHermesProvider] = useState("anthropic");
const [hermesApiKey, setHermesApiKey] = useState("");
// Refs for roving tabIndex on the tier radio group (WCAG 2.1 arrow-key nav)
const radioRefs = useRef<Array<HTMLButtonElement | null>>([]);
const TIERS = [
{ value: 1, label: "T1", desc: "Sandboxed" },
{ value: 2, label: "T2", desc: "Standard" },
{ value: 3, label: "T3", desc: "Full Access" },
];
const handleRadioKeyDown = useCallback(
(e: React.KeyboardEvent, currentIndex: number) => {
if (e.key === "ArrowDown" || e.key === "ArrowRight") {
e.preventDefault();
const next = (currentIndex + 1) % TIERS.length;
setTier(TIERS[next].value);
radioRefs.current[next]?.focus();
} else if (e.key === "ArrowUp" || e.key === "ArrowLeft") {
e.preventDefault();
const prev = (currentIndex - 1 + TIERS.length) % TIERS.length;
setTier(TIERS[prev].value);
radioRefs.current[prev]?.focus();
}
},
// TIERS is stable (module-level constant pattern), setTier is stable from useState
// eslint-disable-next-line react-hooks/exhaustive-deps
[]
);
const isHermes = template.trim().toLowerCase() === "hermes";
// Reset form and load workspaces whenever dialog opens
@ -60,6 +88,7 @@ export function CreateWorkspaceButton() {
setTier(1);
setTemplate("");
setParentId("");
setBudgetLimit("");
setError(null);
setHermesProvider("anthropic");
setHermesApiKey("");
@ -86,12 +115,17 @@ export function CreateWorkspaceButton() {
: undefined;
try {
const parsedBudget = budgetLimit.trim()
? parseFloat(budgetLimit)
: null;
await api.post("/workspaces", {
name: name.trim(),
role: role.trim() || undefined,
template: template.trim() || undefined,
tier,
parent_id: parentId || undefined,
budget_limit: parsedBudget,
canvas: { x: Math.random() * 400 + 100, y: Math.random() * 300 + 100 },
...(isHermes && provider
? { secrets: { [provider.envVar]: hermesApiKey.trim() } }
@ -155,6 +189,14 @@ export function CreateWorkspaceButton() {
onChange={setRole}
placeholder="e.g. SEO Specialist"
/>
<InputField
label="Budget limit (USD)"
value={budgetLimit}
onChange={setBudgetLimit}
placeholder="e.g. 100"
type="number"
helper="Leave blank for unlimited"
/>
<InputField
label="Template"
value={template}
@ -172,16 +214,15 @@ export function CreateWorkspaceButton() {
<div className="col-span-3 text-[11px] text-zinc-400 mb-1">
Tier
</div>
{[
{ value: 1, label: "T1", desc: "Sandboxed" },
{ value: 2, label: "T2", desc: "Standard" },
{ value: 3, label: "T3", desc: "Full Access" },
].map((t) => (
{TIERS.map((t, idx) => (
<button
key={t.value}
ref={(el) => { radioRefs.current[idx] = el; }}
role="radio"
aria-checked={tier === t.value}
tabIndex={tier === t.value ? 0 : -1}
onClick={() => setTier(t.value)}
onKeyDown={(e) => handleRadioKeyDown(e, idx)}
className={`py-2 rounded-lg text-center transition-colors ${
tier === t.value
? "bg-blue-600/20 border border-blue-500/50 text-blue-300"
@ -315,6 +356,8 @@ function InputField({
placeholder,
required,
mono,
type = "text",
helper,
}: {
label: string;
value: string;
@ -322,10 +365,16 @@ function InputField({
placeholder?: string;
required?: boolean;
mono?: boolean;
type?: string;
helper?: string;
}) {
// useId() generates a stable, unique ID for the label↔input association,
// satisfying WCAG 2.1 SC 1.3.1 (Info and Relationships, Level A).
const inputId = useId();
return (
<div>
<label className="text-[11px] text-zinc-400 block mb-1">
<label htmlFor={inputId} className="text-[11px] text-zinc-400 block mb-1">
{label}{" "}
{required && (
<>
@ -337,11 +386,18 @@ function InputField({
)}
</label>
<input
id={inputId}
type={type}
value={value}
onChange={(e) => onChange(e.target.value)}
placeholder={placeholder}
className={`w-full bg-zinc-800/60 border border-zinc-700/50 rounded-lg px-3 py-2 text-sm text-zinc-100 placeholder-zinc-600 focus:outline-none focus:border-blue-500/60 focus:ring-1 focus:ring-blue-500/20 transition-colors ${mono ? "font-mono text-xs" : ""}`}
min={type === "number" ? "0" : undefined}
step={type === "number" ? "0.01" : undefined}
className={`w-full bg-zinc-800/60 border border-zinc-700/50 rounded-lg px-3 py-2 text-sm text-zinc-100 placeholder-zinc-500 focus:outline-none focus:border-blue-500/60 focus:ring-1 focus:ring-blue-500/20 transition-colors ${mono ? "font-mono text-xs" : ""}`}
/>
{helper && (
<p className="mt-1 text-xs text-zinc-500">{helper}</p>
)}
</div>
);
}

View File

@ -173,7 +173,7 @@ export function SidePanel() {
else if (e.key === "End") { e.preventDefault(); next = TABS.length - 1; }
if (next !== null) {
setPanelTab(TABS[next].id);
requestAnimationFrame(() => { document.getElementById(`tab-${TABS[next!].id}`)?.focus(); });
requestAnimationFrame(() => { const el = document.getElementById(`tab-${TABS[next!].id}`); el?.focus(); el?.scrollIntoView({ block: "nearest", inline: "nearest" }); });
}
}}
>

View File

@ -0,0 +1,221 @@
// @vitest-environment jsdom
/**
* DetailsTab integration tests for issue #541.
*
* Budget-specific logic (stats, progress bar, PATCH /budget, 402 handling) is
* fully covered by BudgetSection.test.tsx — this file focuses on:
* 1. BudgetSection being mounted inside DetailsTab
* 2. The workspace edit form (name / role / tier) no longer carrying
* budget_limit — that concern lives in BudgetSection now
* 3. PATCH /workspaces/:id body integrity (no accidental budget_limit leak)
*/
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
// ── Mocks ─────────────────────────────────────────────────────────────────────
vi.mock("@/lib/api", () => ({
api: {
get: vi.fn(),
patch: vi.fn(),
del: vi.fn(),
post: vi.fn(),
},
}));
vi.mock("@/store/canvas", () => ({
useCanvasStore: vi.fn((selector: (s: unknown) => unknown) =>
selector({
updateNodeData: mockUpdateNodeData,
removeNode: vi.fn(),
selectNode: vi.fn(),
})
),
}));
vi.mock("../StatusDot", () => ({ StatusDot: () => null }));
// Mock BudgetSection — it has its own test suite (BudgetSection.test.tsx).
// Without this mock its internal api.get would fire against the shared mock
// and cause type errors when the return is not a valid BudgetData object.
vi.mock("../tabs/BudgetSection", () => ({
BudgetSection: ({ workspaceId }: { workspaceId: string }) => (
<div data-testid="budget-section-stub" data-ws={workspaceId} />
),
}));
import { api } from "@/lib/api";
import { DetailsTab } from "../tabs/DetailsTab";
const mockPatch = vi.mocked(api.patch);
const mockGet = vi.mocked(api.get);
const mockUpdateNodeData = vi.fn();
// ── Helpers ───────────────────────────────────────────────────────────────────
function makeData(overrides: Record<string, unknown> = {}) {
return {
name: "Test Agent",
role: "Researcher",
tier: 1,
status: "online",
agentCard: null,
activeTasks: 0,
collapsed: false,
lastErrorRate: 0,
lastSampleError: "",
url: "http://localhost:8080",
parentId: null,
currentTask: "",
runtime: "langgraph",
needsRestart: false,
budgetLimit: null,
budgetUsed: null,
...overrides,
};
}
beforeEach(() => {
vi.clearAllMocks();
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue([] as any);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValue({} as any);
});
afterEach(() => {
cleanup();
});
async function openEdit() {
const editBtn = screen.getAllByRole("button").find((b) => b.textContent === "Edit");
fireEvent.click(editBtn!);
await waitFor(() =>
expect(screen.getAllByRole("button").some((b) => b.textContent === "Save")).toBe(true)
);
}
// ── BudgetSection mounting ────────────────────────────────────────────────────
describe("DetailsTab — BudgetSection integration", () => {
it("renders BudgetSection with the correct workspaceId", () => {
render(<DetailsTab workspaceId="ws-42" data={makeData()} />);
const stub = screen.getByTestId("budget-section-stub");
expect(stub).toBeTruthy();
expect(stub.getAttribute("data-ws")).toBe("ws-42");
});
});
// ── Workspace edit form (no budget_limit) ──────────────────────────────────────
describe("DetailsTab — workspace edit form does not include budget_limit", () => {
it("does NOT show a 'Budget limit (USD)' input in the edit form", async () => {
render(<DetailsTab workspaceId="ws-1" data={makeData()} />);
await openEdit();
// Budget limit (USD) was the old inline field label — must be absent now
expect(screen.queryByPlaceholderText("Leave blank for unlimited")).toBeNull();
expect(screen.queryByText("Budget limit (USD)")).toBeNull();
});
it("PATCH /workspaces/:id body does NOT include budget_limit", async () => {
render(<DetailsTab workspaceId="ws-1" data={makeData({ name: "My Agent" })} />);
await openEdit();
const saveBtn = screen.getAllByRole("button").find((b) => b.textContent === "Save");
fireEvent.click(saveBtn!);
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(Object.prototype.hasOwnProperty.call(body, "budget_limit")).toBe(false);
});
it("PATCH /workspaces/:id body includes name, role, and tier", async () => {
render(
<DetailsTab
workspaceId="ws-1"
data={makeData({ name: "Alpha", role: "Writer", tier: 2 })}
/>
);
await openEdit();
const saveBtn = screen.getAllByRole("button").find((b) => b.textContent === "Save");
fireEvent.click(saveBtn!);
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.name).toBe("Alpha");
expect(body.role).toBe("Writer");
expect(body.tier).toBe(2);
});
it("Cancel reverts name, role, tier without touching budget state", async () => {
render(
<DetailsTab
workspaceId="ws-1"
data={makeData({ name: "Original", role: "Dev" })}
/>
);
await openEdit();
// Modify name
fireEvent.change(
screen.getAllByRole("textbox").find((i) => (i as HTMLInputElement).value === "Original")!,
{ target: { value: "Modified" } }
);
const cancelBtn = screen.getAllByRole("button").find((b) => b.textContent === "Cancel");
fireEvent.click(cancelBtn!);
// Should be back in read view — no Save button visible
expect(screen.queryAllByRole("button").some((b) => b.textContent === "Save")).toBe(false);
// Workspace info unchanged in read view
expect(screen.getByText("Original")).toBeTruthy();
});
it("updateNodeData is called with name/role/tier but NOT budgetLimit on save", async () => {
render(
<DetailsTab
workspaceId="ws-1"
data={makeData({ name: "Bot", role: "Analyst", tier: 1 })}
/>
);
await openEdit();
const saveBtn = screen.getAllByRole("button").find((b) => b.textContent === "Save");
fireEvent.click(saveBtn!);
await waitFor(() => expect(mockUpdateNodeData).toHaveBeenCalled());
const updateArgs = mockUpdateNodeData.mock.calls[0][1] as Record<string, unknown>;
expect(updateArgs.name).toBe("Bot");
expect(updateArgs.role).toBe("Analyst");
expect(updateArgs.tier).toBe(1);
expect(Object.prototype.hasOwnProperty.call(updateArgs, "budgetLimit")).toBe(false);
});
});
// ── budget-exceeded-badge removed from DetailsTab ────────────────────────────
describe("DetailsTab — no inline budget-exceeded-badge", () => {
it("does NOT render budget-exceeded-badge even when budgetUsed > budgetLimit (BudgetSection owns that)", () => {
render(
<DetailsTab
workspaceId="ws-1"
data={makeData({ budgetLimit: 10, budgetUsed: 99 })}
/>
);
// The old inline badge is gone — BudgetSection.tsx owns the exceeded state
expect(screen.queryByTestId("budget-exceeded-badge")).toBeNull();
});
it("does NOT render inline Budget limit row in read view", () => {
render(
<DetailsTab
workspaceId="ws-1"
data={makeData({ budgetLimit: 100 })}
/>
);
// "$100.00" and "Unlimited" are rendered by BudgetSection now
expect(screen.queryByText("$100.00")).toBeNull();
expect(screen.queryByText("Unlimited")).toBeNull();
});
});

View File

@ -0,0 +1,389 @@
// @vitest-environment jsdom
/**
* Tests for BudgetSection (issue #541).
*
* Covers:
* - Loading state
* - Stats row: used / limit, "Unlimited" when null
* - Progress bar: correct percentage, capped at 100%, absent when no limit
* - Budget remaining text
* - Input pre-fill (existing limit / blank when null)
* - Save: PATCH with number, PATCH with null (blank input)
* - 402 on GET → exceeded banner, no fetch-error text
* - 402 on PATCH → exceeded banner
* - Non-402 fetch error → error text
* - Non-402 save error → save error alert
* - Section header and subheading
* - Fetch error does not show stats
*/
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import {
render,
screen,
fireEvent,
waitFor,
cleanup,
act,
} from "@testing-library/react";
// ── Mock api ──────────────────────────────────────────────────────────────────
vi.mock("@/lib/api", () => ({
api: {
get: vi.fn(),
patch: vi.fn(),
},
}));
import { api } from "@/lib/api";
import { BudgetSection } from "../tabs/BudgetSection";
const mockGet = vi.mocked(api.get);
const mockPatch = vi.mocked(api.patch);
// ── Helpers ───────────────────────────────────────────────────────────────────
function budgetResponse(overrides: Partial<{
budget_limit: number | null;
budget_used: number;
budget_remaining: number | null;
}> = {}) {
return {
budget_limit: 1000,
budget_used: 250,
budget_remaining: 750,
...overrides,
};
}
function make402Error(): Error {
return new Error("API GET /workspaces/ws-1/budget: 402 Payment Required");
}
function make402PatchError(): Error {
return new Error("API PATCH /workspaces/ws-1/budget: 402 Payment Required");
}
function makeGenericError(msg = "network timeout"): Error {
return new Error(`API GET /workspaces/ws-1/budget: 500 ${msg}`);
}
beforeEach(() => {
vi.clearAllMocks();
});
afterEach(() => {
cleanup();
});
// ── Rendering helpers ─────────────────────────────────────────────────────────
async function renderLoaded(budgetData = budgetResponse()) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValueOnce(budgetData as any);
render(<BudgetSection workspaceId="ws-1" />);
// Wait for loading to finish
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
}
// ── Loading state ─────────────────────────────────────────────────────────────
describe("BudgetSection — loading state", () => {
it("shows loading indicator while fetch is in flight", () => {
// Never resolve
mockGet.mockReturnValue(new Promise(() => {}));
render(<BudgetSection workspaceId="ws-1" />);
expect(screen.getByTestId("budget-loading")).toBeTruthy();
expect(screen.getByText("Loading…")).toBeTruthy();
});
it("hides loading indicator after fetch resolves", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValueOnce(budgetResponse() as any);
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
});
});
// ── Section header ────────────────────────────────────────────────────────────
describe("BudgetSection — header and subheading", () => {
it("renders 'Budget' as the section heading", async () => {
await renderLoaded();
expect(screen.getByText("Budget")).toBeTruthy();
});
it("renders the subheading 'Limit total message credits for this workspace'", async () => {
await renderLoaded();
expect(
screen.getByText("Limit total message credits for this workspace")
).toBeTruthy();
});
it("renders 'Budget limit (credits)' label for the input", async () => {
await renderLoaded();
expect(screen.getByText("Budget limit (credits)")).toBeTruthy();
});
});
// ── Stats row ─────────────────────────────────────────────────────────────────
describe("BudgetSection — stats row", () => {
it("shows budget_used in the stats row", async () => {
await renderLoaded(budgetResponse({ budget_used: 350, budget_limit: 1000 }));
expect(screen.getByTestId("budget-used-value").textContent).toBe("350");
});
it("shows budget_limit in the stats row", async () => {
await renderLoaded(budgetResponse({ budget_used: 100, budget_limit: 500 }));
expect(screen.getByTestId("budget-limit-value").textContent).toBe("500");
});
it("shows 'Unlimited' when budget_limit is null", async () => {
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
expect(screen.getByTestId("budget-limit-value").textContent).toBe("Unlimited");
});
it("shows budget_remaining when present", async () => {
await renderLoaded(budgetResponse({ budget_remaining: 750 }));
expect(screen.getByTestId("budget-remaining").textContent).toContain("750");
expect(screen.getByTestId("budget-remaining").textContent).toContain("credits remaining");
});
it("hides budget_remaining row when null", async () => {
await renderLoaded(budgetResponse({ budget_remaining: null }));
expect(screen.queryByTestId("budget-remaining")).toBeNull();
});
});
// ── Progress bar ──────────────────────────────────────────────────────────────
describe("BudgetSection — progress bar", () => {
it("renders the progress bar when budget_limit is set", async () => {
await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
expect(screen.getByRole("progressbar")).toBeTruthy();
});
it("does NOT render progress bar when budget_limit is null", async () => {
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
expect(screen.queryByRole("progressbar")).toBeNull();
});
it("fills to the correct percentage (25%)", async () => {
await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
expect(fill.style.width).toBe("25%");
});
it("fills to the correct percentage (50%)", async () => {
await renderLoaded(budgetResponse({ budget_used: 500, budget_limit: 1000 }));
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
expect(fill.style.width).toBe("50%");
});
it("caps fill at 100% when budget_used exceeds budget_limit", async () => {
await renderLoaded(budgetResponse({ budget_used: 1500, budget_limit: 1000 }));
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
expect(fill.style.width).toBe("100%");
});
it("progress bar has aria-valuenow equal to the calculated percentage", async () => {
await renderLoaded(budgetResponse({ budget_used: 300, budget_limit: 1000 }));
const bar = screen.getByRole("progressbar");
expect(bar.getAttribute("aria-valuenow")).toBe("30");
});
});
// ── Input pre-fill ────────────────────────────────────────────────────────────
describe("BudgetSection — input pre-fill", () => {
it("pre-fills input with existing budget_limit", async () => {
await renderLoaded(budgetResponse({ budget_limit: 500 }));
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
expect(input.value).toBe("500");
});
it("leaves input empty when budget_limit is null", async () => {
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
expect(input.value).toBe("");
});
});
// ── Save — PATCH calls ────────────────────────────────────────────────────────
describe("BudgetSection — save", () => {
// Happy path: the typed value is parsed and PATCHed as an integer to the
// workspace-scoped budget endpoint.
it("calls PATCH /workspaces/:id/budget with budget_limit as integer", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 800 }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "800" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
expect(mockPatch.mock.calls[0][0]).toBe("/workspaces/ws-1/budget");
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBe(800);
});
it("sends budget_limit: 0 (not null) when input is '0' — zero-credit budget", async () => {
// Regression for QA bug report: `parseInt("0") || null` would yield null.
// The correct form `raw !== "" ? parseInt(raw, 10) : null` must return 0.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 0, budget_used: 0, budget_remaining: 0 }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "0" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBe(0);
expect(body.budget_limit).not.toBeNull();
});
// Blank input means "unlimited" — the API contract represents that as null.
it("sends budget_limit: null when input is blank", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: null, budget_remaining: null }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBeNull();
});
// After a successful PATCH the component must render the server's response
// (not the optimistic input) — asserted via the formatted limit value.
it("updates displayed stats after successful save", async () => {
const updated = budgetResponse({ budget_limit: 2000, budget_used: 500, budget_remaining: 1500 });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(updated as any);
await renderLoaded(budgetResponse({ budget_limit: 1000, budget_used: 250 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "2000" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-limit-value").textContent).toBe("2,000")
);
});
// Non-402 PATCH failures surface the raw error message in the save-error
// alert; 402s are handled separately (see the 402 describe block below is
// NOT assumed here — this test only pins the generic path).
it("shows save error message on non-402 PATCH failure", async () => {
mockPatch.mockRejectedValueOnce(
new Error("API PATCH /workspaces/ws-1/budget: 500 server error")
);
await renderLoaded();
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-save-error")).toBeTruthy()
);
expect(screen.getByTestId("budget-save-error").textContent).toContain("500");
});
});
// ── 402 handling ──────────────────────────────────────────────────────────────
// A 402 from either GET or PATCH flips the section into a dedicated
// "budget exceeded" state: the amber banner shows, and the generic
// fetch/save error surfaces must stay hidden.
describe("BudgetSection — 402 handling", () => {
it("shows exceeded banner when GET returns 402", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
expect(screen.getByText("Budget exceeded — messages blocked")).toBeTruthy();
});
it("does NOT show fetch error text when GET returns 402 (only banner)", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.queryByTestId("budget-loading")).toBeNull()
);
expect(screen.queryByTestId("budget-fetch-error")).toBeNull();
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
});
it("shows exceeded banner when PATCH returns 402", async () => {
// GET succeeds first so the save UI is reachable; only the PATCH 402s.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValueOnce(budgetResponse() as any);
mockPatch.mockRejectedValueOnce(make402PatchError());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
// Should NOT also show the save-error alert
expect(screen.queryByTestId("budget-save-error")).toBeNull();
});
it("clears exceeded banner after a successful save", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
// Now a successful PATCH (limit was raised)
const updated = budgetResponse({ budget_limit: 5000, budget_used: 250, budget_remaining: 4750 });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(updated as any);
// act() wraps the change+click so the resulting state updates flush
// before the final waitFor asserts the banner is gone.
await act(async () => {
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "5000" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
});
await waitFor(() =>
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull()
);
});
});
// ── Non-402 fetch error ───────────────────────────────────────────────────────
describe("BudgetSection — non-402 fetch errors", () => {
  // Ordinary GET failures surface as an inline error message and must not be
  // confused with the 402 "budget exceeded" state.
  it("shows fetch error text on non-402 GET failure", async () => {
    mockGet.mockRejectedValueOnce(makeGenericError("internal server error"));
    render(<BudgetSection workspaceId="ws-1" />);
    await waitFor(() =>
      expect(screen.getByTestId("budget-fetch-error")).toBeTruthy()
    );
    const message = screen.getByTestId("budget-fetch-error").textContent;
    expect(message).toContain("500");
  });

  it("does NOT show stats row on fetch error", async () => {
    mockGet.mockRejectedValueOnce(makeGenericError());
    render(<BudgetSection workspaceId="ws-1" />);
    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
    expect(screen.queryByTestId("budget-stats-row")).toBeNull();
  });

  it("does NOT show exceeded banner on non-402 fetch error", async () => {
    mockGet.mockRejectedValueOnce(makeGenericError());
    render(<BudgetSection workspaceId="ws-1" />);
    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
    expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
  });
});

View File

@ -163,4 +163,50 @@ describe("ContextMenu — keyboard accessibility", () => {
const { container } = render(<ContextMenu />);
expect(container.firstChild).toBeNull();
});
// ── Zoom to Team (#557) ───────────────────────────────────────────────────
// Visibility rule: the menu item appears only when the node has children.
// Presumably ContextMenu derives "has children" from store nodes whose
// data.parentId matches the target node id — verify against the component.
it("does NOT show 'Zoom to Team' when node has no children", () => {
mockStore.nodes = []; // no children
render(<ContextMenu />);
const items = screen.getAllByRole("menuitem");
const labels = items.map((el) => el.textContent ?? "");
expect(labels.some((l) => l.includes("Zoom to Team"))).toBe(false);
});
it("shows 'Zoom to Team' when the node has children", () => {
mockStore.nodes = [{ id: "child-1", data: { parentId: "ws-1" } }];
render(<ContextMenu />);
const items = screen.getAllByRole("menuitem");
const labels = items.map((el) => el.textContent ?? "");
expect(labels.some((l) => l.includes("Zoom to Team"))).toBe(true);
});
// Clicking the item must broadcast a molecule:zoom-to-team CustomEvent whose
// detail.nodeId names the right-clicked node.
it("clicking 'Zoom to Team' dispatches molecule:zoom-to-team event", () => {
  mockStore.nodes = [{ id: "child-1", data: { parentId: "ws-1" } }];
  const dispatched: CustomEvent[] = [];
  // Bug fix: the original passed a *new* arrow function to
  // removeEventListener, which is a silent no-op (removal requires the same
  // function reference), leaking the listener into every later test. Keep a
  // named reference and detach it in `finally` so cleanup runs even when an
  // assertion throws.
  const onZoom = (e: Event) => {
    dispatched.push(e as CustomEvent);
  };
  window.addEventListener("molecule:zoom-to-team", onZoom);
  try {
    render(<ContextMenu />);
    const items = screen.getAllByRole("menuitem");
    const zoomItem = items.find((el) => el.textContent?.includes("Zoom to Team"))!;
    expect(zoomItem).toBeTruthy();
    fireEvent.click(zoomItem);
    expect(dispatched).toHaveLength(1);
    expect(dispatched[0].detail.nodeId).toBe("ws-1");
  } finally {
    window.removeEventListener("molecule:zoom-to-team", onZoom);
  }
});
// Selecting any action dismisses the menu — here pinned for Zoom to Team via
// the mocked closeContextMenu store action.
it("clicking 'Zoom to Team' closes the context menu", () => {
mockStore.nodes = [{ id: "child-1", data: { parentId: "ws-1" } }];
render(<ContextMenu />);
const items = screen.getAllByRole("menuitem");
const zoomItem = items.find((el) => el.textContent?.includes("Zoom to Team"))!;
fireEvent.click(zoomItem);
expect(closeContextMenu).toHaveBeenCalled();
});
});

View File

@ -89,4 +89,144 @@ describe("CreateWorkspaceDialog — accessibility", () => {
expect(t2?.getAttribute("aria-checked")).toBe("true")
);
});
// ── Arrow-key navigation (WCAG 2.1 radio group) — Issue #556 ──────────────
// Radio groups use the roving-tabindex pattern: exactly one radio is in the
// tab order (tabindex=0); arrows move both focus and selection, wrapping at
// either end. Down/Right advance, Up/Left go back.
it("selected radio has tabIndex=0, others have tabIndex=-1 (roving tabIndex)", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
// T1 is default selected
expect(t1.getAttribute("tabindex")).toBe("0");
expect(t2.getAttribute("tabindex")).toBe("-1");
expect(t3.getAttribute("tabindex")).toBe("-1");
});
it("ArrowDown moves selection from T1 to T2", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
t1.focus();
fireEvent.keyDown(t1, { key: "ArrowDown" });
await waitFor(() => expect(t2.getAttribute("aria-checked")).toBe("true"));
expect(t1.getAttribute("aria-checked")).toBe("false");
});
it("ArrowRight moves selection from T2 to T3", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
fireEvent.click(t2); // select T2 first
await waitFor(() => expect(t2.getAttribute("aria-checked")).toBe("true"));
t2.focus();
fireEvent.keyDown(t2, { key: "ArrowRight" });
await waitFor(() => expect(t3.getAttribute("aria-checked")).toBe("true"));
});
// Wrap-around: advancing past the last radio cycles to the first.
it("ArrowDown wraps from T3 back to T1", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
fireEvent.click(t3); // select T3 first
await waitFor(() => expect(t3.getAttribute("aria-checked")).toBe("true"));
t3.focus();
fireEvent.keyDown(t3, { key: "ArrowDown" });
await waitFor(() => expect(t1.getAttribute("aria-checked")).toBe("true"));
});
it("ArrowUp moves selection from T2 to T1", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
const t2 = radios.find((r) => r.textContent?.includes("T2"))!;
fireEvent.click(t2);
await waitFor(() => expect(t2.getAttribute("aria-checked")).toBe("true"));
t2.focus();
fireEvent.keyDown(t2, { key: "ArrowUp" });
await waitFor(() => expect(t1.getAttribute("aria-checked")).toBe("true"));
});
// Wrap-around in the other direction: backing up past the first cycles to
// the last.
it("ArrowLeft wraps from T1 back to T3", async () => {
await openDialog();
const radios = screen.getAllByRole("radio");
const t1 = radios.find((r) => r.textContent?.includes("T1"))!;
const t3 = radios.find((r) => r.textContent?.includes("T3"))!;
t1.focus();
fireEvent.keyDown(t1, { key: "ArrowLeft" });
await waitFor(() => expect(t3.getAttribute("aria-checked")).toBe("true"));
});
});
// ── WCAG 2.1 SC 1.3.1 — Programmatic label association (Issue #558) ──────────
//
// Every <input> rendered by the InputField helper must have a matching <label>
// via htmlFor/id so screen readers announce the field name, not just the
// placeholder. useId() in InputField generates stable unique IDs per render.
describe("CreateWorkspaceDialog — WCAG SC 1.3.1 label/input association", () => {
// Each field test: locate the input by placeholder, require a non-empty id,
// then find its <label> via a for-attribute selector and check the label text.
it("Name input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const nameInput = screen.getByPlaceholderText("e.g. SEO Agent") as HTMLInputElement;
expect(nameInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${nameInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Name");
});
it("Role input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const roleInput = screen.getByPlaceholderText("e.g. SEO Specialist") as HTMLInputElement;
expect(roleInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${roleInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Role");
});
it("Budget limit input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const budgetInput = screen.getByPlaceholderText("e.g. 100") as HTMLInputElement;
expect(budgetInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${budgetInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Budget limit");
});
it("Template input has a <label> whose htmlFor matches the input id", async () => {
await openDialog();
const templateInput = screen.getByPlaceholderText(
"e.g. seo-agent (from workspace-configs-templates/)"
) as HTMLInputElement;
expect(templateInput.id).toBeTruthy();
const label = document.querySelector(`label[for="${templateInput.id}"]`);
expect(label).toBeTruthy();
expect(label?.textContent).toContain("Template");
});
// Guards against a hard-coded id inside InputField: all four generated ids
// must be distinct, otherwise label[for] would be ambiguous.
it("each InputField generates a distinct id (no id collisions)", async () => {
await openDialog();
const inputs = [
screen.getByPlaceholderText("e.g. SEO Agent"),
screen.getByPlaceholderText("e.g. SEO Specialist"),
screen.getByPlaceholderText("e.g. 100"),
screen.getByPlaceholderText("e.g. seo-agent (from workspace-configs-templates/)"),
] as HTMLInputElement[];
const ids = inputs.map((i) => i.id).filter(Boolean);
const unique = new Set(ids);
expect(unique.size).toBe(ids.length); // no duplicates
expect(ids.length).toBe(4);
});
it("Name label text contains the required asterisk indicator", async () => {
await openDialog();
const nameInput = screen.getByPlaceholderText("e.g. SEO Agent") as HTMLInputElement;
const label = document.querySelector(`label[for="${nameInput.id}"]`);
// aria-hidden asterisk * is present for visual required indicator
expect(label?.querySelector("[aria-hidden='true']")?.textContent).toBe("*");
});
});

View File

@ -299,3 +299,85 @@ describe("CreateWorkspaceDialog — Hermes provider picker", () => {
);
});
});
// ---------------------------------------------------------------------------
// budget_limit field tests (#541)
// ---------------------------------------------------------------------------
// Contract under test: the create dialog sends budget_limit as a number when
// the field has a value (integers and decimals alike) and as null (meaning
// "unlimited") when the field is left blank.
describe("CreateWorkspaceDialog — budget_limit field", () => {
it("renders a Budget limit (USD) input", async () => {
await openDialog();
const budgetInput = screen.getByPlaceholderText("e.g. 100");
expect(budgetInput).toBeTruthy();
});
it("renders helper text 'Leave blank for unlimited'", async () => {
await openDialog();
expect(screen.getByText("Leave blank for unlimited")).toBeTruthy();
});
it("sends budget_limit as a number when a value is entered", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Budget Agent" },
});
fireEvent.change(screen.getByPlaceholderText("e.g. 100"), {
target: { value: "250" },
});
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBe(250);
});
it("sends budget_limit as null when the field is left blank", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Unlimited Agent" },
});
// Leave budget_limit empty
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBeNull();
});
// toBeCloseTo avoids brittle float equality on the 49.99 payload value.
it("sends budget_limit as a float when a decimal value is entered", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Float Budget Agent" },
});
fireEvent.change(screen.getByPlaceholderText("e.g. 100"), {
target: { value: "49.99" },
});
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBeCloseTo(49.99);
});
// Dialog state must not persist across open/close cycles: cancel, unmount
// via cleanup(), re-open, and expect a pristine (empty) budget field.
it("resets budget_limit to empty when dialog is reopened", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. 100"), {
target: { value: "500" },
});
// Close dialog
const cancelBtn = screen.getAllByRole("button").find((b) =>
b.textContent === "Cancel"
);
fireEvent.click(cancelBtn!);
cleanup();
// Re-open
await openDialog();
const budgetInput = screen.getByPlaceholderText("e.g. 100") as HTMLInputElement;
expect(budgetInput.value).toBe("");
});
});

View File

@ -0,0 +1,253 @@
'use client';
import { useState, useEffect, useCallback } from "react";
import { api } from "@/lib/api";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
interface BudgetData {
budget_limit: number | null;
budget_used: number;
budget_remaining: number | null;
}
interface Props {
workspaceId: string;
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/** True when an API error carries a 402 status code. */
function isApiError402(e: unknown): boolean {
return e instanceof Error && /: 402( |$)/.test(e.message);
}
// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------
/**
* BudgetSection dedicated "Budget" section in the workspace details panel.
*
* - Fetches GET /workspaces/:id/budget on mount for live usage stats
* - Shows a progress bar (budget_used / budget_limit, blue-500, capped 100%)
* - Allows updating budget_limit via PATCH /workspaces/:id/budget
* - Shows a 402-specific "Budget exceeded" amber banner for any blocked state
*/
export function BudgetSection({ workspaceId }: Props) {
const [budget, setBudget] = useState<BudgetData | null>(null);
const [loading, setLoading] = useState(true);
const [fetchError, setFetchError] = useState<string | null>(null);
const [limitInput, setLimitInput] = useState("");
const [saving, setSaving] = useState(false);
const [saveError, setSaveError] = useState<string | null>(null);
/** True when a 402 has been seen from any API call in this section. */
const [budgetExceeded, setBudgetExceeded] = useState(false);
// ── Fetch current budget data ─────────────────────────────────────────────
const loadBudget = useCallback(async () => {
setLoading(true);
setFetchError(null);
try {
const data = await api.get<BudgetData>(`/workspaces/${workspaceId}/budget`);
setBudget(data);
setLimitInput(data.budget_limit != null ? String(data.budget_limit) : "");
} catch (e) {
if (isApiError402(e)) {
setBudgetExceeded(true);
} else {
setFetchError(e instanceof Error ? e.message : "Failed to load budget");
}
} finally {
setLoading(false);
}
}, [workspaceId]);
useEffect(() => {
loadBudget();
}, [loadBudget]);
// ── Save handler ──────────────────────────────────────────────────────────
const handleSave = async () => {
setSaving(true);
setSaveError(null);
const raw = limitInput.trim();
// Use explicit empty-string check (not falsy check) so that a
// user-entered "0" is sent as budget_limit: 0, not null (unlimited).
const parsedLimit = raw !== "" ? parseInt(raw, 10) : null;
try {
const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, {
budget_limit: parsedLimit,
});
setBudget(updated);
setLimitInput(updated.budget_limit != null ? String(updated.budget_limit) : "");
// Clear exceeded state if the save succeeded (limit was raised or removed)
setBudgetExceeded(false);
} catch (e) {
if (isApiError402(e)) {
setBudgetExceeded(true);
} else {
setSaveError(e instanceof Error ? e.message : "Failed to save budget");
}
} finally {
setSaving(false);
}
};
// ── Progress calculation ──────────────────────────────────────────────────
const progressPct =
budget && budget.budget_limit != null && budget.budget_limit > 0
? Math.min(100, Math.round((budget.budget_used / budget.budget_limit) * 100))
: 0;
// ── Render ────────────────────────────────────────────────────────────────
return (
<div className="space-y-3" data-testid="budget-section">
{/* Section header */}
<div>
<h3 className="text-xs font-semibold text-zinc-400 uppercase tracking-wider">
Budget
</h3>
<p className="text-[11px] text-zinc-400 mt-0.5">
Limit total message credits for this workspace
</p>
</div>
{/* 402 exceeded banner */}
{budgetExceeded && (
<div
role="alert"
data-testid="budget-exceeded-banner"
className="flex items-center gap-2 px-3 py-2 rounded-lg bg-zinc-950 border border-amber-700/50 text-amber-400 text-xs font-medium"
>
<svg
width="13"
height="13"
viewBox="0 0 13 13"
fill="none"
aria-hidden="true"
className="shrink-0"
>
<path
d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z"
stroke="currentColor"
strokeWidth="1.4"
strokeLinejoin="round"
/>
<path
d="M6.5 5.5V7.5M6.5 9.5h.01"
stroke="currentColor"
strokeWidth="1.4"
strokeLinecap="round"
/>
</svg>
Budget exceeded messages blocked
</div>
)}
{/* Usage stats */}
{loading ? (
<p className="text-xs text-zinc-500" data-testid="budget-loading">
Loading
</p>
) : fetchError ? (
<p className="text-xs text-red-400" data-testid="budget-fetch-error">
{fetchError}
</p>
) : budget ? (
<div className="space-y-2">
{/* Stats row */}
<div className="flex items-baseline justify-between" data-testid="budget-stats-row">
<span className="text-xs text-zinc-400">Credits used</span>
<span className="text-xs font-mono text-zinc-300">
<span data-testid="budget-used-value">{budget.budget_used.toLocaleString()}</span>
<span className="text-zinc-500 mx-1">/</span>
<span data-testid="budget-limit-value">
{budget.budget_limit != null
? budget.budget_limit.toLocaleString()
: "Unlimited"}
</span>
</span>
</div>
{/* Progress bar (only when limit is set) */}
{budget.budget_limit != null && (
<div
role="progressbar"
aria-label="Budget usage"
aria-valuenow={progressPct}
aria-valuemin={0}
aria-valuemax={100}
className="h-1.5 w-full rounded-full bg-zinc-800 overflow-hidden"
>
<div
data-testid="budget-progress-fill"
className="h-full rounded-full bg-blue-500 transition-all duration-300"
style={{ width: `${progressPct}%` }}
/>
</div>
)}
{/* Remaining credits */}
{budget.budget_remaining != null && (
<p className="text-[11px] text-zinc-500" data-testid="budget-remaining">
{budget.budget_remaining.toLocaleString()} credits remaining
</p>
)}
</div>
) : null}
{/* Input + Save */}
<div className="space-y-1.5 pt-1">
<label
htmlFor={`budget-limit-input-${workspaceId}`}
className="text-[11px] text-zinc-400 block"
>
Budget limit (credits)
</label>
<input
id={`budget-limit-input-${workspaceId}`}
type="number"
min="0"
step="1"
value={limitInput}
onChange={(e) => setLimitInput(e.target.value)}
placeholder="e.g. 1000 — blank for unlimited"
data-testid="budget-limit-input"
className="w-full bg-zinc-800 border border-zinc-700 rounded-lg px-3 py-2 text-sm text-zinc-300 placeholder-zinc-500 focus:outline-none focus:border-blue-500 focus:ring-1 focus:ring-blue-500/30 transition-colors"
/>
<p className="text-xs text-zinc-500">Leave blank for unlimited</p>
{saveError && (
<div
role="alert"
data-testid="budget-save-error"
className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-red-400"
>
{saveError}
</div>
)}
<button
onClick={handleSave}
disabled={saving}
data-testid="budget-save-btn"
className="px-4 py-1.5 bg-blue-600 hover:bg-blue-500 active:bg-blue-700 rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors"
>
{saving ? "Saving…" : "Save"}
</button>
</div>
</div>
);
}

View File

@ -141,19 +141,29 @@ export function ChannelsTab({ workspaceId }: Props) {
}
};
const [error, setError] = useState("");
const handleToggle = async (ch: Channel) => {
await api.patch(`/workspaces/${workspaceId}/channels/${ch.id}`, {
enabled: !ch.enabled,
});
load();
try {
await api.patch(`/workspaces/${workspaceId}/channels/${ch.id}`, {
enabled: !ch.enabled,
});
load();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to toggle channel");
}
};
const confirmDelete = async () => {
if (!pendingDelete) return;
const ch = pendingDelete;
setPendingDelete(null);
await api.del(`/workspaces/${workspaceId}/channels/${ch.id}`);
load();
try {
await api.del(`/workspaces/${workspaceId}/channels/${ch.id}`);
load();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to delete channel");
}
};
const handleTest = async (ch: Channel) => {
@ -188,6 +198,12 @@ export function ChannelsTab({ workspaceId }: Props) {
</button>
</div>
{error && (
<div className="px-3 py-1.5 bg-red-900/30 border border-red-800 rounded text-xs text-red-400">
{error}
</div>
)}
{/* Create form */}
{showForm && (
<div className="space-y-2 p-3 bg-zinc-800/40 rounded border border-zinc-700/50">

View File

@ -4,6 +4,8 @@ import { useState, useEffect, useCallback } from "react";
import { api } from "@/lib/api";
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
import { StatusDot } from "../StatusDot";
import { BudgetSection } from "./BudgetSection";
import { WorkspaceUsage } from "../WorkspaceUsage";
interface Props {
workspaceId: string;
@ -59,7 +61,11 @@ export function DetailsTab({ workspaceId, data }: Props) {
setSaving(true);
setSaveError(null);
try {
await api.patch(`/workspaces/${workspaceId}`, { name, role: role || null, tier });
await api.patch(`/workspaces/${workspaceId}`, {
name,
role: role || null,
tier,
});
updateNodeData(workspaceId, { name, role: role || "", tier });
setEditing(false);
} catch (e) {
@ -145,7 +151,13 @@ export function DetailsTab({ workspaceId, data }: Props) {
{saving ? "Saving..." : "Save"}
</button>
<button
onClick={() => { setEditing(false); setSaveError(null); setName(data.name); setRole(data.role || ""); setTier(data.tier); }}
onClick={() => {
setEditing(false);
setSaveError(null);
setName(data.name);
setRole(data.role || "");
setTier(data.tier);
}}
className="px-3 py-1 bg-zinc-700 hover:bg-zinc-600 text-xs rounded text-zinc-300"
>
Cancel
@ -190,6 +202,12 @@ export function DetailsTab({ workspaceId, data }: Props) {
)}
</Section>
{/* Budget — dedicated section with live usage stats (#541) */}
<BudgetSection workspaceId={workspaceId} />
{/* Token usage + spend — wired to GET /workspaces/:id/metrics (#592) */}
<WorkspaceUsage workspaceId={workspaceId} />
{/* Agent Card / Skills */}
{skills.length > 0 && (
<Section title="Skills">

View File

@ -219,7 +219,7 @@ export function MemoryTab({ workspaceId }: Props) {
Refresh
</button>
<button
onClick={() => setShowAdd(!showAdd)}
onClick={() => { setShowAdd(!showAdd); if (!showAdd) setShowAdvanced(true); }}
className="px-2 py-1 bg-blue-600 hover:bg-blue-500 text-[10px] rounded text-white"
>
+ Add

View File

@ -126,15 +126,23 @@ export function ScheduleTab({ workspaceId }: Props) {
if (!pendingDelete) return;
const { id } = pendingDelete;
setPendingDelete(null);
await api.del(`/workspaces/${workspaceId}/schedules/${id}`);
fetchSchedules();
try {
await api.del(`/workspaces/${workspaceId}/schedules/${id}`);
fetchSchedules();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to delete schedule");
}
};
const handleToggle = async (sched: Schedule) => {
await api.patch(`/workspaces/${workspaceId}/schedules/${sched.id}`, {
enabled: !sched.enabled,
});
fetchSchedules();
try {
await api.patch(`/workspaces/${workspaceId}/schedules/${sched.id}`, {
enabled: !sched.enabled,
});
fetchSchedules();
} catch (e: unknown) {
setError(e instanceof Error ? e.message : "Failed to toggle schedule");
}
};
const handleEdit = (sched: Schedule) => {

View File

@ -68,11 +68,14 @@ export function TracesTab({ workspaceId }: Props) {
{traces.length === 0 && !error ? (
<div className="text-center py-8">
<div className="text-2xl opacity-20 mb-2">📊</div>
<div className="text-2xl opacity-20 mb-2" aria-hidden="true">--</div>
<p className="text-xs text-zinc-600">No traces yet</p>
<p className="text-[10px] text-zinc-700 mt-1">
Set LANGFUSE_HOST, LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY to enable tracing
</p>
<details className="mt-2 text-[10px] text-zinc-700">
<summary className="cursor-pointer text-zinc-500 hover:text-zinc-400">How to enable tracing</summary>
<p className="mt-1">
Set <code className="font-mono text-zinc-400">LANGFUSE_HOST</code>, <code className="font-mono text-zinc-400">LANGFUSE_PUBLIC_KEY</code>, <code className="font-mono text-zinc-400">LANGFUSE_SECRET_KEY</code> as workspace secrets to enable tracing.
</p>
</details>
</div>
) : (
<div className="space-y-1">

View File

@ -1,4 +1,4 @@
'use client';
"use client";
interface RevealToggleProps {
revealed: boolean;

View File

@ -17,6 +17,8 @@ export const RUNTIME_REQUIRED_KEYS: Record<string, string[]> = {
deepagents: ["OPENAI_API_KEY"],
crewai: ["OPENAI_API_KEY"],
autogen: ["OPENAI_API_KEY"],
hermes: ["OPENROUTER_API_KEY"],
"gemini-cli": ["GOOGLE_API_KEY"],
};
/** Human-readable labels for common secret keys */
@ -26,6 +28,8 @@ export const KEY_LABELS: Record<string, string> = {
GOOGLE_API_KEY: "Google AI API Key",
SERP_API_KEY: "SERP API Key",
OPENROUTER_API_KEY: "OpenRouter API Key",
HERMES_API_KEY: "Nous Research API Key",
DEEPSEEK_API_KEY: "DeepSeek API Key",
};
/* ---------- Types ---------- */

View File

@ -719,6 +719,33 @@ describe("misc state setters", () => {
});
});
// ---------- hydrationError (#554) ----------
describe("hydrationError", () => {
it("initial value is null", () => {
expect(useCanvasStore.getState().hydrationError).toBeNull();
});
it("setHydrationError stores an error message", () => {
useCanvasStore.getState().setHydrationError("Network timeout");
expect(useCanvasStore.getState().hydrationError).toBe("Network timeout");
});
it("setHydrationError(null) clears the error", () => {
useCanvasStore.getState().setHydrationError("Some error");
useCanvasStore.getState().setHydrationError(null);
expect(useCanvasStore.getState().hydrationError).toBeNull();
});
it("setHydrationError does not affect other state", () => {
useCanvasStore.getState().hydrate([makeWS({ id: "ws-x", name: "X" })]);
useCanvasStore.getState().setHydrationError("oops");
// Nodes should still be intact
expect(useCanvasStore.getState().nodes).toHaveLength(1);
expect(useCanvasStore.getState().nodes[0].id).toBe("ws-x");
});
});
// ---------- ACTIVITY_LOGGED event ----------
describe("ACTIVITY_LOGGED event", () => {

View File

@ -142,6 +142,8 @@ export function buildNodesAndEdges(
currentTask: ws.current_task || "",
runtime: ws.runtime || "",
needsRestart: false,
budgetLimit: ws.budget_limit ?? null,
budgetUsed: ws.budget_used ?? null,
},
// Hide child nodes from canvas — they render inside the parent WorkspaceNode
hidden: !!ws.parent_id,

View File

@ -29,6 +29,10 @@ export interface WorkspaceNodeData extends Record<string, unknown> {
currentTask: string;
runtime: string;
needsRestart: boolean;
/** USD spend ceiling set by the user; null = unlimited. Added by issue #541. */
budgetLimit: number | null;
/** Cumulative USD spend. Present when the platform tracks spend (issue #541). */
budgetUsed?: number | null;
}
export type PanelTab = "details" | "skills" | "chat" | "terminal" | "config" | "schedule" | "channels" | "files" | "memory" | "traces" | "events" | "activity";
@ -73,6 +77,9 @@ interface CanvasState {
/** WebSocket connection status — drives the live indicator in the Toolbar. */
wsStatus: "connected" | "connecting" | "disconnected";
setWsStatus: (status: "connected" | "connecting" | "disconnected") => void;
/** Hydration error message — set when initial canvas load fails. Null when no error. */
hydrationError: string | null;
setHydrationError: (error: string | null) => void;
}
export const useCanvasStore = create<CanvasState>((set, get) => ({
@ -84,6 +91,8 @@ export const useCanvasStore = create<CanvasState>((set, get) => ({
contextMenu: null,
wsStatus: "connecting",
setWsStatus: (status) => set({ wsStatus: status }),
hydrationError: null,
setHydrationError: (error) => set({ hydrationError: error }),
viewport: { x: 0, y: 0, zoom: 1 },

View File

@ -118,6 +118,10 @@ export interface WorkspaceData {
x: number;
y: number;
collapsed: boolean;
/** USD spend ceiling set by the user; null = unlimited. Added by issue #541. */
budget_limit: number | null;
/** Cumulative USD spend for this workspace. Present when the platform tracks spend. */
budget_used?: number | null;
}
let socket: ReconnectingSocket | null = null;

View File

@ -0,0 +1,108 @@
---
title: "Deploy AI Agents on Fly.io — or Any Cloud — with One Config Change"
date: "2026-04-17"
slug: deploy-anywhere
description: "Molecule AI supports fly.io agent deployment and control-plane provisioning. Switch backends with one env var — no agent code changes required."
tags: [platform, fly.io, deployment, infrastructure]
---
# Deploy AI Agents on Fly.io — or Any Cloud — with One Config Change
Your infrastructure choice just got decoupled from your agent platform choice. Molecule AI now ships three production-ready workspace backends — `docker`, `flyio`, and `controlplane` — and switching between them takes a single environment variable. Your agent code, model choices, and workspace topology stay exactly the same.
This post covers what shipped in [PR #501](https://github.com/Molecule-AI/molecule-core/pull/501) (Fly Machines provisioner) and [PR #503](https://github.com/Molecule-AI/molecule-core/pull/503) (control plane provisioner), and which backend fits your situation.
## Before: One Deployment Model for Every Use Case
Until this week, Molecule AI workspaces ran on one backend: Docker. That was the right default for self-hosters — no external dependencies, full control, works anywhere a Docker daemon runs. But it left two groups making a compromise they shouldn't have to:
- **Indie developers and small teams** wanted Fly.io's economics: pay-per-use compute, fast cold starts, scale to zero when nobody's working.
- **SaaS builders** needed structural credential isolation. A Fly API token sitting in the tenant layer is one misconfiguration away from a security incident — not a policy problem, an architecture problem.
Both groups were choosing between "use the platform" and "get the deployment model I need." That trade-off is gone.
## Run AI Agents on Fly: The Indie Dev Path
You're already on Fly. You have an account, a Fly app, and you're comfortable with Machines. You want Molecule AI workspaces to provision as Fly Machines — no separate Docker host, no idle infrastructure, just workspaces that appear when needed and disappear when they don't.
Set three environment variables on your tenant platform instance:
```bash
CONTAINER_BACKEND=flyio
FLY_API_TOKEN=<your-fly-deploy-token>
FLY_WORKSPACE_APP=<your-fly-app-name>
# Optional — defaults to ord
FLY_REGION=ord
```
When a workspace is created, the Fly provisioner:
1. Spins up a Fly Machine inside your `FLY_WORKSPACE_APP`
2. Injects workspace secrets and the platform registration URL as machine env vars
3. Selects the right GHCR image for the runtime (`hermes` → `ghcr.io/molecule-ai/workspace-hermes:latest`, and so on)
4. Applies tier-based resource limits — T2 at 512 MB / 1 vCPU, T3 at 2 GB / 2 vCPU, T4 at 4 GB / 4 vCPU
5. Issues a boot-time auth token so the workspace agent can register with the platform immediately
Your workspaces run as first-class Fly Machines. When they're idle, Fly handles the scale-down. Your bill reflects actual usage, not reserved capacity.
## Multi-Tenant Agent Provisioning Without Credential Sprawl
You're building a SaaS product on top of Molecule AI. Each customer gets a Molecule workspace. The problem: if every tenant platform instance carries a `FLY_API_TOKEN`, you've distributed cloud credentials across your tenants — structurally. Policy controls help, but they don't remove the credential from the attack surface.
`CONTAINER_BACKEND=controlplane` removes it entirely.
```
Canvas → Tenant Platform → Control Plane API → Fly Machines API
```
The tenant platform never holds a Fly token. It calls the Molecule control plane at `https://api.moleculesai.app` (overridable via `CP_PROVISION_URL` for staging environments), which holds Fly credentials and orchestrates workspace provisioning centrally.
For standard SaaS deployments, you don't configure this manually — the platform auto-detects the right backend:
- `MOLECULE_ORG_ID` set → SaaS tenant → **control plane provisioner activates automatically**
- `MOLECULE_ORG_ID` empty → self-hosted → **Docker provisioner, no change needed**
The right backend is the default for your context. For most SaaS builders: set `MOLECULE_ORG_ID` at tenant launch, and credential isolation is structural from day one.
## Self-Hosted vs Cloud AI Agents: Backend Comparison
| Backend | `CONTAINER_BACKEND` | Best for | Who holds cloud credentials |
|---|---|---|---|
| **Docker** | *(empty / default)* | Self-hosted, local dev | No external credentials needed |
| **Fly Machines** | `flyio` | Indie devs / small teams on Fly | `FLY_API_TOKEN` lives on the tenant |
| **Control Plane** | `controlplane` | SaaS builders, multi-tenant products | Fly token held by control plane only — never on tenant |
**Fly backend env vars** (for `CONTAINER_BACKEND=flyio`):
| Variable | Required | Default | What it does |
|---|---|---|---|
| `CONTAINER_BACKEND` | Yes | — | Activates the Fly provisioner |
| `FLY_API_TOKEN` | Yes | — | Fly deploy token |
| `FLY_WORKSPACE_APP` | Yes | — | Fly app that hosts workspace machines |
| `FLY_REGION` | No | `ord` | Region for new machines |
## Agent Orchestration in the Cloud: What Doesn't Change
Switching backends changes where workspaces run, not how they work. From any agent runtime's perspective — Hermes, Letta, or whatever you're orchestrating — the workspace is the workspace. Unchanged across all three backends:
- Agent registration and boot sequence
- Model routing and provider dispatch
- Workspace secrets injection
- The full platform API surface
No changes to agent code, tool definitions, or orchestration logic. Swap `CONTAINER_BACKEND`, redeploy, done.
## Multi-Agent Cloud Deployment: Choose Your Path
- **Self-hosting?** Leave `CONTAINER_BACKEND` unset. Docker is the default — nothing to configure.
- **On Fly, small team?** Set `CONTAINER_BACKEND=flyio` with `FLY_API_TOKEN` and `FLY_WORKSPACE_APP`. Workspaces become Fly Machines in your own Fly account.
- **Building a SaaS product on Molecule AI?** Set `MOLECULE_ORG_ID` at tenant launch. The control plane provisioner activates automatically. No Fly credentials on the tenant, ever.
**Pick your backend. Deploy your agents.**
→ [Quickstart: choose your deployment backend](/docs/quickstart)
---
*[PR #501](https://github.com/Molecule-AI/molecule-core/pull/501) (Fly Machines provisioner) and [PR #503](https://github.com/Molecule-AI/molecule-core/pull/503) (control plane provisioner) are both merged to `main`. Molecule AI is open source — contributions welcome.*

View File

@ -37,6 +37,452 @@ about where our differentiation actually is.
---
## Competitor Snapshot
> **Machine-readable index for PMM cron diffing.** One YAML entry per competitor —
> the cron diffs this block to detect version bumps, threat escalations, and new
> `notable_changes`, then updates `docs/marketing/competitors.md`.
>
> **Maintenance rule:** whenever you update a narrative entry below, also bump the
> corresponding `date`, `version`, and `notable_changes` fields here.
>
> Fields: `name` · `slug` · `date` (last reviewed) · `version` · `stars` ·
> `threat_level` (high / medium / low) · `notable_changes` (≤ 2 sentences) · `source_url`
```yaml
# competitor-snapshot
# Generated: 2026-04-17 | Maintainer: Research Lead
# PMM cron reads this block, diffs vs. previous commit, updates docs/marketing/competitors.md.
# Update date + version + notable_changes whenever a competitor ships something significant.
snapshots:
# ── HIGH THREAT ────────────────────────────────────────────────────────────────────
# Direct substitutes or major market-erosion risk for Molecule AI.
# NOTE(review): Paperclip below carries threat_level: medium (downgraded HIGH → MEDIUM
# on 2026-04-17, deep-dive #571) but remains in this section to keep the diff minimal —
# move it under MEDIUM THREAT on the next re-sort.
- name: Paperclip
slug: paperclip
date: "2026-04-17"
version: "v2026.416.0"
stars: "54.8k"
threat_level: medium
notable_changes: >
Downgraded HIGH → MEDIUM (2026-04-17, deep-dive #571): no A2A protocol,
no visual canvas, no org-chart UI on roadmap. Blocker dependencies are
single-process task-graph DAG, not inter-agent coordination. Execution
policies are budget ceilings, not tool restrictions. Only capability gap
vs Molecule AI is per-workspace budget limits (tracked #541). Brand/
framing threat ("zero-human companies") but not a technical substitute.
v2026.416.0 (Apr 16) ships chat threads + execution policies.
source_url: https://github.com/paperclipai/paperclip/releases
- name: OpenAI Agents SDK
slug: openai-agents-sdk
date: "2026-04-17"
version: "v0.14.1"
stars: "14k"
threat_level: high
notable_changes: >
v0.14.1 (Apr 15 2026) patches tracing export on top of v0.14.0's
SandboxAgent beta — persistent isolated workspaces, snapshot/resume,
and sandbox memory directly competing with our workspace lifecycle model.
source_url: https://github.com/openai/openai-agents-python/releases
- name: CrewAI
slug: crewai
date: "2026-04-17"
version: "v1.14.1"
stars: "48k"
threat_level: high
notable_changes: >
v1.14.1 (Apr 8 2026) adds async checkpoint TUI browser; 1.4B agentic
automations logged, 60% Fortune 500 adoption, and $18M Insight-led round
make CrewAI Enterprise the dominant multi-agent framework in our target
enterprise segment.
source_url: https://github.com/crewAIInc/crewAI/releases
- name: Google ADK
slug: google-adk
date: "2026-04-17"
version: "v1.30.0"
stars: "19k"
threat_level: high
notable_changes: >
v1.30.0 (Apr 13 2026) adds Auth Provider support to the agent registry,
Parameter Manager integration, and Gemma 4 model support; v2.0.0a3
pre-release introduces a graph-based execution engine.
source_url: https://github.com/google/adk-python/releases
- name: Microsoft Agent Framework
slug: microsoft-agent-framework
date: "2026-04-17"
version: "python-1.0.1"
stars: "9.5k"
threat_level: high
notable_changes: >
v1.0 GA (Apr 7 2026): multi-agent orchestration (sequential, concurrent,
group-chat, handoff, magnetic patterns), native A2A+MCP, OpenTelemetry,
pause/resume durability, HITL approvals. AG-UI protocol for SSE-streaming
agent events to frontends — direct competitor to our WebSocket canvas.
Process Framework GA planned Q2 2026. Molecule gap: AG-UI SSE endpoint,
tool governance registry, cost transparency per workspace.
source_url: https://github.com/microsoft/agent-framework/releases
# ── MEDIUM THREAT ──────────────────────────────────────────────────────────────────
# Significant overlap in adjacent space; no direct substitution risk today.
- name: Dify
slug: dify
date: "2026-04-17"
version: "v1.13.3"
stars: "60k"
threat_level: medium
notable_changes: >
Latest stable is v1.13.3 (Mar 27 2026); v1.14.0 RC adds Human Input
node (HITL); raised $30M Pre-A (Mar 2026, $180M valuation) with
280 enterprise deployments — no-code positioning targets business users,
not our developer audience.
source_url: https://github.com/langgenius/dify/releases
- name: LangGraph
slug: langgraph
date: "2026-04-17"
version: "v1.1.6"
stars: "29k"
threat_level: medium
notable_changes: >
langgraph-cli v0.4.22 (Apr 16 2026) adds deploy source tracking;
core v1.1.6 (Apr 10 2026) ships LangGraph 2.0 declarative guardrail nodes;
LangGraph Cloud hosted execution competes with our scheduler.
source_url: https://github.com/langchain-ai/langgraph/releases
- name: VoltAgent
slug: voltagent
date: "2026-04-17"
version: "server-elysia@2.0.7"
stars: "8.2k"
threat_level: medium
notable_changes: >
@voltagent/server-elysia v2.0.7 (Apr 11 2026) fixes A2A agent card
endpoints to advertise correct absolute URLs; VoltOps Console is the
closest Canvas analogue in the TypeScript ecosystem.
source_url: https://github.com/VoltAgent/voltagent/releases
- name: n8n
slug: n8n
date: "2026-04-17"
version: "v2.17.2"
stars: "50k"
threat_level: medium
notable_changes: >
v2.17.2 (Apr 16 2026) improves AI Gateway credentials endpoint;
n8n 2.0 (Dec 2025) added enterprise-grade AI Agent nodes, RBAC, SSO,
and 400+ channel integrations — direct overlap with our workspace_channels.
source_url: https://github.com/n8n-io/n8n/releases
- name: Claude Code Routines
slug: claude-code-routines
date: "2026-04-17"
version: "cloud-feature"
stars: "n/a"
threat_level: medium
notable_changes: >
Launched Apr 14 2026 (research preview): Anthropic-hosted cron + GitHub-
event-triggered Claude Code sessions running on Anthropic cloud; competes
with our workspace_schedules; single-model, no org canvas.
source_url: https://code.claude.com/docs/en/routines
- name: Scion
slug: scion
date: "2026-04-17"
version: "active"
stars: "early"
threat_level: medium
notable_changes: >
Launched Apr 8 2026 — GCP experimental container-per-agent harness for
Claude Code/Gemini CLI with parallel isolated workspaces and markdown
workflow definitions; escalation risk to HIGH if productized by Google.
source_url: https://github.com/GoogleCloudPlatform/scion
- name: Multica
slug: multica
date: "2026-04-17"
version: "active-36-releases"
stars: "12.8k"
threat_level: medium
notable_changes: >
Positioned as open-source Claude Managed Agents alternative (Apr 2026);
local daemon + central backend with pgvector semantic skill compounding;
+1,503 stars/day at launch — no A2A or org canvas but similar architecture.
source_url: https://github.com/multica-ai/multica/releases
- name: Cline
slug: cline
date: "2026-04-17"
version: "active"
stars: "44k"
threat_level: medium
notable_changes: >
VS Code Claude Code extension with 44k ⭐ and MCP support; primary user
overlap with our Claude Code workspace — developers who outgrow Cline's
single-session model are our conversion path.
source_url: https://github.com/cline/cline/releases
- name: ClawRun
slug: clawrun
date: "2026-04-17"
version: "active-45-releases"
stars: "84"
threat_level: medium
notable_changes: >
Closest architectural match tracked — sandbox/heartbeat/snapshot-resume/
channels/cost-tracking feature parity with us; 84 ⭐ but 45 releases
shows active shipping; adding A2A would make this a direct lightweight
competitor.
source_url: https://github.com/clawrun-sh/clawrun/releases
- name: Gemini CLI
slug: gemini-cli
date: "2026-04-17"
version: "v0.38.1"
stars: "101k"
threat_level: medium
notable_changes: >
v0.38.1 (Apr 15 2026) is a cherry-pick stability patch; 1M-token context
+ MCP support; runtime candidate for our workspace adapter — elevated to
MEDIUM because it forms a full agent stack with Google ADK + adk-web.
source_url: https://github.com/google-gemini/gemini-cli/releases
# ── LOW THREAT ─────────────────────────────────────────────────────────────────────
# Tools, infra layers, single-agent tools, or products we use — not substitutes.
- name: Hermes Agent
slug: hermes-agent
date: "2026-04-17"
version: "v0.10.0"
stars: "61k"
threat_level: low
notable_changes: >
v0.10.0 (Apr 16 2026) launches Tool Gateway giving paid Portal subscribers
built-in web search, image generation, TTS, and browser automation; no
multi-agent or org hierarchy — personal AI shape, not platform competitor.
source_url: https://github.com/NousResearch/hermes-agent/releases
- name: gstack
slug: gstack
date: "2026-04-17"
version: "active"
stars: "70k"
threat_level: low
notable_changes: >
Viral Claude Code skills bundle with 70k ⭐; sequential single-session
persona-switching — no persistent infra, Docker isolation, or A2A protocol;
differentiation holds unless multi-session execution is added.
source_url: https://github.com/garrytan/gstack
- name: Flowise
slug: flowise
date: "2026-04-17"
version: "flowise@3.1.2"
stars: "30k"
threat_level: low
notable_changes: >
v3.1.2 (Apr 14 2026) delivers security hardening (CORS abuse, credential
leaks, unauthorized access); acquired by Workday (Aug 2025) — repositioned
for HR/finance enterprise, narrowing its developer-team market.
source_url: https://github.com/FlowiseAI/Flowise/releases
- name: OpenHands
slug: openhands
date: "2026-04-17"
version: "v1.6.0"
stars: "47k"
threat_level: low
notable_changes: >
v1.6.0 (Mar 30 2026) adds hook support and /clear command preserving
sandbox runtime; jumped to v1.x series (was v0.39.0); SWE-Bench top
open-source rank — single-agent software engineer, not a platform.
source_url: https://github.com/All-Hands-AI/OpenHands/releases
- name: Temporal
slug: temporal
date: "2026-04-17"
version: "v1.30.4"
stars: "13k"
threat_level: low
notable_changes: >
v1.30.4 (Apr 10 2026) patches CVE-2026-5724 MEDIUM authorization
vulnerability; $300M Series D (Feb 2026, $5B valuation); we integrate
Temporal as infra via workspace-template/builtin_tools/temporal_workflow.py.
source_url: https://github.com/temporalio/temporal/releases
- name: Chrome DevTools MCP
slug: chrome-devtools-mcp
date: "2026-04-17"
version: "active"
stars: "35.5k"
threat_level: low
notable_changes: >
Official ChromeDevTools org MCP server with 23 browser-control tools;
replaces our bespoke Puppeteer CDP plugin — we adopt it as of issue #540.
source_url: https://github.com/ChromeDevTools/chrome-devtools-mcp
- name: Composio
slug: composio
date: "2026-04-17"
version: "active"
stars: "18k"
threat_level: low
notable_changes: >
250+ tool integrations with managed auth; potential skill-pack dependency
for workspace channel integrations rather than a competing platform.
source_url: https://github.com/composio-dev/composio/releases
- name: AgentScope
slug: agentscope
date: "2026-04-17"
version: "v1.0.18"
stars: "23.8k"
threat_level: low
notable_changes: >
v1.0.18 (Mar 26 2026) from Alibaba/ModelScope with MsgHub typed routing
and OpenTelemetry; MCP integration; no deployment layer — framework only.
source_url: https://github.com/modelscope/agentscope/releases
- name: Skills CLI
slug: skills-cli
date: "2026-04-17"
version: "active"
stars: "14.2k"
threat_level: low
notable_changes: >
Vercel-backed canonical agentskills.io install CLI covering 45+ agents
including our Claude Code workspace; aligning plugins/ manifest to the
agentskills.io spec gives us free distribution through this channel.
source_url: https://github.com/vercel-labs/skills
- name: Archon
slug: archon
date: "2026-04-17"
version: "v0.3.6"
stars: "18.1k"
threat_level: low
notable_changes: >
v0.3.6 active; YAML-DAG coding workflow with mixed AI/deterministic nodes
and human approval gates; reference design for our workspace delivery
pipelines — no multi-agent coordination.
source_url: https://github.com/coleam00/Archon/releases
- name: Tencent AI-Infra-Guard
slug: tencent-ai-infra-guard
date: "2026-04-17"
version: "v4.1.3"
stars: "3.5k"
threat_level: low
notable_changes: >
v4.1.3 (Apr 9 2026); red team platform scanning MCP server and skills
surfaces — use as security compliance checklist for our MCP server and
plugin registry hardening; not a runtime competitor.
source_url: https://github.com/Tencent/AI-Infra-Guard/releases
- name: Holaboss
slug: holaboss
date: "2026-04-17"
version: "active"
stars: "1.7k"
threat_level: low
notable_changes: >
Desktop "AI employee" with filesystem-as-memory and compaction boundaries;
single-agent, no A2A — primary concern is terminology collisions
(workspace / MEMORY.md / SKILL.md / agentskills.io).
source_url: https://github.com/holaboss-ai/holaboss-ai
- name: claude-mem
slug: claude-mem
date: "2026-04-17"
version: "active"
stars: "56k"
threat_level: low
notable_changes: >
SQLite FTS5 + Chroma hybrid cross-session memory with lifecycle hooks;
56k ⭐ signals strong demand for the gap we need to close in agent_memories
— adopt PostToolUse + SessionEnd observation pipeline.
source_url: https://github.com/thedotmack/claude-mem
- name: Plannotator
slug: plannotator
date: "2026-04-17"
version: "v0.17.10"
stars: "4.3k"
threat_level: low
notable_changes: >
v0.17.10 (Apr 13 2026); HITL plan annotation UX with structured feedback
types (delete/insert/replace/comment); reference design for improving our
approvals API response schema.
source_url: https://github.com/backnotprop/plannotator/releases
- name: open-multi-agent
slug: open-multi-agent
date: "2026-04-17"
version: "v1.1.0"
stars: "5.7k"
threat_level: low
notable_changes: >
v1.1.0 (Apr 1 2026); TypeScript multi-agent with runtime goal-to-DAG
decomposition in 3 deps; ephemeral per-run — no persistent identity,
no canvas, no scheduling.
source_url: https://github.com/JackChen-me/open-multi-agent/releases
- name: Open Agents (Vercel)
slug: open-agents-vercel
date: "2026-04-17"
version: "active"
stars: "2.2k"
threat_level: low
notable_changes: >
+1,020 stars in one day (Apr 15 2026); Vercel Labs reference app for
background coding agents with snapshot-based VM resumption; no multi-
agent coordination — reference template, not a platform.
source_url: https://github.com/vercel-labs/open-agents
- name: GenericAgent
slug: generic-agent
date: "2026-04-17"
version: "v1.0"
stars: "2.1k"
threat_level: low
notable_changes: >
v1.0 (Jan 16 2026); self-evolving skill tree with four-tier memory
hierarchy (L0 rules → L4 session archives); single-agent, no A2A —
memory taxonomy worth borrowing for agent_memories scopes.
source_url: https://github.com/lsdefine/GenericAgent/releases
- name: OpenSRE
slug: opensre
date: "2026-04-17"
version: "active"
stars: "900"
threat_level: low
notable_changes: >
AI SRE toolkit with 40+ observability integrations (Grafana/Datadog/
K8s/AWS/GCP/PagerDuty); potential DevOps workspace skill-pack source
rather than a competing platform.
source_url: https://github.com/Tracer-Cloud/opensre
- name: AMD GAIA
slug: amd-gaia
date: "2026-04-17"
version: "v0.17.2"
stars: "1.2k"
threat_level: low
notable_changes: >
v0.17.2 (Apr 10 2026); AMD-backed local agent framework hardware-locked
to Ryzen AI 300+ NPU; MCP support; not general-purpose.
source_url: https://github.com/amd/gaia/releases
```
---
## Entries
### Holaboss — `holaboss-ai/holaboss-ai`
@ -870,9 +1316,9 @@ builders; Molecule AI users are developers building agent companies.
**Terminology collisions:** "middleware" — their processing pipeline hook; undefined in our platform. "graph" — their workflow DAG vs our live org chart (same word, different semantics).
**Signals to react to:** If AF 1.0 achieves enterprise adoption → update our autogen adapter to target `microsoft/agent-framework`. If AF Labs RL ships stable → evaluate for dynamic PM routing based on workspace performance history.
**Signals to react to:** AF 1.0 GA shipped April 7 with AG-UI (SSE protocol for streaming agent events to frontends). AG-UI is a direct competitor to our WebSocket canvas events — if AG-UI becomes a standard, we need an AG-UI-compatible SSE endpoint to attract MAF users. Process Framework GA in Q2 2026 will add visual workflow design — evaluate overlap with our Canvas. Google's private Tool Registry (Vertex AI) sets an enterprise expectation for tool governance that we should match with per-org curated plugin registries.
**Last reviewed:** 2026-04-15 · **Stars / activity:** ~9.5k ⭐, April 2026 .NET release, official AutoGen successor
**Last reviewed:** 2026-04-17 · **Stars / activity:** ~9.5k ⭐, v1.0 GA April 7 2026, AG-UI protocol announced
---
@ -1202,22 +1648,475 @@ Complementary by design.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~35.5k ⭐, ChromeDevTools org, Apache-2.0
---
### LangGraph — `langchain-ai/langgraph`
**Pitch:** "Build resilient language agents as graphs — stateful, multi-actor
applications with fine-grained control over agent flow."
**Shape:** Python + JavaScript/TypeScript library (MIT), ~29k ⭐, v1.1.6 released
April 10 2026. Part of the LangChain ecosystem. Agents are modelled as directed
graphs: nodes are callables (LLM calls, tool calls, conditional branches), edges are
routing rules, and a persistent **state schema** carries data between nodes.
Checkpointing (memory persistence across turns) is built in via a pluggable
`Checkpointer` interface (in-memory, SQLite, Postgres, Redis). Multi-agent
compositions via subgraph nodes. LangGraph Cloud offers hosted execution backed by
LangSmith observability. LangGraph 2.0 GA shipped February 2026, adding declarative
guardrail nodes (content filtering, rate limiting, audit logging as config).
**Overlap with us:** Molecule AI ships a `langgraph` runtime adapter
(`molecule-ai-workspace-template-langgraph`) — this is us *on top of* LangGraph.
Their graph model (nodes, edges, state) is structurally analogous to our workspace
hierarchy (workspaces, A2A calls, shared context). Their `Checkpointer` is the
lower-level equivalent of our `agent_memories` table. LangGraph Cloud's hosted
execution competes directly with our scheduler + workspace lifecycle.
**Differentiation:** LangGraph is a framework for *building* the logic of one agent
or pipeline; Molecule AI is a platform for *deploying and coordinating* long-lived
agents as an org. LangGraph has no concept of Docker workspace isolation, org-chart
hierarchy, inter-agent A2A protocol, channel integrations, visual canvas, or cron
scheduling. Our langgraph adapter *runs on top of* LangGraph — they're layered, not
competing, for most use cases. The gap is LangGraph Cloud vs our hosted platform.
**Worth borrowing:**
- **Declarative guardrail nodes** (v2.0) — content filtering and audit logging as
first-class graph nodes rather than custom code. Map to our `approvals` table:
add declarative gate types (content-filter, rate-limit) in workspace config.
- **Subgraph composition** — composing multi-agent pipelines by nesting graphs.
Our workspace parent/child hierarchy is the operational equivalent; study for
dynamic sub-workspace spawning UX.
- **Checkpointer interface** — the pluggable backend design (SQLite → Postgres →
Redis hot path) is the right abstraction for our `agent_memories` persistence layer.
**Terminology collisions:**
- "state" — LangGraph: the typed dict carried between graph nodes; ours: workspace
status (online/offline/degraded). No user confusion but docs should disambiguate.
- "node" — LangGraph: a callable in the agent graph; our canvas: a workspace tile.
Same word, very different level of abstraction.
- "graph" — LangGraph: the directed workflow graph; our canvas: the live org chart.
Marketing copy should distinguish "workflow graph" (LangGraph) vs "org chart" (us).
**Signals to react to:**
- If LangGraph Cloud adds persistent agent identity (long-lived named agents beyond
per-session checkpoints) → direct hosted-platform competition; accelerate our
LangGraph adapter differentiation.
- If LangGraph 2.0 guardrail nodes become the standard compliance primitive for AI
pipelines → expose an equivalent gate type in `workspace-template/` adapters.
- If LangSmith + LangGraph Cloud bundle as an all-in-one enterprise platform → we
need to position our model-agnostic, self-hostable story more aggressively against
LangChain lock-in.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~29k ⭐, v1.1.6 April 10 2026, very active
---
### CrewAI — `crewAIInc/crewAI`
**Pitch:** "Framework for orchestrating role-playing, autonomous AI agents — by
fostering collaborative intelligence, CrewAI empowers agents to work together
seamlessly, tackling complex tasks."
**Shape:** Python library (MIT), ~48k ⭐, v1.14.2 released April 8 2026. Agents are
defined by `role`, `goal`, and `backstory` fields and assembled into a `Crew` with
`Process.sequential` (fixed order) or `Process.hierarchical` (manager agent
delegates) execution. `Flow` (event-driven stateful pipelines, shipped 2024-Q4)
enables complex conditional branching beyond linear crew execution. Model-agnostic:
OpenAI, Anthropic, Gemini, Mistral, Bedrock, Ollama, and any LiteLLM-compatible
endpoint. Tools are Python callables or MCP integrations. CrewAI Enterprise is the
commercial SaaS offering.
**Overlap with us:** Molecule AI ships a `crewai` runtime adapter
(`molecule-ai-workspace-template-crewai`) — our workspaces *run* CrewAI crews.
The Crew role model (`role` + `goal` + `backstory`) is our system-prompt-encoded
persona convention made explicit and typed. `Process.hierarchical` with a manager
agent mirrors our PM → Dev Lead → Engineer delegation chain. Flow's event-driven
branching is analogous to our `workspace_schedules` trigger model.
**Differentiation:** CrewAI is an in-process Python framework; Molecule AI is the
operational platform. CrewAI agents are ephemeral per crew run — no Docker isolation,
no persistent identity across restarts, no org-chart canvas, no A2A between
independently deployed agents, no cron scheduling, no channel integrations. A
Molecule AI CrewAI workspace *persists* across sessions, holds a role in a larger org,
and coordinates via our A2A protocol — capabilities CrewAI alone does not provide.
**Worth borrowing:**
- **Typed role schema** — `{role, goal, backstory}` as first-class typed fields
(not free-text system prompt). Our `config.yaml` `role:` is a single string; adopting
a richer `{role, goal, backstory}` triplet would improve agent persona consistency
across restarts and be CrewAI-compatible.
- **`Flow` event-driven pipelines** — conditional state-machine branching triggered by
events. Applicable to our `workspace_schedules` — replace cron-only triggers with
an event graph: "when PR merged → trigger QA workspace → on pass → trigger deploy."
- **Tool decorator pattern** — `@tool` with docstring-as-schema is simpler than our
MCP tool config approach for workspace-local tools.
**Terminology collisions:**
- "crew" — their multi-agent team; our team is a set of workspaces in an org
hierarchy. Marketing copy should use "workspace org" not "crew" to stay distinct.
- "agent" — their ephemeral in-process Python object; our long-lived Docker workspace.
- "task" — their atomic unit of work assigned to an agent; our `current_task`
heartbeat field. Same word, different scope.
**Signals to react to:**
- If CrewAI ships persistent agent state between crew runs → closes primary gap with
our workspace model; ~48k ⭐ means it would land with significant reach.
- If CrewAI Enterprise adds visual org-chart canvas or A2A-style inter-crew
communication → direct platform competitor.
- If the 2026 State of Agentic AI survey (65% of orgs using agents) accelerates
CrewAI Enterprise sales → their enterprise positioning competes directly with ours;
update ICP messaging.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~48k ⭐, v1.14.2 April 8 2026, very active
---
### Temporal — `temporalio/temporal`
**Pitch:** "The durable execution platform — write code that runs reliably even in
the face of failures, timeouts, and restarts."
**Shape:** Go server + SDKs for Go, Java, TypeScript, Python, .NET, PHP (MIT),
~13k ⭐ server repo. Workflow logic is deterministic code that Temporal replays from
event history after failures — no explicit retry/checkpoint code. `Activities` are
the fallible steps; `Signals` allow external input mid-workflow; `Queries` expose
read-only workflow state. Temporal Cloud is the managed SaaS; self-hosted runs on
K8s or Docker. Raised $300M Series D at $5B valuation February 2026, with AI driving
demand for durable execution. v1.30.4 released April 10 2026.
**Overlap with us:** Molecule AI already integrates Temporal via
`workspace-template/builtin_tools/temporal_workflow.py`. The `infra/scripts/setup.sh`
starts a local Temporal server (`:7233` gRPC + `:8233` Web UI). Any Molecule AI
workspace that needs bulletproof long-running or retryable work delegates to Temporal.
Temporal's Worker Versioning (GA March 2026) solves the same code-deploy-during-live-
workflow problem our restart-context message handles ad hoc.
**Differentiation:** Temporal is infrastructure — a durable execution engine with no
concept of agent identity, LLM calls, memory, org hierarchy, canvas, channels, or A2A.
It is the *substrate* beneath agents that need guaranteed execution; we are the
*platform* that decides when to call Temporal vs handle work in the workspace itself.
We are Temporal consumers, not competitors. The distinction for users: use Temporal
when you need workflow history replay and multi-step retry guarantees; use Molecule AI
scheduling for lighter cron-triggered agent prompts.
**Worth borrowing:**
- **Worker Versioning** (GA March 2026) — pin live workflows to a specific code
version so deploys don't corrupt in-flight runs. Analogous problem to our
workspace restart-context; worth evaluating as the underlying mechanism for
zero-downtime workspace deploys.
- **Workflow Update operation** — synchronous request/response pattern for live
workflows (e.g., human approves mid-workflow). Cleaner than our current
`approvals` polling; evaluate for HITL in long Temporal-backed workspace tasks.
- **Upgrade-on-Continue-as-New** (Public Preview March 2026) — pinned workflows can
opt into a newer code version at a clean continuation boundary. Pattern applicable
to our workspace versioning strategy.
**Terminology collisions:**
- "workflow" — Temporal: a deterministic, replay-safe code function; ours: informal
delegation chain term. In our docs, "Temporal workflow" should always be qualified
to avoid confusion with "workflow" in general product copy.
- "worker" — Temporal: a process that polls the server and executes workflow/activity
code; ours: not a first-class term (workspaces fill this role).
- "activity" — Temporal: a fallible, retryable step in a workflow; ours: `activity_logs`
table (A2A traffic logs). Different concepts sharing a word.
**Signals to react to:**
- If Temporal Cloud adds native LLM-aware primitives (e.g., LLM call as a first-class
activity with token tracking, model fallback, prompt versioning) → Temporal becomes
an agent platform, not just an infra layer; reassess our `temporal_workflow.py`
integration depth.
- If the $300M Series D accelerates enterprise sales motion → more enterprises will
arrive with Temporal already deployed; strengthen our Temporal integration story as
a first-class enterprise deployment pattern.
- If Upgrade-on-Continue-as-New becomes stable → adopt for workspace blue/green
deploy pattern (no workspace downtime during code updates).
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~13k ⭐ (server); $5B valuation, $300M Series D Feb 2026; v1.30.4 April 10 2026
---
### Dify — `langgenius/dify`
**Pitch:** "Production-ready platform for agentic workflow development — the leading
open-source LLM app development platform."
**Shape:** Python backend + React frontend (MIT), ~60k ⭐, v1.14.0 released February
2026. Visual drag-drop workflow canvas where LLM calls, RAG retrievers, code
executors, HTTP nodes, and agent loops are wired as a graph. Ships a full app
deployment stack: API server, web UI widget, and Slack/Telegram/WhatsApp channel
integrations. RAG pipeline with knowledge base management (file upload → chunk →
embed → retrieve). Supports 50+ LLM providers. Dify Cloud is the managed SaaS;
self-hosted via Docker Compose. Raised $30M Pre-A round led by HSG, March 2026.
**Overlap with us:** Both have a visual canvas for connecting AI work. Both support
channel integrations (Slack / Telegram / WhatsApp). Both run LLM-backed agents and
expose a REST API for external trigger. Dify's `Human Input` node (v1.14.0) is the
same pattern as our `approvals` table — pause workflow, wait for human input, resume.
Their knowledge base (RAG) is the equivalent of what our Research Lead workspace does
via tool calls to external retrieval services. Dify Cloud competes with our SaaS
control plane for teams that want a hosted no-code LLM app platform.
**Differentiation:** Dify targets **no-code and low-code builders** — the UX is
workflow configuration, not code. No persistent agent identity across workflow runs,
no multi-agent org hierarchy (agents in Dify are single workflow nodes, not
first-class citizens), no A2A protocol between independently deployed agents, no
Docker container isolation per agent. Molecule AI targets developers who write
`config.yaml` and system prompts; Dify targets product managers and ops teams who
want to deploy LLM apps without engineering. The ~60k ⭐ signal shows massive
no-code demand that our current product does not address.
**Worth borrowing:**
- **Human Input node** — native human-in-the-loop as a workflow node type, not a
separate approvals API. Map to our `approvals` table: expose a "wait for human"
node in a future visual workspace config editor.
- **Summary Index** (v1.14.0) — AI-generated summaries per document chunk in the
RAG knowledge base significantly improve retrieval precision. Applicable to our
Research Lead workspace's document retrieval; evaluate for our MCP memory backend.
- **Knowledge base management UI** — file upload → chunk → embed → retrieval test
in a single interface. Reference design for our future `agent_memories` admin UI.
- **Channel trigger UX** — same as n8n: three-click channel connect. Our channel
setup is more manual; Dify is a second data point that this is the target UX.
**Terminology collisions:**
- "workflow" — Dify: the visual graph of LLM+tool nodes that defines an app; ours:
informal delegation chain. In competitive positioning copy, distinguish "no-code
workflow builder" (Dify) vs "multi-agent org" (us).
- "agent" — Dify: a single ReAct loop node inside a workflow; ours: a long-lived
Docker workspace with an assigned role. Different scope and persistence model.
- "knowledge base" — Dify: an indexed file collection for RAG; ours: not a
first-class term (workspace agents manage their own retrieval).
**Signals to react to:**
- If Dify ships persistent agent identity (agents that remember state across workflow
runs, not just within one) → closes the primary product gap; ~60k ⭐ + no-code
accessibility is a formidable combination.
- If Dify adds multi-agent coordination (agents that spawn and coordinate sub-agents
as org peers, not just nested workflow nodes) → direct overlap with our multi-
workspace hierarchy.
- If the $30M Pre-A closes more enterprise deals → Dify moves up-market; watch for
enterprise canvas and RBAC features that would narrow our enterprise differentiation.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~60k ⭐, v1.14.0 Feb 2026; $30M Pre-A Mar 2026
---
### Flowise — `FlowiseAI/Flowise`
**Pitch:** "Build AI Agents, Visually — drag-drop UI to build LLM flows and agent
pipelines using LangChain and LlamaIndex components."
**Shape:** Node.js + React (MIT repo; post-Workday acquisition terms TBD), ~30k ⭐,
flowise@3.1.0 released March 16 2026. Drag-drop visual node editor where LangChain
chains, LlamaIndex query engines, vector stores, tools, and agents are wired as a
flow graph. Each flow is exported as a JSON config; the Flowise server exposes a
REST API and a chat widget embed. **Agentflow** (shipped 2024) adds multi-agent
composition: a Supervisor agent routes tasks to Worker agents within a single Flowise
flow. **Acquired by Workday** (announced August 2025) — Flowise is now part of
Workday's AI platform, bringing agent-building capability to Workday customers.
Security: three chained CVEs (CVE-2025-59528, CVE-2025-8943, CVE-2025-26319) enabling
unauthenticated RCE via Custom MCP Node were patched in v3.0.6 (exploit confirmed
April 7 2026).
**Overlap with us:** Both are drag-drop visual builders for AI agent workflows. Both
support LangChain components under the hood. Flowise's Agentflow (Supervisor + Worker
agents) mirrors our PM → engineer hierarchy, but within a single visual flow rather
than independently deployed Docker workspaces. Flowise's REST API per flow is
structurally similar to our `POST /workspaces/:id/a2a` endpoint — both let external
systems trigger an agent and get a response. Channel integrations overlap with our
`workspace_channels`.
**Differentiation:** Flowise is a **no-code single-server app builder** — agents are
stateless flow executions, not long-lived Docker workspaces with persistent memory,
schedules, and org identity. Post-Workday acquisition, Flowise targets Workday
enterprise customers (HR, finance, ops) rather than developer-first teams building AI
companies. No persistent agent memory between flow runs, no A2A protocol between
independently deployed agents, no cron scheduling, no org-chart canvas. The Workday
acquisition actually *narrows* Flowise's addressable market to Workday-centric
enterprises — which opens space for Molecule AI as the developer-first alternative.
**Worth borrowing:**
- **Agentflow Supervisor/Worker pattern** — the Supervisor agent dynamically routes
tasks to Workers based on their capabilities, with results aggregated back. More
flexible than our static PM → Lead delegation; study for dynamic routing in the PM
workspace's `delegate_task`.
- **Flow-as-JSON export/import** — each Flowise flow is a portable JSON blob that
can be versioned, shared, and re-imported. Our workspace `config.yaml` is close;
adding a full workflow export (config + memory schema + skill list) as a bundle
would enable the same portability.
- **Chat widget embed** — single-line script tag embeds a Flowise agent as a chat
widget on any webpage. Our `workspace_channels` is closer to outbound messaging;
a widget embed for inbound is a UX gap worth closing for developer adoption.
**Terminology collisions:**
- "flow" — Flowise: a visual JSON graph of LangChain nodes; ours: not a first-class
term. Avoid "flow" in our visual canvas docs to prevent confusion with Flowise-
trained users.
- "node" — Flowise: a LangChain component tile in the flow canvas; our canvas: a
workspace tile. Same word, same visual metaphor, different semantics.
- "supervisor" / "worker" — Flowise Agentflow roles; our PM / engineer hierarchy is
the same concept with different names. Our marketing should own "PM + engineer"
framing to stay distinct.
**Signals to react to:**
- If Workday opens Flowise APIs to non-Workday enterprise customers → Flowise
re-enters the general market with Workday distribution; update competitive messaging.
- If the CVE chain (RCE via Custom MCP Node) causes enterprise churn → opportunity
to position Molecule AI's Docker-isolated workspaces as the security-first
alternative for self-hosted agent deployments.
- If Flowise ships persistent agent memory or cross-flow A2A → closes primary gap;
monitor quarterly given Workday engineering resources.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~30k ⭐, flowise@3.1.0 March 16 2026; acquired by Workday Aug 2025
---
## Candidates to add (backlog)
Short-list of projects to write up next time someone has an hour:
- **LangGraph** (`langchain-ai/langgraph`) — done 2026-04-16; full entry above
  compares their graph model to our workspace hierarchy.
- **CrewAI** (`crewAIInc/crewAI`) — done 2026-04-16; full entry above.
- **AutoGen** (`microsoft/autogen`) — Microsoft's original repo; now superseded by
  Microsoft Agent Framework (tracked above) and AG2 community fork (tracked above).
  Entry should clarify which adapter target is canonical.
- **DeepAgents** (`langchain-ai/deepagents`) — we adapt it; particularly their
  sub-agent feature that collides with our "skills" word.
- **OpenClaw** — check if this is still live post-Hermes rebrand; our
adapter may need renaming.
- **Moltiverse / Moltbook** (`molti-verse.com`) — "social network for AI
agents." Not a competitor; orthogonal ecosystem but worth tracking in
case we want agent-to-agent discovery beyond a single org.
- **Temporal** (`temporalio/temporal`) — done 2026-04-16; full entry above
  covers when to lean on Temporal vs our in-house scheduling.
---
### OpenAI Agents SDK — Sandbox Agents — `openai/openai-agents-python`
**Pitch:** "A lightweight, powerful framework for multi-agent workflows — now with
persistent isolated sandbox workspaces, snapshot/resume, and sandbox memory."
**Shape:** Python (MIT), ~14k ⭐ (+110 stars today), v0.14.0 released April 15, 2026.
New beta surface: `SandboxAgent` backed by a `Manifest` (file tree, Git repo,
mounts) and a `SandboxRunConfig` that targets a pluggable execution backend.
Local: `UnixLocalSandboxClient`; containerised: `DockerSandboxClient`; hosted via
optional extras for Blaxel, Cloudflare, Daytona, E2B, Modal, Runloop, and Vercel.
**Sandbox memory** lets future runs inherit lessons from prior runs with progressive
disclosure and configurable isolation boundaries. Existing SDK primitives (Agents,
Handoffs, Guardrails, Tracing) are unchanged.
**Overlap with us:** `SandboxAgent` + hosted backends directly competes with our
workspace lifecycle model — a persistent isolated execution environment, snapshot
and resume, durable memory. The multi-backend strategy (Docker, Modal, Vercel, E2B)
mirrors our Docker workspace + cloud-provider abstraction goal. Sandbox memory is
the same cross-session memory gap we address via `agent_memories`.
**Differentiation:** Still a framework, not a platform — no visual canvas, no
org-chart hierarchy, no A2A between independently deployed sandboxes (handoffs are
in-process), no cron scheduling, no channel integrations. OpenAI-provider-optimised
in practice. Our differentiators: multi-agent org hierarchy with A2A, model-agnostic,
self-hostable, persistent agent identity beyond a single SDK process.
**Worth borrowing:** `SandboxRunConfig` backend abstraction — decouple workspace
execution from provider (Docker / Modal / Vercel) using a single config object.
Directly applicable to our workspace provisioner. Sandbox memory progressive
disclosure (summaries first, full context on demand) matches the retrieval strategy
in claude-mem; adopt for `agent_memories` query API.
**Terminology collisions:** "sandbox" — theirs: an isolated execution backend; ours:
not a first-class term (we use "workspace" / "container"). "memory" — same word,
same intent; our `agent_memories` and their sandbox memory are functionally equivalent.
**Signals to react to:** If OpenAI adds inter-sandbox A2A (sandboxes delegating to
each other across process boundaries) → direct platform feature parity; accelerate
our A2A documentation and SDK ergonomics. If hosted backends gain TypeScript support
(announced as roadmap) → Vercel + TS stack competes for our TypeScript-native users.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~14k ⭐, v0.14.0 April 15, 2026, OpenAI-maintained
---
### Tencent AI-Infra-Guard — `Tencent/AI-Infra-Guard`
**Pitch:** "A full-stack AI Red Teaming platform securing AI ecosystems via Agent
Scan, Skills Scan, MCP scan, AI Infra scan, and LLM jailbreak evaluation."
**Shape:** Python + Go (Apache-2.0), ~3.5k ⭐, v4.1.3 released April 9, 2026.
Tencent Zhuque Lab. Six scanning surfaces: ClawScan (open-source code security),
Agent Scan (runtime agent behaviour audit), Skills Scan (verifying installed agent
skills), MCP Server scan (tool-surface vulnerability detection), AI infrastructure
CVE matching (1000+ CVEs across 57+ AI components including crewai, kubeai,
lobehub), and LLM jailbreak evaluation. Ships a web UI, REST API, Docker deployment,
and integration with ClawHub agent marketplace.
**Overlap with us:** Our plugin/skills registry and MCP server are exactly the
surfaces AI-Infra-Guard scans. The Skills Scan module validates installed agent
skill packs — the same artefacts our `plugins/` directory ships. MCP Server scan
targets the same `@molecule-ai/mcp-server` surface our platform exposes. If
enterprise customers adopt AI-Infra-Guard for compliance audits, our plugin manifests
and MCP tool definitions need to be compatible with its scanner.
**Differentiation:** A security tooling product, not an agent framework or platform.
No agent runtime, no orchestration, no canvas, no memory. Molecule AI builds and
runs agents; AI-Infra-Guard audits them and their supply chain.
**Worth borrowing:** MCP Server scan vulnerability categories — use as a checklist
for hardening our own MCP server (`@molecule-ai/mcp-server`) before an enterprise
security review. Skills Scan concept — add a `plugin validate` sub-command to
`molecli` that runs the same checks locally before installing a plugin.
**Terminology collisions:** "agent scan" — their runtime audit process; not a term
we use. "skills scan" — their validation of installed skill packs; same artefact,
different word ("plugin audit" in our vocabulary).
**Signals to react to:** If AI-Infra-Guard publishes a formal MCP tool-surface
security spec → treat as a compliance baseline for our MCP server hardening. If
Tencent integrates this into enterprise procurement checklists → our plugin and MCP
docs need an explicit security posture section to pass audits.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~3.5k ⭐, v4.1.3 April 9, 2026, Tencent Zhuque Lab
---
### VoltAgent — `VoltAgent/voltagent`
**Pitch:** "The open-source TypeScript AI agent framework with a built-in
observability and deployment console — build agents once, run and monitor them
everywhere."
**Shape:** TypeScript (MIT), ~8.2k ⭐, 668 releases, latest April 11, 2026.
Two-layer design: `@voltagent/core` framework (typed agent definitions, tool
registry, multi-agent supervisor/sub-agent coordination, memory, RAG, voice,
guardrails) + **VoltOps Console** (hosted or self-hosted web UI for observability,
deployment automation, and agent lifecycle management). MCP client support connects
any MCP server as a tool source. Provider-agnostic: OpenAI, Anthropic, Google,
Ollama, and any OpenAI-compatible endpoint. Ships `@voltagent/server-elysia` for
Bun-native HTTP serving of agents.
**Overlap with us:** VoltOps Console is the closest analogue to our Canvas we've
tracked in the TypeScript ecosystem — both provide a web UI for managing and
monitoring long-lived agents. The supervisor/sub-agent coordination model mirrors
our PM → engineer delegation. MCP support means workspace skills install into
VoltAgent as easily as ours. `@voltagent/server-elysia` pattern (agent as an HTTP
server) is analogous to our A2A endpoint per workspace.
**Differentiation:** No Docker workspace isolation, no persistent agent identity
across server restarts, no A2A protocol between independently deployed agents, no
cron scheduling, no channel integrations. VoltOps Console focuses on observability
and deployment automation; our Canvas is the live visual org chart with drag-drop
topology control. Molecule AI targets multi-agent companies; VoltAgent targets
individual TypeScript developers building production agents.
**Worth borrowing:** VoltOps observability schema — trace views, agent state
inspection, and deployment automation as a single UI surface. Reference design for
merging our Canvas agent-inspection panel with Langfuse traces into a unified
observability tab. `@voltagent/core` typed agent definition API (role, memory,
tools, guardrails as typed config) — cleaner than our YAML-then-system-prompt
pipeline; evaluate for a future typed workspace config schema.
**Terminology collisions:** "console" — VoltOps Console: their monitoring + deploy
UI; our `molecli`: a TUI dashboard. Both are "consoles" for watching agents.
"supervisor" — their orchestrating agent tier; our PM workspace plays the same role.
**Signals to react to:** If VoltOps Console adds visual org-chart topology (not just
list view) → direct Canvas competitor in the TypeScript ecosystem. If
`@voltagent/core` multi-agent API becomes idiomatic for TS agent developers →
consider shipping an official Molecule AI VoltAgent runtime adapter alongside our
langgraph/crewai adapters.
**Last reviewed:** 2026-04-16 · **Stars / activity:** ~8.2k ⭐, 668 releases, latest April 11, 2026

View File

@ -0,0 +1,112 @@
# Competitor Tracker
> **Auto-maintained by PMM cron** — diffs `docs/ecosystem-watch.md` on schedule
> to detect version bumps, threat escalations, and notable changes.
>
> Source of truth for competitor state: `docs/ecosystem-watch.md#competitor-snapshot`
> Full narrative analysis: `docs/ecosystem-watch.md#entries`
>
> **Last updated:** 2026-04-17 (bootstrap — subsequent updates by PMM cron)
---
## High-Threat Competitors
Platforms that directly substitute for or significantly erode Molecule AI's market position.
| Competitor | Version | Stars | Threat Signal | Updated |
|---|---|---|---|---|
| [OpenAI Agents SDK](https://github.com/openai/openai-agents-python) | v0.14.1 | 14k | v0.14.1 SandboxAgent beta — persistent isolated workspaces, snapshot/resume, sandbox memory; directly competes with our workspace lifecycle | 2026-04-17 |
| [CrewAI](https://github.com/crewAIInc/crewAI) | v1.14.1 | 48k | 1.4B agentic automations, 60% Fortune 500 adoption, $18M Insight-led round; CrewAI Enterprise SaaS targeting our enterprise segment | 2026-04-17 |
| [Google ADK](https://github.com/google/adk-python) | v1.30.0 | 19k | v1.30.0 adds Auth Provider registry; full Google agent stack (ADK + Gemini CLI + adk-web DevUI + Scion harness) = largest platform risk | 2026-04-17 |
| [Microsoft Agent Framework](https://github.com/microsoft/agent-framework) | python-1.0.1 | 9.5k | v1.0 GA (official AutoGen successor); SOC 2/HIPAA compliance; .NET + Python; Process Framework GA in Q2 2026 | 2026-04-17 |
---
## Medium-Threat Competitors
Significant overlap in adjacent space; active watch required.
| Competitor | Version | Stars | Notes | Updated |
|---|---|---|---|---|
| [Paperclip](https://github.com/paperclipai/paperclip) | v2026.416.0 | 54.8k | Downgraded HIGH→MEDIUM (deep-dive #571): no A2A, no visual canvas on roadmap; single-process task DAG only; brand/framing threat ("zero-human companies"), not a technical substitute. Only gap vs Molecule AI: per-workspace budget limits (#541). | 2026-04-17 |
| [Dify](https://github.com/langgenius/dify) | v1.13.3 | 60k | v1.14.0 RC adds Human Input node; $30M Pre-A ($180M val); no-code positioning targets business users, not our developer audience | 2026-04-17 |
| [LangGraph](https://github.com/langchain-ai/langgraph) | v1.1.6 | 29k | CLI v0.4.22 Apr 16; LangGraph Cloud hosted execution competes with our scheduler | 2026-04-17 |
| [VoltAgent](https://github.com/VoltAgent/voltagent) | server-elysia@2.0.7 | 8.2k | VoltOps Console = closest Canvas analogue in TypeScript ecosystem | 2026-04-17 |
| [n8n](https://github.com/n8n-io/n8n) | v2.17.2 | 50k | n8n 2.0 enterprise AI Agent nodes + RBAC + 400+ channel integrations | 2026-04-17 |
| [Claude Code Routines](https://code.claude.com/docs/en/routines) | cloud-feature | — | Apr 14 2026 launch: Anthropic-hosted cron + GitHub-event-triggered Claude Code sessions | 2026-04-17 |
| [Scion](https://github.com/GoogleCloudPlatform/scion) | active | early | GCP experimental container-per-agent harness (Apr 8 2026); escalation risk to HIGH if productized | 2026-04-17 |
| [Multica](https://github.com/multica-ai/multica) | active | 12.8k | Positioned as Claude Managed Agents alternative; local daemon + central backend with skill compounding | 2026-04-17 |
| [Cline](https://github.com/cline/cline) | active | 44k | Primary user-overlap with our Claude Code workspace; developers who outgrow Cline convert to Molecule AI | 2026-04-17 |
| [ClawRun](https://github.com/clawrun-sh/clawrun) | active | 84 | Closest architectural match tracked (sandbox/heartbeat/snapshot-resume/channels/cost-tracking); early stage but actively shipped | 2026-04-17 |
| [Gemini CLI](https://github.com/google-gemini/gemini-cli) | v0.38.1 | 101k | Runtime candidate for our workspace adapter; elevated to MEDIUM as part of Google's full agent stack | 2026-04-17 |
---
## Low-Threat Competitors
Tools, infra layers, single-agent products, or projects we use — not direct substitutes.
| Competitor | Version | Stars | Role | Updated |
|---|---|---|---|---|
| [Hermes Agent](https://github.com/NousResearch/hermes-agent) | v0.10.0 | 61k | v0.10.0 (Apr 16) Tool Gateway launch; personal AI single-user shape | 2026-04-17 |
| [gstack](https://github.com/garrytan/gstack) | active | 70k | Sequential single-session Claude Code persona-switching; no multi-agent infra | 2026-04-17 |
| [claude-mem](https://github.com/thedotmack/claude-mem) | active | 56k | Memory addon; 56k ⭐ signals demand gap we need to close in agent_memories | 2026-04-17 |
| [Flowise](https://github.com/FlowiseAI/Flowise) | flowise@3.1.2 | 30k | Acquired by Workday (Aug 2025); v3.1.2 security hardening; narrowed to HR/finance enterprise | 2026-04-17 |
| [OpenHands](https://github.com/All-Hands-AI/OpenHands) | v1.6.0 | 47k | SWE-Bench top; v1.6.0 (Mar 30); single-agent software engineer only | 2026-04-17 |
| [Temporal](https://github.com/temporalio/temporal) | v1.30.4 | 13k | Durable execution infra we integrate; $5B valuation, not a competitor | 2026-04-17 |
| [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) | active | 35.5k | Browser MCP we adopt (issue #540); 23-tool surface | 2026-04-17 |
| [AgentScope](https://github.com/modelscope/agentscope) | v1.0.18 | 23.8k | Alibaba/ModelScope framework; MCP integration; no deployment layer | 2026-04-17 |
| [Composio](https://github.com/composio-dev/composio) | active | 18k | Tool integration library; potential skill-pack dependency | 2026-04-17 |
| [Archon](https://github.com/coleam00/Archon) | v0.3.6 | 18.1k | YAML-DAG coding workflow; reference design for workspace delivery pipelines | 2026-04-17 |
| [Skills CLI](https://github.com/vercel-labs/skills) | active | 14.2k | Vercel agentskills.io CLI; aligning plugins/ = free distribution channel | 2026-04-17 |
| [Holaboss](https://github.com/holaboss-ai/holaboss-ai) | active | 1.7k | Desktop AI employee; terminology collisions (workspace/SKILL.md) | 2026-04-17 |
| [Tencent AI-Infra-Guard](https://github.com/Tencent/AI-Infra-Guard) | v4.1.3 | 3.5k | Security scanner; use as MCP + plugin registry compliance checklist | 2026-04-17 |
| [Plannotator](https://github.com/backnotprop/plannotator) | v0.17.10 | 4.3k | HITL plan annotation UX; reference for improving approvals API schema | 2026-04-17 |
| [open-multi-agent](https://github.com/JackChen-me/open-multi-agent) | v1.1.0 | 5.7k | TypeScript goal-to-DAG library; ephemeral, no identity | 2026-04-17 |
| [Open Agents (Vercel)](https://github.com/vercel-labs/open-agents) | active | 2.2k | Reference app; snapshot-based VM resumption pattern worth borrowing | 2026-04-17 |
| [GenericAgent](https://github.com/lsdefine/GenericAgent) | v1.0 | 2.1k | Self-evolving skill tree; four-tier memory taxonomy worth borrowing | 2026-04-17 |
| [OpenSRE](https://github.com/Tracer-Cloud/opensre) | active | 900 | AI SRE toolkit; potential DevOps workspace skill-pack source | 2026-04-17 |
| [AMD GAIA](https://github.com/amd/gaia) | v0.17.2 | 1.2k | Hardware-locked (AMD Ryzen AI 300+); not general-purpose | 2026-04-17 |
---
## Watchlist — Escalation Signals
The following events would require immediate threat-level re-assessment:
| Competitor | Watch Signal | Current Level | Escalates To |
|---|---|---|---|
| Paperclip | Ships persistent agent memory | MEDIUM | HIGH — 54.8k ⭐ head-start |
| Paperclip | Ships visual org-chart canvas | MEDIUM | HIGH — direct Canvas competitor |
| Scion | Google productizes as managed GCP service | MEDIUM | HIGH |
| VoltAgent | VoltOps Console adds visual org-chart topology | MEDIUM | HIGH |
| Google ADK | ADK + Vertex AI becomes hosted managed platform | HIGH | CRITICAL |
| OpenAI Agents SDK | Inter-sandbox A2A across process boundaries | HIGH | CRITICAL |
| ClawRun | Adds A2A or multi-agent coordination | MEDIUM | HIGH |
| gstack | Adds multi-session/parallel execution | LOW | HIGH — 70k ⭐ head-start |
| Claude Code Routines | Adds A2A between routine sessions | MEDIUM | HIGH — Anthropic distribution |
---
## Recently Changed (last 30 days)
> PMM cron updates this section automatically when `notable_changes` or `version` fields change.
| Date | Competitor | Change |
|---|---|---|
| 2026-04-17 | **Paperclip** | Threat downgraded HIGH→MEDIUM (deep-dive #571): no A2A, no canvas, brand threat only |
| 2026-04-17 | **Paperclip** | v2026.416.0 — execution policies + chat threads for agent transcripts |
| 2026-04-17 | **Hermes Agent** | v0.10.0 — Tool Gateway (web search, image gen, TTS, browser automation) |
| 2026-04-16 | **LangGraph CLI** | v0.4.22 — deploy source tracking |
| 2026-04-15 | **OpenAI Agents SDK** | v0.14.1 — tracing patch on top of Sandbox Agents beta |
| 2026-04-15 | **Gemini CLI** | v0.38.1 — stability patch |
| 2026-04-14 | **Flowise** | v3.1.2 — security hardening (CORS, credential leaks) |
| 2026-04-14 | **Claude Code Routines** | Launched — Anthropic-hosted cron-triggered Claude Code sessions |
| 2026-04-13 | **Google ADK** | v1.30.0 — Auth Provider + Parameter Manager + Gemma 4 support |
| 2026-04-11 | **VoltAgent** | server-elysia@2.0.7 — A2A agent card URL fix |
| 2026-04-10 | **LangGraph** | v1.1.6 — declarative guardrail nodes (LangGraph 2.0 GA) |
| 2026-04-10 | **Temporal** | v1.30.4 — CVE-2026-5724 security patch |
| 2026-04-10 | **Microsoft Agent Framework** | python-1.0.1 — FileCheckpointStorage security hardening |
| 2026-04-08 | **Scion** | Launched — GCP container-per-agent experimental harness |
| 2026-04-08 | **CrewAI** | v1.14.1 — async checkpoint TUI browser |

View File

@ -0,0 +1,92 @@
# Provisioning Workspaces on Fly Machines (CONTAINER_BACKEND=flyio)
Molecule AI can provision agent workspaces as [Fly Machines](https://fly.io/docs/machines/) instead of local Docker containers. Set `CONTAINER_BACKEND=flyio` on your platform and every `POST /workspaces` call creates a Fly Machine in your app — with tier-based resource limits, env-var injection, and A2A registration handled automatically.
> **Scope note (PR #501):** Workspace images must already be published to GHCR before provisioning. The `delete` and `restart` platform endpoints are not yet fully wired to the Fly provisioner — use `flyctl machine stop/destroy` for teardown until a follow-up PR lands.
## What you'll need
- A Molecule AI platform instance
- A [Fly.io](https://fly.io) account with a Fly app created for workspace machines
- `flyctl` installed locally
- `curl` + `jq`
## Setup
```bash
# 1. Set CONTAINER_BACKEND and Fly credentials on your platform process
# (add to your platform's .env or deployment config)
export CONTAINER_BACKEND=flyio
export FLY_API_TOKEN=<your-fly-deploy-token> # flyctl tokens create deploy
export FLY_WORKSPACE_APP=my-molecule-workspaces # fly app created for this purpose
export FLY_REGION=ord # optional, default: ord
# 2. Restart the platform so it picks up CONTAINER_BACKEND=flyio
# (varies by your deployment — docker restart, systemd reload, etc.)
# 3. Verify the platform is using the Fly provisioner
curl -s http://localhost:8080/healthz | jq .
# 4. Create a workspace — the platform provisions it as a Fly Machine
WS=$(curl -s -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "fly-worker",
"role": "Fly-provisioned inference worker",
"runtime": "hermes",
"tier": 2
}' | jq -r '.id')
echo "Workspace ID: $WS"
# 5. Watch the Fly Machine appear (~15–30s)
flyctl machines list --app $FLY_WORKSPACE_APP
# 6. Poll until the workspace is ready
until curl -s http://localhost:8080/workspaces/$WS | jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 7. Smoke test — send an A2A task
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"What region are you running in?"}]}}}' \
| jq '.result.parts[0].text'
# 8. Inspect the Fly Machine details
flyctl machines show --app $FLY_WORKSPACE_APP
# 9. Teardown (see scope note — use flyctl directly for now)
flyctl machines destroy --app $FLY_WORKSPACE_APP --force
```
## Expected output
Step 5 (`flyctl machines list`) shows the new machine with a `started` state within ~30 seconds. The platform injects your workspace secrets, `PLATFORM_URL`, and workspace ID as environment variables on the machine, then issues an auth token so the agent registers on boot.
Step 7 returns the agent's reply — proof that A2A JSON-RPC is routing through the Fly Machine correctly. The `FLY_REGION` env var is visible inside the container, so asking the agent "What region are you running in?" should return `ord` (or whichever region you set).
## Resource tiers
The Fly provisioner applies tier-based limits automatically — no manual machine sizing needed:
| Tier | RAM | CPUs | Use case |
|------|-----|------|----------|
| T2 | 512 MB | 1 | Light workers, eval agents |
| T3 | 2 GB | 2 | General-purpose orchestrators |
| T4 | 4 GB | 4 | Heavy inference, long-context tasks |
Set `"tier": 2`, `3`, or `4` in your `POST /workspaces` body. Runtime images are resolved from GHCR automatically (`hermes` → `ghcr.io/molecule-ai/workspace-hermes:latest`).
## Why Fly Machines
Fly Machines start in milliseconds and run in 35+ regions. Provisioning agent workspaces on Fly means your inference workers can live close to your users with no infrastructure code changes — just set `FLY_REGION` per workspace. Because the Fly provisioner implements the same `Provisioner` interface as the Docker backend, the rest of the platform is unchanged: same REST API, same A2A protocol, same workspace management UI.
## Related
- PR #501: [feat(platform): Fly Machines provisioner](https://github.com/Molecule-AI/molecule-core/pull/501)
- PR #481: [feat(ci): deploy to Fly after image push](https://github.com/Molecule-AI/molecule-core/pull/481)
- [Fly Machines API docs](https://fly.io/docs/machines/api/)
- [Platform API reference](../api-reference.md)
- Issue [#525](https://github.com/Molecule-AI/molecule-core/issues/525)

View File

@ -0,0 +1,74 @@
# Running a Google ADK Workspace on Molecule AI
Google's Agent Development Kit (ADK) is now a first-class runtime on Molecule AI. This tutorial walks you from zero to a running ADK agent workspace — one that persists per-conversation session state and sits alongside your Claude Code and Gemini CLI workers in the same A2A network.
## What you'll need
- A Molecule AI account with at least one provisioned tenant
- A `GOOGLE_API_KEY` from [aistudio.google.com](https://aistudio.google.com) (or Vertex AI credentials — see below)
- `curl` + `jq`
## Setup
```bash
# 1. Store your Google API key as a global secret
curl -s -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GOOGLE_API_KEY","value":"YOUR-AI-STUDIO-KEY"}' | jq .
# 2. Create a google-adk workspace
WS=$(curl -s -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "adk-agent",
"role": "Google ADK inference worker",
"runtime": "google-adk",
"model": "google:gemini-2.0-flash"
}' | jq -r '.id')
echo "Workspace: $WS"
# 3. Wait for ready (~30s)
until curl -s http://localhost:8080/workspaces/$WS | jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 4. Send your first task
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Summarise the ADK architecture in 3 bullet points."}]}}}' \
| jq '.result.parts[0].text'
# 5. Multi-turn — session state is preserved across calls
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Now give me a one-line TL;DR of what you just said."}]}}}' \
| jq '.result.parts[0].text'
# 6. Vertex AI alternative — set these instead of GOOGLE_API_KEY
# curl -X PUT .../secrets -d '{"key":"GOOGLE_GENAI_USE_VERTEXAI","value":"1"}'
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_PROJECT","value":"my-project"}'
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_LOCATION","value":"us-central1"}'
```
## Expected output
After step 4, ADK streams the Gemini response through its event bus, filters for `is_final_response()` events, and returns the agent's reply as a standard A2A text part. Step 5 should reference the prior answer — the adapter ties each A2A `context_id` to an `InMemorySessionService` session, so conversation state is isolated per task context and survives across calls within the same session.
## How it works
The `google-adk` adapter wraps Google ADK's runner/session model behind the same `AgentExecutor` interface used by every other Molecule AI runtime. On each turn, `GoogleADKA2AExecutor` calls `runner.run_async()` with the incoming message wrapped in a `google.genai.types.Content` object, then drains the event stream until it collects a final-response event. The `google:` model prefix is stripped before being passed to ADK — so `google:gemini-2.0-flash` in your workspace config becomes `gemini-2.0-flash` in the ADK `LlmAgent`. Error class names are sanitized before leaving the executor; raw Google SDK stack traces never reach the A2A caller.
## Mixed-runtime teams
ADK workspaces participate in the same A2A network as Claude Code, Gemini CLI, Hermes, and LangGraph workers. An orchestrator can delegate long-context summarisation to a `google-adk` worker (Gemini 1.5 Pro's 1M token window) while routing tool-use tasks to a `claude-code` worker — with no provider-specific code in the orchestrator itself. Add an ADK peer with `POST /workspaces`, set `GOOGLE_API_KEY`, and it's available for `delegate_task` immediately.
## Related
- PR #550: [feat(adapters): add google-adk runtime adapter](https://github.com/Molecule-AI/molecule-core/pull/550)
- [Google ADK (adk-python)](https://github.com/google/adk-python)
- [Gemini CLI runtime tutorial](./gemini-cli-runtime.md)
- [Platform API reference](../api-reference.md)

View File

@ -0,0 +1,184 @@
# Hermes Multi-Provider Dispatch: Native Anthropic, Gemini, and Multi-Turn History
Hermes is Molecule AI's inference router. Out of the box it proxies every model through an OpenAI-compatible shim. That works for plain text, but the shim does format translation on every round-trip — and it gets the Gemini message format wrong (Gemini expects `role: "model"` and a `parts: [{text}]` wrapper; the shim passes `role: "assistant"` and a flat string). It also flattens multi-turn conversations into a single user blob, losing role attribution across turns.
Phases 2a–2c wire three native dispatch paths keyed on `auth_scheme`. This tutorial shows you how to unlock them.
> **Phase 2d scope note:** Tool calling, vision content blocks, system instructions, and streaming on the native paths are scoped for Phase 2d and are **not yet shipped**. This tutorial covers what is merged today: correct native dispatch + multi-turn history continuity.
## What you'll need
- A Molecule AI account with API access
- `ANTHROPIC_API_KEY` **or** `GEMINI_API_KEY` (or both)
- `curl` + `jq`
## The dispatch table
After Phases 2a / 2b / 2c, Hermes picks an inference path based on which provider is configured:
| `auth_scheme` | Dispatch path | Provider | API |
|---|---|---|---|
| `openai` | `_do_openai_compat` | 13 providers (OpenRouter, Groq, Mistral…) | OpenAI-compat shim |
| `anthropic` | `_do_anthropic_native` | Anthropic | Native Messages API |
| `gemini` | `_do_gemini_native` | Google | Native `generateContent` |
| unknown | `_do_openai_compat` + warning | any | OpenAI-compat shim (forward-compat) |
**Rule of thumb:** set `ANTHROPIC_API_KEY` to get native Anthropic dispatch. Set `GEMINI_API_KEY` to get native Gemini dispatch. Set `NOUS_API_KEY` / `HERMES_API_KEY` / `OPENROUTER_API_KEY` to stay on the compat shim. Molecule AI reads these in priority order: `HERMES_API_KEY` → `OPENROUTER_API_KEY` → `ANTHROPIC_API_KEY` → `GEMINI_API_KEY`. The **first key found wins**, so don't set `HERMES_API_KEY` if you want native dispatch.
---
## Setup
```bash
# 0. Export your platform URL and a workspace to use as orchestrator
export MOLECULE_API=http://localhost:8080
export ORCH_ID=<your-orchestrator-workspace-id>
# 1. Store your Anthropic key as a global secret
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":"sk-ant-YOUR-KEY"}' | jq .
# 2. Create a Hermes workspace — Anthropic native dispatch
ANTHROPIC_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-anthropic",
"role": "Inference worker — native Anthropic path",
"runtime": "hermes",
"model": "anthropic:claude-sonnet-4-5"
}' | jq -r '.id')
echo "Anthropic workspace: $ANTHROPIC_WS"
# 3. Wait for it to be ready (~20–30s)
until curl -s $MOLECULE_API/workspaces/$ANTHROPIC_WS | jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 4. Store your Gemini key as a global secret
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GEMINI_API_KEY","value":"YOUR-GEMINI-KEY"}' | jq .
# 5. Create a Hermes workspace — Gemini native dispatch
GEMINI_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-gemini",
"role": "Inference worker — native Gemini path",
"runtime": "hermes",
"model": "gemini:gemini-2.0-flash"
}' | jq -r '.id')
echo "Gemini workspace: $GEMINI_WS"
# 6. Pin the Gemini workspace to Gemini-only keys (no ANTHROPIC_API_KEY override)
curl -s -X PUT $MOLECULE_API/workspaces/$GEMINI_WS/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":""}' | jq .
# 7. Confirm dispatch — send a single-turn probe to the Anthropic workspace
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"Which API are you using to generate this response?"}]}}
}' | jq '.result.parts[0].text'
# 8. Same probe to the Gemini workspace
curl -s -X POST $MOLECULE_API/workspaces/$GEMINI_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"Which API are you using to generate this response?"}]}}
}' | jq '.result.parts[0].text'
# 9. Multi-turn history — Phase 2c keeps turns as turns (not flattened)
# Send turn 1
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"My name is Alice. Remember that."}]}}
}' | jq '.result.parts[0].text'
# 10. Send turn 2 — history is automatically threaded by Hermes Phase 2c
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text","text":"What is my name?"}]}}
}' | jq '.result.parts[0].text'
# Expected: "Alice" — not "I don't know", which the old flattened path could produce
```
## Expected output
**Step 7 (Anthropic workspace):** The agent confirms it is calling the Anthropic Messages API natively. Hermes executed `_do_anthropic_native` — no OpenAI-compat translation layer.
**Step 8 (Gemini workspace):** The agent confirms Google `generateContent`. Hermes called `_do_gemini_native`, which passes `role: "model"` (not `"assistant"`) and the `parts: [{text: ...}]` wrapper the native SDK requires. The compat-shim translation that produced incorrect message format is bypassed.
**Step 10 (multi-turn, Phase 2c):** Returns `"Alice"`. Before Phase 2c, history was flattened into a single user blob — the model could recover the gist but lost clean role attribution. Phase 2c passes turns as turns: OpenAI uses `{role, content}`, Anthropic uses the same wire shape for text-only, Gemini uses `{role: "model", parts: [{text}]}`.
## How dispatch works under the hood
`HermesA2AExecutor._do_inference(user_message, history)` reads `self.provider_cfg.auth_scheme`:
```python
if self.provider_cfg.auth_scheme == "anthropic":
return await self._do_anthropic_native(user_message, history)
elif self.provider_cfg.auth_scheme == "gemini":
return await self._do_gemini_native(user_message, history)
else: # "openai" + unknown (forward-compat fallback)
return await self._do_openai_compat(user_message, history)
```
Fail-loud semantics: if the `anthropic` package isn't installed, `_do_anthropic_native` raises a clear `RuntimeError` before any inference attempt. Same for `google-genai`. Silent fallback to the compat shim would mask format errors — Molecule AI chooses loud failure.
## Building a multi-provider team
The real win surfaces in a mixed-provider agent team. Your orchestrator can fan tasks to an Anthropic worker and a Gemini worker simultaneously, each receiving properly formatted messages through their native API paths:
```bash
# Fan out from the orchestrator — both fire in parallel
curl -s -X POST $MOLECULE_API/workspaces/$ORCH_ID/a2a \
-H "Content-Type: application/json" \
-d "{
\"jsonrpc\":\"2.0\",\"id\":\"fan-1\",\"method\":\"message/send\",
\"params\":{\"message\":{\"role\":\"user\",\"parts\":[{\"kind\":\"text\",
\"text\":\"delegate_task_async $ANTHROPIC_WS 'Draft release notes for v2.1' AND delegate_task_async $GEMINI_WS 'Summarise the last 30 days of support tickets'\"}]}}
}" | jq .
```
Both workers use their native inference paths. No LiteLLM proxy layer. No format translation on every request. The orchestrator gets results back through the same A2A protocol regardless of which underlying model powered each task.
## Capability comparison: Hermes native vs the compat shim
What is shipping today (Phases 2a + 2b + 2c — all merged to main):
| Capability | OpenAI-compat shim | Anthropic native | Gemini native |
|---|---|---|---|
| Plain text (single-turn) | ✅ | ✅ | ✅ |
| Multi-turn history | ⚠️ flattened into one user blob | ✅ role-attributed turns | ✅ `role: "model"` + `parts` wrapper |
| Correct Gemini message format | ❌ wrong role + missing parts wrapper | — | ✅ |
| No compat-shim translation overhead | ❌ every request translated | ✅ | ✅ |
What is on the roadmap for Phase 2d (not yet shipped):
| Capability | Anthropic native | Gemini native |
|---|---|---|
| `tool_use` / `tool_result` blocks | 📋 Phase 2d | 📋 Phase 2d |
| Vision content blocks | 📋 Phase 2d | 📋 Phase 2d |
| System instructions (`system=`) | 📋 Phase 2d | 📋 Phase 2d (`system_instruction=`) |
| Extended thinking | 📋 Phase 2d | — |
| Streaming | 📋 Phase 2d | 📋 Phase 2d |
**Why Molecule AI vs Letta / AG2 / n8n:** Those frameworks handle multi-LLM at the application layer — you write different agent classes per provider. Molecule AI handles it at the infrastructure layer. Your workspace configs change; your orchestration code doesn't. Swap a Gemini worker for an Anthropic worker by changing one secret. No code redeploy.
## Related
- PR #240: [Phase 2a — native Anthropic dispatch](https://github.com/Molecule-AI/molecule-core/pull/240)
- PR #255: [Phase 2b — native Gemini dispatch](https://github.com/Molecule-AI/molecule-core/pull/255)
- PR #267: [Phase 2c — multi-turn history on all paths](https://github.com/Molecule-AI/molecule-core/pull/267)
- [Hermes adapter design](../adapters/hermes-adapter-design.md)
- [Platform API reference](../api-reference.md)
- Issue [#513](https://github.com/Molecule-AI/molecule-core/issues/513)

View File

@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"log"
"sync"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
@ -14,8 +15,17 @@ import (
const broadcastChannel = "events:broadcast"
// sseSubscription is a single in-process SSE subscriber.
// deliverToSSE writes to ch; StreamEvents reads from it.
// sseSubscription is a single in-process SSE subscriber: the workspace it
// watches plus the buffered channel events are delivered on.
type sseSubscription struct {
	workspaceID string                // only events for this workspace are delivered
	ch          chan models.WSMessage // buffered; writers drop (non-blocking) when full
}
// Broadcaster fans platform events out to two delivery paths: the shared
// WebSocket hub and any in-process SSE subscribers registered via
// SubscribeSSE.
//
// Fix: the rendered merge left a duplicate `hub *ws.Hub` field line
// (old + new diff lines both present), which is invalid Go — a struct
// cannot declare the same field twice. Keep the single post-merge field set.
type Broadcaster struct {
	hub    *ws.Hub            // WebSocket fan-out hub
	ssesMu sync.RWMutex       // guards sses
	sses   []*sseSubscription // active in-process SSE subscribers
}
func NewBroadcaster(hub *ws.Hub) *Broadcaster {
@ -59,6 +69,9 @@ func (b *Broadcaster) RecordAndBroadcast(ctx context.Context, eventType string,
// Broadcast to local WebSocket clients
b.hub.Broadcast(msg)
// Fan out to in-process SSE subscribers (e.g. GET /events/stream).
b.deliverToSSE(msg)
return nil
}
@ -79,6 +92,52 @@ func (b *Broadcaster) BroadcastOnly(workspaceID string, eventType string, payloa
}
b.hub.Broadcast(msg)
// Fan out to in-process SSE subscribers.
b.deliverToSSE(msg)
}
// SubscribeSSE registers a per-workspace in-process channel for SSE streaming.
// The caller MUST invoke the returned cancel func when it disconnects so the
// subscription is removed and the channel is not leaked.
// SubscribeSSE registers a per-workspace in-process channel for SSE streaming.
// The caller MUST invoke the returned cancel func when it disconnects so the
// subscription is removed and the channel is not leaked.
func (b *Broadcaster) SubscribeSSE(workspaceID string) (<-chan models.WSMessage, func()) {
	sub := &sseSubscription{
		workspaceID: workspaceID,
		ch:          make(chan models.WSMessage, 64),
	}

	b.ssesMu.Lock()
	b.sses = append(b.sses, sub)
	b.ssesMu.Unlock()

	// Removal closure: drops exactly this subscription; order of the
	// remaining subscribers is preserved.
	unsubscribe := func() {
		b.ssesMu.Lock()
		defer b.ssesMu.Unlock()
		for idx, candidate := range b.sses {
			if candidate == sub {
				b.sses = append(b.sses[:idx], b.sses[idx+1:]...)
				return
			}
		}
	}
	return sub.ch, unsubscribe
}
// deliverToSSE fans msg out to every in-process SSE subscriber watching the
// same workspace. Non-blocking: if a subscriber's buffer is full the event is
// dropped with a log line (the WebSocket path still delivers it).
// deliverToSSE fans msg out to every in-process SSE subscriber watching the
// same workspace. Non-blocking: if a subscriber's buffer is full the event is
// dropped with a log line (the WebSocket path still delivers it).
func (b *Broadcaster) deliverToSSE(msg models.WSMessage) {
	b.ssesMu.RLock()
	defer b.ssesMu.RUnlock()
	for _, sub := range b.sses {
		if sub.workspaceID == msg.WorkspaceID {
			select {
			case sub.ch <- msg:
			default:
				log.Printf("SSE: subscriber buffer full for workspace %s, dropping event %s", msg.WorkspaceID, msg.Event)
			}
		}
	}
}
// Subscribe listens to Redis pub/sub and relays events to the WebSocket hub.

View File

@ -251,6 +251,12 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
if logActivity {
h.logA2ASuccess(ctx, workspaceID, callerID, body, respBody, a2aMethod, resp.StatusCode, durationMs)
}
// Track LLM token usage for cost transparency (#593).
// Fires in a detached goroutine so token accounting never adds latency
// to the critical A2A path.
go extractAndUpsertTokenUsage(context.WithoutCancel(ctx), workspaceID, respBody)
return resp.StatusCode, respBody, nil
}
@ -577,3 +583,65 @@ func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) e
// token" branch so the handler-level guard can detect it without string
// matching (the wsauth errors are typed for the invalid case).
var errInvalidCallerToken = errors.New("missing caller auth token")
// extractAndUpsertTokenUsage parses LLM usage from a raw A2A response body
// and persists it via upsertTokenUsage. Safe to call in a goroutine — logs
// errors but never panics. ctx must already be detached from the request.
// extractAndUpsertTokenUsage parses LLM usage from a raw A2A response body
// and persists it via upsertTokenUsage. Safe to call in a goroutine — logs
// errors but never panics. ctx must already be detached from the request.
func extractAndUpsertTokenUsage(ctx context.Context, workspaceID string, respBody []byte) {
	inTok, outTok := parseUsageFromA2AResponse(respBody)
	if inTok == 0 && outTok == 0 {
		return // no recognisable usage data; nothing to record
	}
	upsertTokenUsage(ctx, workspaceID, inTok, outTok)
}
// parseUsageFromA2AResponse extracts input_tokens / output_tokens from an A2A
// JSON-RPC response. Inspects two locations in order of preference:
// 1. result.usage — the JSON-RPC 2.0 result envelope from workspace agents.
// 2. usage — top-level, for non-JSON-RPC or direct Anthropic-shaped payloads.
//
// Returns (0, 0) when no recognisable usage data is found.
// parseUsageFromA2AResponse extracts input_tokens / output_tokens from an A2A
// JSON-RPC response body. Two locations are inspected, in order of preference:
//
//  1. result.usage — the JSON-RPC 2.0 result envelope from workspace agents.
//  2. usage — top-level, for non-JSON-RPC or direct Anthropic-shaped payloads.
//
// Returns (0, 0) when no recognisable usage data is found.
func parseUsageFromA2AResponse(body []byte) (inputTokens, outputTokens int64) {
	if len(body) == 0 {
		return 0, 0
	}
	var envelope map[string]json.RawMessage
	if json.Unmarshal(body, &envelope) != nil {
		return 0, 0
	}
	// Preferred: usage nested under the JSON-RPC "result" object.
	if rawResult, found := envelope["result"]; found {
		var resultObj map[string]json.RawMessage
		if json.Unmarshal(rawResult, &resultObj) == nil {
			if in, out, ok := readUsageMap(resultObj); ok {
				return in, out
			}
		}
	}
	// Fallback: usage at the top level of the response.
	if in, out, ok := readUsageMap(envelope); ok {
		return in, out
	}
	return 0, 0
}
// readUsageMap extracts input_tokens / output_tokens from the "usage" key of m.
// Returns (0, 0, false) when the key is absent or contains no non-zero values.
func readUsageMap(m map[string]json.RawMessage) (inputTokens, outputTokens int64, ok bool) {
rawUsage, has := m["usage"]
if !has {
return 0, 0, false
}
var usage struct {
InputTokens int64 `json:"input_tokens"`
OutputTokens int64 `json:"output_tokens"`
}
if err := json.Unmarshal(rawUsage, &usage); err != nil {
return 0, 0, false
}
if usage.InputTokens == 0 && usage.OutputTokens == 0 {
return 0, 0, false
}
return usage.InputTokens, usage.OutputTokens, true
}

View File

@ -54,6 +54,13 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
return // response already written
}
// #548 — prevent self-delegation: a workspace delegating to itself
// acquires _run_lock twice on the same mutex, deadlocking permanently.
if sourceID == body.TargetID {
c.JSON(http.StatusBadRequest, gin.H{"error": "self-delegation not permitted"})
return
}
// #124 — idempotency. If the caller supplies an idempotency_key, return
// the existing delegation when (workspace_id, idempotency_key) already
// exists and is not in a failed terminal state.

View File

@ -88,6 +88,37 @@ func TestDelegate_InvalidUUIDTargetID(t *testing.T) {
}
}
// ---------- Delegate: self-delegation → 400 ----------

// TestDelegate_SelfDelegation_Rejected verifies the #548 guard: delegating a
// workspace to itself must be rejected with 400 and a stable error message
// (the handler-side fix for the _run_lock double-acquire deadlock).
func TestDelegate_SelfDelegation_Rejected(t *testing.T) {
	setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	dh := NewDelegationHandler(wh, broadcaster)
	// Use the same UUID for both source and target to trigger the self-delegation guard.
	selfID := "11111111-2222-3333-4444-555555555555"
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: selfID}}
	body := `{"target_id":"` + selfID + `","task":"do something"}`
	c.Request = httptest.NewRequest("POST", "/workspaces/"+selfID+"/delegate", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	dh.Delegate(c)
	if w.Code != http.StatusBadRequest {
		t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	// Fix: the Unmarshal error was previously discarded, so a malformed body
	// produced a confusing "got <nil>" failure instead of pointing at the JSON.
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("response is not valid JSON: %v — body: %s", err, w.Body.String())
	}
	if resp["error"] != "self-delegation not permitted" {
		t.Errorf("expected 'self-delegation not permitted', got %v", resp["error"])
	}
}
// ---------- Delegate: success → 202 with delegation_id ----------
func TestDelegate_Success(t *testing.T) {

View File

@ -0,0 +1,115 @@
// Package handlers — GitHub App installation-token refresh endpoint.
//
// GET /admin/github-installation-token returns a fresh GitHub App
// installation token on demand. Long-running workspace containers use
// this as a git credential helper and for explicit `gh auth` re-runs
// so they never operate with an expired GH_TOKEN.
//
// # Why this endpoint?
//
// The github-app-auth plugin (PR #506) injects GH_TOKEN + GITHUB_TOKEN
// into a workspace container's env at provision time. Those tokens are
// GitHub App installation tokens with a fixed ~60 min TTL. The plugin
// keeps a server-side in-process cache and proactively refreshes it
// 5 min before expiry, but the workspace env is set once at container
// start and never updated — so any workspace alive >60 min ends up with
// an expired token (issue #547).
//
// The fix is:
//
// 1. Platform side (this file): expose GET /admin/github-installation-token.
// The handler delegates to the registered TokenProvider (typically the
// github-app-auth plugin), whose cache is always fresh. Gated behind
// AdminAuth — any valid workspace bearer token can call it.
//
// 2. Workspace side: a shell credential helper
// (workspace-template/scripts/molecule-git-token-helper.sh) configured
// as the git credential helper. git calls it on every push/fetch;
// it hits this endpoint and emits the fresh token to stdout. A 30-min
// cron also runs `gh auth login --with-token` using the same helper.
//
// # Approach chosen
//
// Option B (pre-flight/on-demand): workspaces poll for a token when
// they need one (credential helper callback). This is preferable over a
// background goroutine pusher (Option A) because:
//
// - The plugin already maintains its own refresh cache — there is no
// token to refresh on the platform side.
// - Pushing a new token into running containers requires docker exec /
// env mutation, which the architecture explicitly rejects (see issue
// #547 "Alternatives considered").
// - On-demand is pull-based, stateless, and trivially testable.
package handlers
import (
"log"
"net/http"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/pkg/provisionhook"
"github.com/gin-gonic/gin"
)
// GitHubTokenHandler serves GET /admin/github-installation-token.
type GitHubTokenHandler struct {
	// registry holds the provision-hook plugin chain; nil when no GitHub
	// App plugin is registered (dev / self-hosted deployments).
	registry *provisionhook.Registry
}
// NewGitHubTokenHandler constructs the handler. reg may be nil when
// no GitHub App plugin is registered (dev / self-hosted deployments);
// the handler then answers 404 on every request.
func NewGitHubTokenHandler(reg *provisionhook.Registry) *GitHubTokenHandler {
	handler := &GitHubTokenHandler{
		registry: reg,
	}
	return handler
}
// GetInstallationToken handles GET /admin/github-installation-token.
//
// Returns:
//
//	200 {"token": "ghs_...", "expires_at": "2026-04-17T22:50:00Z"}
//	404 {"error": "no GitHub App configured"} — GITHUB_APP_ID not set
//	404 {"error": "no token provider registered"} — plugin loaded but
//	    doesn't implement TokenProvider
//	500 {"error": "token refresh failed"} — provider returned error
//
// The 404 vs 403 distinction is intentional: a 404 means the feature is
// simply not configured, not that the caller is forbidden. This matches
// the pattern used by GET /admin/workspaces/:id/test-token.
//
// Callers must retry with exponential back-off on 500 — a transient
// upstream GitHub API error should not permanently block git operations.
func (h *GitHubTokenHandler) GetInstallationToken(c *gin.Context) {
	// Guard clauses: feature entirely absent, or present without a
	// TokenProvider implementation — both read as "not configured" (404).
	if h.registry == nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "no GitHub App configured"})
		return
	}
	tokenProvider := h.registry.FirstTokenProvider()
	if tokenProvider == nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "no token provider registered"})
		return
	}

	token, expiresAt, err := tokenProvider.Token(c.Request.Context())
	if err != nil {
		log.Printf("[github] token refresh failed: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "token refresh failed"})
		return
	}
	if token == "" {
		// A nil error with an empty token is a plugin bug; surface as 500.
		log.Printf("[github] token provider returned empty token")
		c.JSON(http.StatusInternalServerError, gin.H{"error": "token refresh failed: empty token"})
		return
	}

	// Never log the token itself.
	ttlSeconds := time.Until(expiresAt).Seconds()
	log.Printf("[github] served fresh installation token (expires %s, TTL %.0fs)",
		expiresAt.Format(time.RFC3339), ttlSeconds)
	c.JSON(http.StatusOK, gin.H{
		"token":      token,
		"expires_at": expiresAt.UTC().Format(time.RFC3339),
	})
}

View File

@ -0,0 +1,232 @@
package handlers
import (
"context"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/pkg/provisionhook"
"github.com/gin-gonic/gin"
)
// ─── mock helpers ────────────────────────────────────────────────────────────

// mockMutatorOnly implements EnvMutator but NOT TokenProvider.
type mockMutatorOnly struct{ name string }

// Name returns the mock plugin's registration name.
func (m *mockMutatorOnly) Name() string { return m.name }

// MutateEnv is a no-op; present only to satisfy the EnvMutator interface.
func (m *mockMutatorOnly) MutateEnv(_ context.Context, _ string, _ map[string]string) error {
	return nil
}

// mockTokenMutator implements both EnvMutator and TokenProvider.
// Set err to simulate a provider failure; otherwise returns token + expiresAt.
type mockTokenMutator struct {
	name      string
	token     string
	expiresAt time.Time
	err       error
}

// Name returns the mock plugin's registration name.
func (m *mockTokenMutator) Name() string { return m.name }

// MutateEnv is a no-op; present only to satisfy the EnvMutator interface.
func (m *mockTokenMutator) MutateEnv(_ context.Context, _ string, _ map[string]string) error {
	return nil
}

// Token returns the canned (token, expiresAt, err) triple configured on the mock.
func (m *mockTokenMutator) Token(_ context.Context) (string, time.Time, error) {
	return m.token, m.expiresAt, m.err
}
// ─── request helper ──────────────────────────────────────────────────────────

// newGitHubTokenRequest builds a ResponseRecorder and a gin test context
// pre-loaded with a GET /admin/github-installation-token request, ready to
// hand straight to GetInstallationToken.
func newGitHubTokenRequest() (*httptest.ResponseRecorder, *gin.Context) {
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Request = httptest.NewRequest(http.MethodGet, "/admin/github-installation-token", nil)
	return w, c
}
// ─── tests ───────────────────────────────────────────────────────────────────

// TestGitHubToken_NilRegistry — no GitHub App plugin loaded at all.
// Expect 404 so operators can distinguish "not configured" from "forbidden".
func TestGitHubToken_NilRegistry(t *testing.T) {
	h := NewGitHubTokenHandler(nil)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404 for nil registry, got %d: %s", w.Code, w.Body.String())
	}
	// The error body must be machine-readable JSON with a populated "error" key.
	var body map[string]string
	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
		t.Fatalf("response is not valid JSON: %v", err)
	}
	if body["error"] == "" {
		t.Error("expected non-empty error field in response")
	}
}

// TestGitHubToken_NoTokenProvider — plugin registered but doesn't implement
// TokenProvider (e.g. a non-GitHub mutator in the chain).
// Expect 404 — the GitHub App endpoint is not available.
func TestGitHubToken_NoTokenProvider(t *testing.T) {
	reg := provisionhook.NewRegistry()
	reg.Register(&mockMutatorOnly{name: "other-plugin"})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404 when no TokenProvider, got %d: %s", w.Code, w.Body.String())
	}
}

// TestGitHubToken_ProviderError — provider returns an error (e.g. GitHub API
// unreachable). Expect 500 so the workspace credential helper retries.
func TestGitHubToken_ProviderError(t *testing.T) {
	reg := provisionhook.NewRegistry()
	reg.Register(&mockTokenMutator{
		name: "github-app-auth",
		err:  errors.New("github: 503 service unavailable"),
	})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500 on provider error, got %d: %s", w.Code, w.Body.String())
	}
	var body map[string]string
	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
		t.Fatalf("response is not valid JSON: %v", err)
	}
	if body["error"] == "" {
		t.Error("expected non-empty error field in 500 response")
	}
}

// TestGitHubToken_EmptyToken — provider returns no error but an empty token.
// This should never happen in normal operation but is a programming error in
// the plugin; treat it as a refresh failure.
func TestGitHubToken_EmptyToken(t *testing.T) {
	exp := time.Now().Add(55 * time.Minute)
	reg := provisionhook.NewRegistry()
	reg.Register(&mockTokenMutator{
		name:      "github-app-auth",
		token:     "", // empty — plugin bug
		expiresAt: exp,
	})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500 for empty token, got %d: %s", w.Code, w.Body.String())
	}
}
// TestGitHubToken_HappyPath — provider returns a valid token.
// Assert: 200, token present, expires_at is a valid RFC3339 timestamp
// with a positive TTL (i.e. the token is not already expired).
func TestGitHubToken_HappyPath(t *testing.T) {
	// Truncate to whole seconds so RFC3339 round-tripping is lossless.
	exp := time.Now().UTC().Add(55 * time.Minute).Truncate(time.Second)
	reg := provisionhook.NewRegistry()
	reg.Register(&mockTokenMutator{
		name:      "github-app-auth",
		token:     "ghs_TestTokenABC123",
		expiresAt: exp,
	})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var body struct {
		Token     string `json:"token"`
		ExpiresAt string `json:"expires_at"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
		t.Fatalf("response is not valid JSON: %v", err)
	}
	if body.Token != "ghs_TestTokenABC123" {
		t.Errorf("expected token 'ghs_TestTokenABC123', got %q", body.Token)
	}
	parsed, err := time.Parse(time.RFC3339, body.ExpiresAt)
	if err != nil {
		t.Fatalf("expires_at is not valid RFC3339: %q — %v", body.ExpiresAt, err)
	}
	if !parsed.After(time.Now()) {
		t.Errorf("expires_at %s is in the past — handler served an expired token", body.ExpiresAt)
	}
}
// TestGitHubToken_FirstProviderWins — two mutators registered; only the first
// implements TokenProvider. Confirm the first one is used (registration order).
func TestGitHubToken_FirstProviderWins(t *testing.T) {
	exp := time.Now().UTC().Add(55 * time.Minute)
	reg := provisionhook.NewRegistry()
	reg.Register(&mockTokenMutator{
		name:      "first-provider",
		token:     "ghs_First",
		expiresAt: exp,
	})
	reg.Register(&mockTokenMutator{
		name:      "second-provider",
		token:     "ghs_Second",
		expiresAt: exp,
	})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var body map[string]string
	// Fix: the Unmarshal error was previously discarded; a malformed body
	// would fail with a misleading empty-map assertion instead of the cause.
	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
		t.Fatalf("response is not valid JSON: %v — body: %s", err, w.Body.String())
	}
	if body["token"] != "ghs_First" {
		t.Errorf("expected first provider's token 'ghs_First', got %q", body["token"])
	}
}

// TestGitHubToken_NonProviderBeforeProvider — a plain EnvMutator is registered
// first, then a TokenProvider. Confirm the provider is still found (skip over
// non-providers).
func TestGitHubToken_NonProviderBeforeProvider(t *testing.T) {
	exp := time.Now().UTC().Add(55 * time.Minute)
	reg := provisionhook.NewRegistry()
	reg.Register(&mockMutatorOnly{name: "env-injector"})
	reg.Register(&mockTokenMutator{
		name:      "github-app-auth",
		token:     "ghs_FoundBehindOther",
		expiresAt: exp,
	})
	h := NewGitHubTokenHandler(reg)
	w, c := newGitHubTokenRequest()
	h.GetInstallationToken(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var body map[string]string
	// Fix: check the Unmarshal error (errcheck) for the same reason as above.
	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
		t.Fatalf("response is not valid JSON: %v — body: %s", err, w.Body.String())
	}
	if body["token"] != "ghs_FoundBehindOther" {
		t.Errorf("expected 'ghs_FoundBehindOther', got %q", body["token"])
	}
}

View File

@ -28,9 +28,11 @@ func TestWorkspaceCreate_WithParentID(t *testing.T) {
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
parentID := "parent-ws-123"
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Child Agent", nil, 1, "langgraph", sqlmock.AnyArg(), &parentID, nil, "none").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
mock.ExpectExec("INSERT INTO canvas_layouts").
WithArgs(sqlmock.AnyArg(), float64(0), float64(0)).
WillReturnResult(sqlmock.NewResult(0, 1))
@ -61,9 +63,11 @@ func TestWorkspaceCreate_ExplicitClaudeCodeRuntime(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "CC Agent", nil, 2, "claude-code", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
mock.ExpectExec("INSERT INTO canvas_layouts").
WithArgs(sqlmock.AnyArg(), float64(10), float64(20)).
WillReturnResult(sqlmock.NewResult(0, 1))

View File

@ -248,11 +248,17 @@ func TestWorkspaceCreate(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", "/tmp/configs")
// Expect transaction begin for atomic workspace+secrets creation
mock.ExpectBegin()
// Expect workspace INSERT (uuid is dynamic, use AnyArg for id, runtime, awareness_namespace)
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Test Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WillReturnResult(sqlmock.NewResult(0, 1))
// Expect transaction commit (no secrets in this payload)
mock.ExpectCommit()
// Expect canvas_layouts INSERT
mock.ExpectExec("INSERT INTO canvas_layouts").
WithArgs(sqlmock.AnyArg(), float64(100), float64(200)).

View File

@ -0,0 +1,254 @@
package handlers
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"log"
	"net/http"
	"time"

	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
	"github.com/gin-gonic/gin"
)
// resolveOrgID returns the effective org ID for a workspace: the parent_id
// when one exists, or the workspace's own ID when it is the org root.
// Returns an empty string (and nil error) if the workspace is not found.
func resolveOrgID(ctx context.Context, workspaceID string) (string, error) {
	var parentID sql.NullString
	err := db.DB.QueryRowContext(ctx,
		`SELECT parent_id FROM workspaces WHERE id = $1`,
		workspaceID,
	).Scan(&parentID)
	// Fix: use errors.Is rather than == so drivers/middleware that wrap
	// sql.ErrNoRows (Go 1.13+ error wrapping) still hit the not-found path
	// instead of surfacing as a spurious DB error.
	if errors.Is(err, sql.ErrNoRows) {
		return "", nil
	}
	if err != nil {
		return "", err
	}
	if parentID.Valid && parentID.String != "" {
		return parentID.String, nil
	}
	// No parent recorded — this workspace is itself the org root.
	return workspaceID, nil
}
// checkOrgPluginAllowlist returns (true, reason) when the plugin is blocked
// by the org's allowlist, or (false, "") when the install is permitted.
//
// Semantics:
//   - No allowlist rows for this org → allow-all (backward compat).
//   - Allowlist exists and plugin is on it → allowed.
//   - Allowlist exists and plugin is NOT on it → blocked (403).
//   - DB errors → fail-open with a log (don't block installs on DB hiccup).
func checkOrgPluginAllowlist(ctx context.Context, workspaceID, pluginName string) (blocked bool, reason string) {
	orgID, err := resolveOrgID(ctx, workspaceID)
	if err != nil {
		log.Printf("allowlist: resolveOrgID(%s) failed: %v — allowing install", workspaceID, err)
		return false, ""
	}
	if orgID == "" {
		// Workspace not found; let later checks handle it.
		return false, ""
	}

	// Fast path: is the plugin explicitly on the allowlist?
	var onList bool
	err = db.DB.QueryRowContext(ctx, `
		SELECT EXISTS(
			SELECT 1 FROM org_plugin_allowlist
			WHERE org_id = $1 AND plugin_name = $2
		)
	`, orgID, pluginName).Scan(&onList)
	if err != nil {
		log.Printf("allowlist: existence check failed (org=%s plugin=%s): %v — allowing install", orgID, pluginName, err)
		return false, ""
	}
	if onList {
		return false, ""
	}

	// The plugin is absent — but absence only blocks when an allowlist is
	// actually configured. An empty allowlist means allow-all.
	var rowCount int
	if err := db.DB.QueryRowContext(ctx,
		`SELECT COUNT(*) FROM org_plugin_allowlist WHERE org_id = $1`,
		orgID,
	).Scan(&rowCount); err != nil {
		log.Printf("allowlist: count check failed (org=%s): %v — allowing install", orgID, err)
		return false, ""
	}
	if rowCount > 0 {
		return true, fmt.Sprintf("plugin %q is not in the org allowlist", pluginName)
	}
	return false, ""
}
// OrgPluginAllowlistHandler manages the per-org plugin governance registry.
// It is stateless — all state lives in the org_plugin_allowlist table.
type OrgPluginAllowlistHandler struct{}

// NewOrgPluginAllowlistHandler constructs an OrgPluginAllowlistHandler.
func NewOrgPluginAllowlistHandler() *OrgPluginAllowlistHandler {
	return &OrgPluginAllowlistHandler{}
}
// allowlistEntry is the JSON shape for a single allowlist record.
type allowlistEntry struct {
	PluginName string    `json:"plugin_name"`
	EnabledBy  string    `json:"enabled_by"` // workspace ID of the admin who added the entry
	EnabledAt  time.Time `json:"enabled_at"`
}

// putAllowlistRequest is the request body for PUT /orgs/:id/plugins/allowlist.
// Plugins holds the complete desired allowlist; the handler replaces the
// current entries atomically. An empty slice clears the allowlist (allow-all).
type putAllowlistRequest struct {
	Plugins   []string `json:"plugins"`
	EnabledBy string   `json:"enabled_by"` // workspace ID of the admin performing the change
}
// GetAllowlist handles GET /orgs/:id/plugins/allowlist.
//
// Returns the current allowlist for the org workspace identified by :id.
// An empty array means no allowlist is configured (allow-all). Auth: AdminAuth.
func (h *OrgPluginAllowlistHandler) GetAllowlist(c *gin.Context) {
	ctx := c.Request.Context()
	orgID := c.Param("id")

	// Verify the org workspace exists before touching the allowlist table.
	var orgExists bool
	err := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		orgID,
	).Scan(&orgExists)
	if err != nil {
		log.Printf("allowlist: org check failed for %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify org"})
		return
	}
	if !orgExists {
		c.JSON(http.StatusNotFound, gin.H{"error": "org not found"})
		return
	}

	rows, err := db.DB.QueryContext(ctx, `
		SELECT plugin_name, enabled_by, enabled_at
		FROM org_plugin_allowlist
		WHERE org_id = $1
		ORDER BY plugin_name
	`, orgID)
	if err != nil {
		log.Printf("allowlist: query failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch allowlist"})
		return
	}
	defer rows.Close()

	// Start from a non-nil slice so an empty allowlist serialises as []
	// rather than null.
	entries := make([]allowlistEntry, 0)
	for rows.Next() {
		var entry allowlistEntry
		if scanErr := rows.Scan(&entry.PluginName, &entry.EnabledBy, &entry.EnabledAt); scanErr != nil {
			log.Printf("allowlist: scan error for org %s: %v", orgID, scanErr)
			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read allowlist"})
			return
		}
		entries = append(entries, entry)
	}
	if iterErr := rows.Err(); iterErr != nil {
		log.Printf("allowlist: rows error for org %s: %v", orgID, iterErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read allowlist"})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"org_id":    orgID,
		"plugins":   entries,
		"allow_all": len(entries) == 0,
	})
}
// PutAllowlist handles PUT /orgs/:id/plugins/allowlist.
//
// Replaces the org's allowlist atomically with the supplied plugin names.
// Sending an empty plugins array clears the allowlist (reverts to allow-all).
// Auth: AdminAuth.
func (h *OrgPluginAllowlistHandler) PutAllowlist(c *gin.Context) {
	orgID := c.Param("id")
	ctx := c.Request.Context()
	var req putAllowlistRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	// Require an audit identity before doing any work.
	if req.EnabledBy == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "enabled_by is required"})
		return
	}
	// Validate each plugin name for safety before touching the DB.
	for _, name := range req.Plugins {
		if err := validatePluginName(name); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{
				"error":       "invalid plugin name",
				"plugin_name": name,
				"detail":      err.Error(),
			})
			return
		}
	}
	// Verify the org workspace exists.
	var exists bool
	if err := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		orgID,
	).Scan(&exists); err != nil {
		log.Printf("allowlist: org check failed for %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify org"})
		return
	}
	if !exists {
		c.JSON(http.StatusNotFound, gin.H{"error": "org not found"})
		return
	}
	// Replace atomically: delete all current entries, then insert the new set.
	// The DELETE + INSERTs share one transaction so readers never observe a
	// half-replaced allowlist.
	tx, err := db.DB.BeginTx(ctx, nil)
	if err != nil {
		log.Printf("allowlist: begin tx failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to start transaction"})
		return
	}
	defer tx.Rollback() //nolint:errcheck // superseded by Commit on success path
	if _, err := tx.ExecContext(ctx,
		`DELETE FROM org_plugin_allowlist WHERE org_id = $1`,
		orgID,
	); err != nil {
		log.Printf("allowlist: delete failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update allowlist"})
		return
	}
	for _, name := range req.Plugins {
		// ON CONFLICT DO NOTHING makes duplicate names in the request benign.
		if _, err := tx.ExecContext(ctx, `
			INSERT INTO org_plugin_allowlist (org_id, plugin_name, enabled_by)
			VALUES ($1, $2, $3)
			ON CONFLICT (org_id, plugin_name) DO NOTHING
		`, orgID, name, req.EnabledBy); err != nil {
			log.Printf("allowlist: insert %q failed for org %s: %v", name, orgID, err)
			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update allowlist"})
			return
		}
	}
	if err := tx.Commit(); err != nil {
		log.Printf("allowlist: commit failed for org %s: %v", orgID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to commit allowlist update"})
		return
	}
	c.JSON(http.StatusOK, gin.H{
		"org_id":    orgID,
		"plugins":   req.Plugins,
		"allow_all": len(req.Plugins) == 0,
	})
}

View File

@ -0,0 +1,555 @@
package handlers
import (
"bytes"
"context"
"database/sql"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// ─── helpers ───────────────────────────────────────────────────────────────

// newAllowlistGET builds a recorder + gin context carrying a
// GET /orgs/:id/plugins/allowlist request for the given org.
func newAllowlistGET(orgID string) (*httptest.ResponseRecorder, *gin.Context) {
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: orgID}}
	c.Request = httptest.NewRequest(http.MethodGet, "/orgs/"+orgID+"/plugins/allowlist", nil)
	return w, c
}

// newAllowlistPUT builds a recorder + gin context carrying a
// PUT /orgs/:id/plugins/allowlist request whose body is the JSON
// serialisation of `body`.
func newAllowlistPUT(orgID string, body interface{}) (*httptest.ResponseRecorder, *gin.Context) {
	b, _ := json.Marshal(body)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: orgID}}
	c.Request = httptest.NewRequest(http.MethodPut, "/orgs/"+orgID+"/plugins/allowlist",
		bytes.NewReader(b))
	c.Request.Header.Set("Content-Type", "application/json")
	return w, c
}
// ─── GetAllowlist ──────────────────────────────────────────────────────────

// TestGetAllowlist_OrgNotFound — unknown org ID → 404.
func TestGetAllowlist_OrgNotFound(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-missing").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistGET("org-missing")
	h.GetAllowlist(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404, got %d: %s", w.Code, w.Body.String())
	}
}

// TestGetAllowlist_DBErrorOnOrgCheck — DB failure during the org-existence
// check → 500.
func TestGetAllowlist_DBErrorOnOrgCheck(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnError(sql.ErrConnDone)
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistGET("org-1")
	h.GetAllowlist(c)
	if w.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500, got %d: %s", w.Code, w.Body.String())
	}
}

// TestGetAllowlist_Empty — org exists, no allowlist rows → 200 with an empty
// plugins array and allow_all=true.
func TestGetAllowlist_Empty(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectQuery(`SELECT plugin_name, enabled_by, enabled_at`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"plugin_name", "enabled_by", "enabled_at"}))
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistGET("org-1")
	h.GetAllowlist(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp struct {
		OrgID    string           `json:"org_id"`
		Plugins  []allowlistEntry `json:"plugins"`
		AllowAll bool             `json:"allow_all"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("bad JSON: %v", err)
	}
	if resp.OrgID != "org-1" {
		t.Errorf("expected org_id=org-1, got %q", resp.OrgID)
	}
	if len(resp.Plugins) != 0 {
		t.Errorf("expected 0 plugins, got %d", len(resp.Plugins))
	}
	if !resp.AllowAll {
		t.Error("expected allow_all=true for empty list")
	}
}

// TestGetAllowlist_WithEntries — two rows → 200, entries ordered by
// plugin_name, allow_all=false.
func TestGetAllowlist_WithEntries(t *testing.T) {
	mock := setupTestDB(t)
	ts := time.Date(2026, 4, 1, 0, 0, 0, 0, time.UTC)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectQuery(`SELECT plugin_name, enabled_by, enabled_at`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"plugin_name", "enabled_by", "enabled_at"}).
			AddRow("browser-automation", "admin-ws", ts).
			AddRow("superpowers", "admin-ws", ts))
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistGET("org-1")
	h.GetAllowlist(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp struct {
		OrgID    string           `json:"org_id"`
		Plugins  []allowlistEntry `json:"plugins"`
		AllowAll bool             `json:"allow_all"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("bad JSON: %v", err)
	}
	if len(resp.Plugins) != 2 {
		t.Fatalf("expected 2 plugins, got %d", len(resp.Plugins))
	}
	if resp.Plugins[0].PluginName != "browser-automation" {
		t.Errorf("expected first plugin=browser-automation, got %q", resp.Plugins[0].PluginName)
	}
	if resp.AllowAll {
		t.Error("expected allow_all=false when list is non-empty")
	}
}

// TestGetAllowlist_DBErrorOnQuery — org check passes, allowlist query fails
// → 500.
func TestGetAllowlist_DBErrorOnQuery(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectQuery(`SELECT plugin_name, enabled_by, enabled_at`).
		WithArgs("org-1").
		WillReturnError(sql.ErrConnDone)
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistGET("org-1")
	h.GetAllowlist(c)
	if w.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500, got %d: %s", w.Code, w.Body.String())
	}
}
// ─── PutAllowlist ──────────────────────────────────────────────────────────

// TestPutAllowlist_MissingEnabledBy — enabled_by omitted → 400 before any
// DB mutation (no mock expectations are registered beyond setup).
func TestPutAllowlist_MissingEnabledBy(t *testing.T) {
	setupTestDB(t)
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins": []string{"my-plugin"},
		// enabled_by intentionally omitted
	})
	h.PutAllowlist(c)
	if w.Code != http.StatusBadRequest {
		t.Fatalf("expected 400, got %d: %s", w.Code, w.Body.String())
	}
}

// TestPutAllowlist_InvalidPluginName — path-traversal-style plugin name is
// rejected by validatePluginName → 400, again before any DB mutation.
func TestPutAllowlist_InvalidPluginName(t *testing.T) {
	setupTestDB(t)
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins":    []string{"../../evil"},
		"enabled_by": "admin-ws",
	})
	h.PutAllowlist(c)
	if w.Code != http.StatusBadRequest {
		t.Fatalf("expected 400 for invalid plugin name, got %d: %s", w.Code, w.Body.String())
	}
}

// TestPutAllowlist_OrgNotFound — unknown org ID → 404.
func TestPutAllowlist_OrgNotFound(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-missing").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistPUT("org-missing", map[string]interface{}{
		"plugins":    []string{"my-plugin"},
		"enabled_by": "admin-ws",
	})
	h.PutAllowlist(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404, got %d: %s", w.Code, w.Body.String())
	}
}
func TestPutAllowlist_AddPlugins(t *testing.T) {
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT EXISTS`).
WithArgs("org-1").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
mock.ExpectBegin()
mock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
WithArgs("org-1").
WillReturnResult(sqlmock.NewResult(0, 0))
mock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
WithArgs("org-1", "my-plugin", "admin-ws").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
h := NewOrgPluginAllowlistHandler()
w, c := newAllowlistPUT("org-1", map[string]interface{}{
"plugins": []string{"my-plugin"},
"enabled_by": "admin-ws",
})
h.PutAllowlist(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp struct {
OrgID string `json:"org_id"`
Plugins []string `json:"plugins"`
AllowAll bool `json:"allow_all"`
}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("bad JSON: %v", err)
}
if len(resp.Plugins) != 1 || resp.Plugins[0] != "my-plugin" {
t.Errorf("unexpected plugins: %v", resp.Plugins)
}
if resp.AllowAll {
t.Error("expected allow_all=false for non-empty plugins list")
}
}
func TestPutAllowlist_ClearAllowlist(t *testing.T) {
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT EXISTS`).
WithArgs("org-1").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
mock.ExpectBegin()
mock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
WithArgs("org-1").
WillReturnResult(sqlmock.NewResult(0, 3))
// No INSERT expected — empty plugins slice.
mock.ExpectCommit()
h := NewOrgPluginAllowlistHandler()
w, c := newAllowlistPUT("org-1", map[string]interface{}{
"plugins": []string{},
"enabled_by": "admin-ws",
})
h.PutAllowlist(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp struct {
AllowAll bool `json:"allow_all"`
}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("bad JSON: %v", err)
}
if !resp.AllowAll {
t.Error("expected allow_all=true after clearing all plugins")
}
}
// TestPutAllowlist_MultiplePlugins verifies one INSERT per plugin, issued in
// the order the plugins appear in the request payload.
func TestPutAllowlist_MultiplePlugins(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectBegin()
	mock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
		WithArgs("org-1").
		WillReturnResult(sqlmock.NewResult(0, 0))
	mock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
		WithArgs("org-1", "browser-automation", "admin-ws").
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
		WithArgs("org-1", "superpowers", "admin-ws").
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectCommit()
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins":    []string{"browser-automation", "superpowers"},
		"enabled_by": "admin-ws",
	})
	h.PutAllowlist(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
}
// TestPutAllowlist_InsertFails verifies that a DB error during INSERT rolls
// the transaction back and the handler responds 500.
func TestPutAllowlist_InsertFails(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("org-1").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	mock.ExpectBegin()
	mock.ExpectExec(`DELETE FROM org_plugin_allowlist`).
		WithArgs("org-1").
		WillReturnResult(sqlmock.NewResult(0, 0))
	mock.ExpectExec(`INSERT INTO org_plugin_allowlist`).
		WithArgs("org-1", "my-plugin", "admin-ws").
		WillReturnError(sql.ErrConnDone)
	mock.ExpectRollback()
	h := NewOrgPluginAllowlistHandler()
	w, c := newAllowlistPUT("org-1", map[string]interface{}{
		"plugins":    []string{"my-plugin"},
		"enabled_by": "admin-ws",
	})
	h.PutAllowlist(c)
	if w.Code != http.StatusInternalServerError {
		t.Fatalf("expected 500 on insert failure, got %d: %s", w.Code, w.Body.String())
	}
}
// ─── resolveOrgID ──────────────────────────────────────────────────────────
// TestResolveOrgID_OrgRoot: a workspace with a NULL parent_id is its own org
// root, so resolveOrgID returns the workspace's own ID.
func TestResolveOrgID_OrgRoot(t *testing.T) {
	mock := setupTestDB(t)
	// workspace has no parent → it IS the org root
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-root").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	got, err := resolveOrgID(context.Background(), "ws-root")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got != "ws-root" {
		t.Errorf("expected ws-root, got %q", got)
	}
}
// TestResolveOrgID_WithParent: a workspace with a non-NULL parent_id resolves
// to the parent's ID (one-level hierarchy: parent is the org root).
func TestResolveOrgID_WithParent(t *testing.T) {
	mock := setupTestDB(t)
	// workspace has a parent → parent is the org root
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-child").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow("ws-parent"))
	got, err := resolveOrgID(context.Background(), "ws-child")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got != "ws-parent" {
		t.Errorf("expected ws-parent, got %q", got)
	}
}
// TestResolveOrgID_NotFound: sql.ErrNoRows is swallowed — an unknown
// workspace yields ("", nil) rather than an error.
func TestResolveOrgID_NotFound(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-ghost").
		WillReturnError(sql.ErrNoRows)
	got, err := resolveOrgID(context.Background(), "ws-ghost")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got != "" {
		t.Errorf("expected empty string for not-found workspace, got %q", got)
	}
}
// ─── checkOrgPluginAllowlist ───────────────────────────────────────────────
// TestCheckOrgPluginAllowlist_AllowAll_EmptyList: a plugin absent from the
// list is still allowed when the allowlist table is empty (allow-all default).
func TestCheckOrgPluginAllowlist_AllowAll_EmptyList(t *testing.T) {
	mock := setupTestDB(t)
	// resolveOrgID: no parent → ws-1 is org root
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// plugin NOT in list
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "my-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	// count = 0 → allow-all
	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM org_plugin_allowlist`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
	blocked, reason := checkOrgPluginAllowlist(context.Background(), "ws-1", "my-plugin")
	if blocked {
		t.Errorf("expected not blocked (allow-all), got blocked: %s", reason)
	}
}
// TestCheckOrgPluginAllowlist_Allowed_OnList: a plugin found on the allowlist
// short-circuits to allowed — no COUNT query is issued.
func TestCheckOrgPluginAllowlist_Allowed_OnList(t *testing.T) {
	mock := setupTestDB(t)
	// resolveOrgID: no parent
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// plugin IS in the allowlist
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "my-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	blocked, reason := checkOrgPluginAllowlist(context.Background(), "ws-1", "my-plugin")
	if blocked {
		t.Errorf("expected not blocked (on list), got blocked: %s", reason)
	}
}
// TestCheckOrgPluginAllowlist_Blocked_NotOnList: with a non-empty allowlist,
// an unlisted plugin is blocked and a human-readable reason is returned.
func TestCheckOrgPluginAllowlist_Blocked_NotOnList(t *testing.T) {
	mock := setupTestDB(t)
	// resolveOrgID: no parent
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// plugin NOT in the list
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "evil-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	// count > 0 → allowlist is active
	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM org_plugin_allowlist`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(2))
	blocked, reason := checkOrgPluginAllowlist(context.Background(), "ws-1", "evil-plugin")
	if !blocked {
		t.Error("expected plugin to be blocked (not on non-empty allowlist)")
	}
	if reason == "" {
		t.Error("expected non-empty reason when blocked")
	}
}
// TestCheckOrgPluginAllowlist_ChildWorkspace_UsesParentOrg: for a child
// workspace the allowlist lookup keys on the resolved parent org ID.
func TestCheckOrgPluginAllowlist_ChildWorkspace_UsesParentOrg(t *testing.T) {
	mock := setupTestDB(t)
	// resolveOrgID: ws-child has parent ws-parent
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-child").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow("ws-parent"))
	// allowlist check uses parent org ID (ws-parent)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-parent", "my-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
	blocked, reason := checkOrgPluginAllowlist(context.Background(), "ws-child", "my-plugin")
	if blocked {
		t.Errorf("expected not blocked (on parent's allowlist), got blocked: %s", reason)
	}
}
// TestCheckOrgPluginAllowlist_FailOpen_OnResolveError: a DB failure while
// resolving the org must not block plugin installs (fail-open policy).
func TestCheckOrgPluginAllowlist_FailOpen_OnResolveError(t *testing.T) {
	mock := setupTestDB(t)
	// DB error during resolveOrgID → fail-open
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnError(sql.ErrConnDone)
	blocked, _ := checkOrgPluginAllowlist(context.Background(), "ws-1", "any-plugin")
	if blocked {
		t.Error("expected fail-open (not blocked) on DB error during resolveOrgID")
	}
}
// TestCheckOrgPluginAllowlist_FailOpen_OnExistsError: a DB failure on the
// membership EXISTS query also fails open.
func TestCheckOrgPluginAllowlist_FailOpen_OnExistsError(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	// DB error on EXISTS check → fail-open
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "any-plugin").
		WillReturnError(sql.ErrConnDone)
	blocked, _ := checkOrgPluginAllowlist(context.Background(), "ws-1", "any-plugin")
	if blocked {
		t.Error("expected fail-open (not blocked) on DB error during EXISTS check")
	}
}
// TestCheckOrgPluginAllowlist_FailOpen_OnCountError: a DB failure on the
// COUNT query (allowlist-active check) also fails open.
func TestCheckOrgPluginAllowlist_FailOpen_OnCountError(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WithArgs("ws-1").
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs("ws-1", "any-plugin").
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	// DB error on COUNT check → fail-open
	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM org_plugin_allowlist`).
		WithArgs("ws-1").
		WillReturnError(sql.ErrConnDone)
	blocked, _ := checkOrgPluginAllowlist(context.Background(), "ws-1", "any-plugin")
	if blocked {
		t.Error("expected fail-open (not blocked) on DB error during COUNT check")
	}
}

View File

@ -63,6 +63,14 @@ func (h *PluginsHandler) Install(c *gin.Context) {
// has already cleaned it up (and its returned result is nil).
defer os.RemoveAll(result.StagedDir)
// Org plugin allowlist gate (#591).
// If the workspace's org has a non-empty allowlist, the plugin must be
// on it. An empty allowlist means allow-all (backward compat).
if blocked, reason := checkOrgPluginAllowlist(ctx, workspaceID, result.PluginName); blocked {
c.JSON(http.StatusForbidden, gin.H{"error": reason})
return
}
if err := h.deliverToContainer(ctx, workspaceID, result); err != nil {
var he *httpErr
if errors.As(err, &he) {

View File

@ -0,0 +1,107 @@
package handlers
import (
"encoding/json"
"fmt"
"log"
"net/http"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/gin-gonic/gin"
)
// aguiEvent is the AG-UI envelope written to the SSE stream.
// Spec: {"type":"<event_name>","timestamp":<unix_ms>,"data":{...}}
type aguiEvent struct {
	Type      string          `json:"type"`      // AG-UI event name, e.g. "TASK_UPDATED"
	Timestamp int64           `json:"timestamp"` // Unix milliseconds
	Data      json.RawMessage `json:"data"`      // pre-serialized payload, passed through verbatim
}
// SSEHandler streams workspace events as AG-UI-compatible Server-Sent Events.
// It holds only the in-process broadcaster; all per-request state lives in
// StreamEvents.
type SSEHandler struct {
	broadcaster *events.Broadcaster // event source shared across handlers
}
// NewSSEHandler constructs an SSEHandler whose events are sourced from b.
func NewSSEHandler(b *events.Broadcaster) *SSEHandler {
	handler := &SSEHandler{broadcaster: b}
	return handler
}
// StreamEvents handles GET /workspaces/:id/events/stream.
//
// Authentication is enforced by the upstream WorkspaceAuth middleware (bearer
// token bound to :id). This handler only needs to:
//  1. Verify the workspace exists (returns 404 if not).
//  2. Set SSE headers.
//  3. Subscribe to the in-process broadcaster and relay events until the
//     client disconnects (context cancellation).
//
// AG-UI envelope per event:
//
//	data: {"type":"<event>","timestamp":<unix_ms>,"data":{...}}\n\n
func (h *SSEHandler) StreamEvents(c *gin.Context) {
	workspaceID := c.Param("id")
	ctx := c.Request.Context()
	// Verify the workspace exists — 404 early rather than serving an empty stream.
	var exists bool
	if err := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		workspaceID,
	).Scan(&exists); err != nil {
		log.Printf("SSE: workspace existence check failed for %s: %v", workspaceID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify workspace"})
		return
	}
	if !exists {
		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
		return
	}
	// SSE response headers. These must be written before the first flush.
	c.Header("Content-Type", "text/event-stream")
	c.Header("Cache-Control", "no-cache")
	c.Header("Connection", "keep-alive")
	// Instruct nginx / reverse-proxies to disable buffering so events reach
	// the client immediately rather than being held in a proxy buffer.
	c.Header("X-Accel-Buffering", "no")
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		// Should never happen with gin's responseWriter, but guard defensively.
		c.JSON(http.StatusInternalServerError, gin.H{"error": "streaming not supported"})
		return
	}
	// Subscribe before writing anything so no events are missed between the
	// ping and the relay loop.
	ch, cancel := h.broadcaster.SubscribeSSE(workspaceID)
	defer cancel()
	// Send an initial SSE comment so the client knows the stream is live.
	fmt.Fprintf(c.Writer, ": ping\n\n")
	flusher.Flush()
	for {
		select {
		case <-ctx.Done():
			// Client disconnected (or request cancelled) — stop relaying.
			return
		case msg, ok := <-ch:
			if !ok {
				// Broadcaster closed the channel — nothing more to stream.
				return
			}
			env := aguiEvent{
				Type:      msg.Event,
				Timestamp: msg.Timestamp.UnixMilli(),
				Data:      msg.Payload,
			}
			b, err := json.Marshal(env)
			if err != nil {
				// Drop the bad event but keep the stream alive.
				log.Printf("SSE: marshal error for workspace %s event %s: %v", workspaceID, msg.Event, err)
				continue
			}
			// NOTE(review): write errors are ignored here; presumably a dead
			// connection surfaces via ctx.Done() on the next iteration — confirm.
			fmt.Fprintf(c.Writer, "data: %s\n\n", b)
			flusher.Flush()
		}
	}
}

View File

@ -0,0 +1,237 @@
package handlers
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// expectWorkspaceExists queues the workspace-existence EXISTS query that
// StreamEvents issues before anything else.
func expectWorkspaceExists(mock sqlmock.Sqlmock, workspaceID string, exists bool) {
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs(workspaceID).
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(exists))
}
// runSSEHandler starts StreamEvents in a background goroutine with a
// cancellable request context.
//
// Returns:
//   - w: recorder capturing the SSE response body and headers
//   - cancel: cancels the request context, which makes StreamEvents return
//   - done: closed once the handler goroutine has exited
//
// Callers should sleep briefly after this returns so the handler can
// subscribe before any broadcast, then call cancel() and receive on done to
// drain the goroutine.
//
// (Fixes a stale doc comment that referenced a nonexistent waitAfterStart
// parameter and a "drain function" that was never returned; the named return
// previously called `inject` is the context cancel.)
func runSSEHandler(t *testing.T, h *SSEHandler, workspaceID string) (
	w *httptest.ResponseRecorder,
	cancel func(),
	done <-chan struct{},
) {
	t.Helper()
	ctx, cancelCtx := context.WithCancel(context.Background())
	w = httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: workspaceID}}
	c.Request = httptest.NewRequest("GET", "/workspaces/"+workspaceID+"/events/stream", nil).WithContext(ctx)
	doneCh := make(chan struct{})
	go func() {
		defer close(doneCh)
		h.StreamEvents(c)
	}()
	return w, cancelCtx, doneCh
}
// TestSSE_ContentType verifies the handler sets text/event-stream on the response.
func TestSSE_ContentType(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	// Allow the handler to subscribe, then tear it down.
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	// HasPrefix tolerates a charset suffix, e.g. "text/event-stream; charset=utf-8".
	ct := w.Header().Get("Content-Type")
	if !strings.HasPrefix(ct, "text/event-stream") {
		t.Errorf("expected Content-Type text/event-stream, got %q", ct)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_InitialPing verifies the handler emits the ": ping" SSE comment on connect.
func TestSSE_InitialPing(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	// Allow the handler to subscribe and write the ping, then tear it down.
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	body := w.Body.String()
	if !strings.Contains(body, ": ping") {
		t.Errorf("expected SSE ping comment, body was:\n%s", body)
	}
	// Consistency with the other SSE tests: fail if the queued DB
	// expectations were not all consumed.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_AGUIFormat verifies that a broadcast event is wrapped in the AG-UI envelope.
func TestSSE_AGUIFormat(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	// Wait for the handler goroutine to reach its select loop.
	time.Sleep(30 * time.Millisecond)
	b.BroadcastOnly("ws-1", "TASK_UPDATED", map[string]string{"status": "running"})
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	body := w.Body.String()
	// Find the first "data: ..." line.
	var dataLine string
	for _, line := range strings.Split(body, "\n") {
		if strings.HasPrefix(line, "data: ") {
			dataLine = strings.TrimPrefix(line, "data: ")
			break
		}
	}
	if dataLine == "" {
		t.Fatalf("no data: line found in SSE response:\n%s", body)
	}
	// Decode into the AG-UI envelope shape and check each field.
	var env struct {
		Type      string          `json:"type"`
		Timestamp int64           `json:"timestamp"`
		Data      json.RawMessage `json:"data"`
	}
	if err := json.Unmarshal([]byte(dataLine), &env); err != nil {
		t.Fatalf("invalid AG-UI envelope JSON %q: %v", dataLine, err)
	}
	if env.Type != "TASK_UPDATED" {
		t.Errorf("expected type TASK_UPDATED, got %q", env.Type)
	}
	if env.Timestamp <= 0 {
		t.Errorf("expected positive timestamp, got %d", env.Timestamp)
	}
	if len(env.Data) == 0 || string(env.Data) == "null" {
		t.Errorf("expected non-null data field, got %q", string(env.Data))
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_WorkspaceFilter verifies that events for a different workspace are NOT delivered.
func TestSSE_WorkspaceFilter(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	time.Sleep(30 * time.Millisecond)
	// Broadcast to a completely different workspace.
	b.BroadcastOnly("ws-99", "AGENT_MESSAGE", map[string]string{"text": "secret"})
	time.Sleep(30 * time.Millisecond)
	cancel()
	<-done
	// The stream for ws-1 must contain no event payloads at all.
	body := w.Body.String()
	for _, line := range strings.Split(body, "\n") {
		if strings.HasPrefix(line, "data: ") {
			t.Errorf("expected no data: events for different workspace, got: %s", line)
		}
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_WorkspaceNotFound verifies a 404 is returned when the workspace does not exist.
// Runs synchronously — the handler returns before subscribing, so no goroutine needed.
func TestSSE_WorkspaceNotFound(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "missing-ws", false)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "missing-ws"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/missing-ws/events/stream", nil)
	h.StreamEvents(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404 for missing workspace, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestSSE_MultipleEventsDelivered verifies multiple sequential broadcasts all
// arrive, in broadcast order.
func TestSSE_MultipleEventsDelivered(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExists(mock, "ws-1", true)
	b := newTestBroadcaster()
	h := NewSSEHandler(b)
	w, cancel, done := runSSEHandler(t, h, "ws-1")
	time.Sleep(30 * time.Millisecond)
	b.BroadcastOnly("ws-1", "AGENT_MESSAGE", map[string]string{"msg": "one"})
	b.BroadcastOnly("ws-1", "TASK_UPDATED", map[string]string{"status": "done"})
	b.BroadcastOnly("ws-1", "A2A_RESPONSE", map[string]string{"result": "ok"})
	time.Sleep(50 * time.Millisecond)
	cancel()
	<-done
	body := w.Body.String()
	var dataLines []string
	for _, line := range strings.Split(body, "\n") {
		if strings.HasPrefix(line, "data: ") {
			dataLines = append(dataLines, line)
		}
	}
	if len(dataLines) != 3 {
		t.Errorf("expected 3 data: lines, got %d:\n%s", len(dataLines), body)
	}
	// Verify event types appear in order.
	expectedTypes := []string{"AGENT_MESSAGE", "TASK_UPDATED", "A2A_RESPONSE"}
	for i, dl := range dataLines {
		var env struct {
			Type string `json:"type"`
		}
		if err := json.Unmarshal([]byte(strings.TrimPrefix(dl, "data: ")), &env); err != nil {
			t.Fatalf("line %d: invalid JSON: %v", i, err)
		}
		if env.Type != expectedTypes[i] {
			t.Errorf("line %d: expected type %s, got %s", i, expectedTypes[i], env.Type)
		}
	}
	// Consistency with the other SSE tests: fail if the queued DB
	// expectations were not all consumed.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}

View File

@ -10,6 +10,7 @@ import (
"path/filepath"
"strings"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/crypto"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware"
@ -59,6 +60,14 @@ func (h *WorkspaceHandler) SetEnvMutators(r *provisionhook.Registry) {
h.envMutators = r
}
// TokenRegistry returns the provisionhook.Registry so the router can
// wire the GET /admin/github-installation-token handler without coupling
// to WorkspaceHandler's internals. Returns nil when no plugin has been
// registered (dev / self-hosted deployments without a GitHub App).
//
// Note: this exposes the same registry set via SetEnvMutators — the two
// accessors share h.envMutators.
func (h *WorkspaceHandler) TokenRegistry() *provisionhook.Registry {
	return h.envMutators
}
// Create handles POST /workspaces
func (h *WorkspaceHandler) Create(c *gin.Context) {
var payload models.CreateWorkspacePayload
@ -129,17 +138,59 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
return
}
// Insert workspace with runtime persisted in DB
_, err := db.DB.ExecContext(ctx, `
// Begin a transaction so the workspace row and any initial secrets are
// committed atomically. A secret-encrypt or DB error rolls back the
// workspace insert so we never leave a workspace row with missing secrets.
tx, txErr := db.DB.BeginTx(ctx, nil)
if txErr != nil {
log.Printf("Create workspace: begin tx error: %v", txErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create workspace"})
return
}
// Insert workspace with runtime persisted in DB (inside transaction)
_, err := tx.ExecContext(ctx, `
INSERT INTO workspaces (id, name, role, tier, runtime, awareness_namespace, status, parent_id, workspace_dir, workspace_access)
VALUES ($1, $2, $3, $4, $5, $6, 'provisioning', $7, $8, $9)
`, id, payload.Name, role, payload.Tier, payload.Runtime, awarenessNamespace, payload.ParentID, workspaceDir, workspaceAccess)
if err != nil {
tx.Rollback() //nolint:errcheck
log.Printf("Create workspace error: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create workspace"})
return
}
// Persist initial secrets from the create payload (inside same transaction).
// nil/empty map is a no-op. Any failure rolls back the workspace insert
// so we never have a workspace row without its intended secrets.
for k, v := range payload.Secrets {
encrypted, encErr := crypto.Encrypt([]byte(v))
if encErr != nil {
tx.Rollback() //nolint:errcheck
log.Printf("Create workspace %s: failed to encrypt secret %q: %v", id, k, encErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encrypt secret: " + k})
return
}
version := crypto.CurrentEncryptionVersion()
if _, dbErr := tx.ExecContext(ctx, `
INSERT INTO workspace_secrets (workspace_id, key, encrypted_value, encryption_version)
VALUES ($1, $2, $3, $4)
ON CONFLICT (workspace_id, key) DO UPDATE
SET encrypted_value = $3, encryption_version = $4, updated_at = now()
`, id, k, encrypted, version); dbErr != nil {
tx.Rollback() //nolint:errcheck
log.Printf("Create workspace %s: failed to persist secret %q: %v", id, k, dbErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save secret: " + k})
return
}
}
if commitErr := tx.Commit(); commitErr != nil {
log.Printf("Create workspace %s: transaction commit failed: %v", id, commitErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create workspace"})
return
}
// Insert canvas layout — non-fatal: workspace can be dragged into position later
if _, err := db.DB.ExecContext(ctx, `
INSERT INTO canvas_layouts (workspace_id, x, y) VALUES ($1, $2, $3)

View File

@ -0,0 +1,125 @@
package handlers
import (
"context"
"database/sql"
"fmt"
"log"
"net/http"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/gin-gonic/gin"
)
// Pricing constants — Claude Sonnet default rates (USD per token).
// Callers with different models should override via env vars in a future phase.
// NOTE(review): rates are hard-coded snapshots; confirm against current
// published pricing before relying on estimated_cost_usd for billing.
const (
	tokenCostPerInputToken  = 0.000003 // $3 / 1M input tokens
	tokenCostPerOutputToken = 0.000015 // $15 / 1M output tokens
)
// MetricsHandler serves GET /workspaces/:id/metrics. Stateless — all data
// comes from the package-level db.DB connection.
type MetricsHandler struct{}
// NewMetricsHandler constructs a MetricsHandler.
func NewMetricsHandler() *MetricsHandler {
	return new(MetricsHandler)
}
// GetMetrics handles GET /workspaces/:id/metrics.
//
// Aggregates LLM token usage for the current UTC day.
// Auth: WorkspaceAuth middleware (bearer token bound to :id).
//
// Response:
//
//	{
//	  "input_tokens": <N>,
//	  "output_tokens": <N>,
//	  "total_calls": <N>,
//	  "estimated_cost_usd": "0.000000",
//	  "period_start": "2026-04-17T00:00:00Z",
//	  "period_end": "2026-04-18T00:00:00Z"
//	}
func (h *MetricsHandler) GetMetrics(c *gin.Context) {
	ctx := c.Request.Context()
	workspaceID := c.Param("id")
	// 404 for unknown workspaces before touching the usage table.
	var found bool
	checkErr := db.DB.QueryRowContext(ctx,
		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`,
		workspaceID,
	).Scan(&found)
	if checkErr != nil {
		log.Printf("metrics: workspace check failed for %s: %v", workspaceID, checkErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to verify workspace"})
		return
	}
	if !found {
		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
		return
	}
	start := todayUTC()
	end := start.Add(24 * time.Hour)
	var (
		inTokens  int64
		outTokens int64
		calls     int64
		costUSD   float64
	)
	// COALESCE guarantees one row of zeros even when no usage exists yet.
	scanErr := db.DB.QueryRowContext(ctx, `
		SELECT
			COALESCE(SUM(input_tokens), 0),
			COALESCE(SUM(output_tokens), 0),
			COALESCE(SUM(call_count), 0),
			COALESCE(SUM(estimated_cost_usd), 0)
		FROM workspace_token_usage
		WHERE workspace_id = $1
		AND period_start = $2
	`, workspaceID, start).Scan(&inTokens, &outTokens, &calls, &costUSD)
	if scanErr != nil && scanErr != sql.ErrNoRows {
		log.Printf("metrics: query failed for workspace %s: %v", workspaceID, scanErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch metrics"})
		return
	}
	c.JSON(http.StatusOK, gin.H{
		"input_tokens":       inTokens,
		"output_tokens":      outTokens,
		"total_calls":        calls,
		"estimated_cost_usd": fmt.Sprintf("%.6f", costUSD),
		"period_start":       start.Format(time.RFC3339),
		"period_end":         end.Format(time.RFC3339),
	})
}
// todayUTC returns the start of the current UTC day (midnight).
func todayUTC() time.Time {
now := time.Now().UTC()
return time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.UTC)
}
// upsertTokenUsage accumulates input/output token counts for workspaceID's
// current UTC day. Cost is estimated using the default per-token pricing
// constants. Always call in a detached goroutine — never block the A2A path.
//
// NOTE(review): callers are told to invoke this from a detached goroutine,
// but the function uses the passed ctx for the DB write — if a caller passes
// the request-scoped context, it may already be cancelled by the time the
// INSERT runs. Confirm callers pass a background (non-request) context.
func upsertTokenUsage(ctx context.Context, workspaceID string, inputTokens, outputTokens int64) {
	// Nothing to record — avoid writing zero rows.
	if inputTokens == 0 && outputTokens == 0 {
		return
	}
	periodStart := todayUTC()
	cost := float64(inputTokens)*tokenCostPerInputToken + float64(outputTokens)*tokenCostPerOutputToken
	// Atomic accumulate: one row per (workspace, UTC day), totals added on conflict.
	_, err := db.DB.ExecContext(ctx, `
		INSERT INTO workspace_token_usage
			(workspace_id, period_start, input_tokens, output_tokens, call_count, estimated_cost_usd, updated_at)
		VALUES ($1, $2, $3, $4, 1, $5, NOW())
		ON CONFLICT (workspace_id, period_start) DO UPDATE SET
			input_tokens = workspace_token_usage.input_tokens + EXCLUDED.input_tokens,
			output_tokens = workspace_token_usage.output_tokens + EXCLUDED.output_tokens,
			call_count = workspace_token_usage.call_count + 1,
			estimated_cost_usd = workspace_token_usage.estimated_cost_usd + EXCLUDED.estimated_cost_usd,
			updated_at = NOW()
	`, workspaceID, periodStart, inputTokens, outputTokens, cost)
	if err != nil {
		// Best-effort accounting: log and move on, never fail the caller.
		log.Printf("upsertTokenUsage: failed for workspace %s: %v", workspaceID, err)
	}
}

View File

@ -0,0 +1,262 @@
package handlers
import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// usageColumns matches the SELECT in GetMetrics — one aggregate per column,
// in the same order the handler scans them.
var usageColumns = []string{
	"sum_input_tokens", "sum_output_tokens", "sum_call_count", "sum_cost",
}
// expectWorkspaceExistsMetrics queues the workspace EXISTS check that
// GetMetrics issues before reading the usage table.
func expectWorkspaceExistsMetrics(mock sqlmock.Sqlmock, workspaceID string, exists bool) {
	rows := sqlmock.NewRows([]string{"exists"}).AddRow(exists)
	mock.ExpectQuery(`SELECT EXISTS`).
		WithArgs(workspaceID).
		WillReturnRows(rows)
}
// TestGetMetrics_HappyPath verifies the handler returns correct aggregated data.
func TestGetMetrics_HappyPath(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExistsMetrics(mock, "ws-1", true)
	// Simulate one row with usage data.
	mock.ExpectQuery(`SELECT\s+COALESCE\(SUM\(input_tokens\)`).
		WithArgs("ws-1", sqlmock.AnyArg()).
		WillReturnRows(sqlmock.NewRows(usageColumns).
			AddRow(int64(1500), int64(300), int64(5), float64(0.009)))
	h := NewMetricsHandler()
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/metrics", nil)
	h.GetMetrics(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	// Decode the full response shape documented on GetMetrics.
	var resp struct {
		InputTokens   int64  `json:"input_tokens"`
		OutputTokens  int64  `json:"output_tokens"`
		TotalCalls    int64  `json:"total_calls"`
		EstimatedCost string `json:"estimated_cost_usd"`
		PeriodStart   string `json:"period_start"`
		PeriodEnd     string `json:"period_end"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("invalid JSON: %v\n%s", err, w.Body.String())
	}
	if resp.InputTokens != 1500 {
		t.Errorf("expected input_tokens=1500, got %d", resp.InputTokens)
	}
	if resp.OutputTokens != 300 {
		t.Errorf("expected output_tokens=300, got %d", resp.OutputTokens)
	}
	if resp.TotalCalls != 5 {
		t.Errorf("expected total_calls=5, got %d", resp.TotalCalls)
	}
	if resp.EstimatedCost == "" {
		t.Error("expected non-empty estimated_cost_usd")
	}
	if resp.PeriodStart == "" {
		t.Error("expected non-empty period_start")
	}
	if resp.PeriodEnd == "" {
		t.Error("expected non-empty period_end")
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestGetMetrics_WorkspaceNotFound verifies a 404 when workspace is absent.
// No usage query should be issued (ExpectationsWereMet would flag one).
func TestGetMetrics_WorkspaceNotFound(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExistsMetrics(mock, "ghost", false)
	h := NewMetricsHandler()
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ghost"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ghost/metrics", nil)
	h.GetMetrics(c)
	if w.Code != http.StatusNotFound {
		t.Fatalf("expected 404, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestGetMetrics_EmptyPeriod verifies the handler returns zeros when no usage exists yet.
func TestGetMetrics_EmptyPeriod(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExistsMetrics(mock, "ws-new", true)
	// COALESCE returns 0 for each column when no rows match.
	mock.ExpectQuery(`SELECT\s+COALESCE\(SUM\(input_tokens\)`).
		WithArgs("ws-new", sqlmock.AnyArg()).
		WillReturnRows(sqlmock.NewRows(usageColumns).
			AddRow(int64(0), int64(0), int64(0), float64(0)))
	h := NewMetricsHandler()
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-new"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-new/metrics", nil)
	h.GetMetrics(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("invalid JSON: %v", err)
	}
	// Verify period_start and period_end are present and distinct.
	ps, _ := resp["period_start"].(string)
	pe, _ := resp["period_end"].(string)
	if ps == "" || pe == "" {
		t.Errorf("expected non-empty period_start/period_end, got %q / %q", ps, pe)
	}
	if ps == pe {
		t.Errorf("period_start and period_end must differ, both are %q", ps)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// TestGetMetrics_CostFormat verifies estimated_cost_usd is formatted to 6 decimal places.
func TestGetMetrics_CostFormat(t *testing.T) {
	mock := setupTestDB(t)
	expectWorkspaceExistsMetrics(mock, "ws-1", true)
	mock.ExpectQuery(`SELECT\s+COALESCE\(SUM\(input_tokens\)`).
		WithArgs("ws-1", sqlmock.AnyArg()).
		WillReturnRows(sqlmock.NewRows(usageColumns).
			AddRow(int64(1000000), int64(0), int64(1), float64(3.0)))
	h := NewMetricsHandler()
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
	c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/metrics", nil)
	h.GetMetrics(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("invalid JSON: %v", err)
	}
	cost, _ := resp["estimated_cost_usd"].(string)
	if len(cost) < 8 {
		// "3.000000" is 8 chars minimum
		t.Errorf("expected at least 8-char cost string, got %q", cost)
	}
	// Consistency with the other metrics tests: fail if queued DB
	// expectations were not all consumed.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Fatalf("unmet DB expectations: %v", err)
	}
}
// ---- parseUsageFromA2AResponse tests ----

// TestParseUsage_JSONRPCResultEnvelope: token usage nested under a JSON-RPC
// result envelope is extracted.
func TestParseUsage_JSONRPCResultEnvelope(t *testing.T) {
	payload := []byte(`{
		"jsonrpc": "2.0",
		"id": "abc",
		"result": {
			"usage": {
				"input_tokens": 100,
				"output_tokens": 50
			}
		}
	}`)
	gotIn, gotOut := parseUsageFromA2AResponse(payload)
	if gotIn != 100 {
		t.Errorf("expected input_tokens=100, got %d", gotIn)
	}
	if gotOut != 50 {
		t.Errorf("expected output_tokens=50, got %d", gotOut)
	}
}
// TestParseUsage_TopLevelUsage: a top-level "usage" object (no JSON-RPC
// envelope) is also recognized.
func TestParseUsage_TopLevelUsage(t *testing.T) {
	payload := []byte(`{
		"usage": {
			"input_tokens": 200,
			"output_tokens": 75
		}
	}`)
	gotIn, gotOut := parseUsageFromA2AResponse(payload)
	if gotIn != 200 {
		t.Errorf("expected input_tokens=200, got %d", gotIn)
	}
	if gotOut != 75 {
		t.Errorf("expected output_tokens=75, got %d", gotOut)
	}
}
// A well-formed response with no usage field anywhere yields zero counts.
func TestParseUsage_NoUsageField(t *testing.T) {
	raw := []byte(`{"jsonrpc":"2.0","id":"x","result":{"message":"hello"}}`)
	gotIn, gotOut := parseUsageFromA2AResponse(raw)
	if gotIn != 0 || gotOut != 0 {
		t.Errorf("expected (0, 0) with no usage field, got (%d, %d)", gotIn, gotOut)
	}
}
// Explicit zero token counts parse to (0, 0) — indistinguishable from absent.
func TestParseUsage_ZeroTokensIgnored(t *testing.T) {
	raw := []byte(`{"result":{"usage":{"input_tokens":0,"output_tokens":0}}}`)
	gotIn, gotOut := parseUsageFromA2AResponse(raw)
	if gotIn != 0 || gotOut != 0 {
		t.Errorf("expected (0, 0) for zero tokens, got (%d, %d)", gotIn, gotOut)
	}
}
// A zero-length body must be handled gracefully and report no usage.
func TestParseUsage_EmptyBody(t *testing.T) {
	gotIn, gotOut := parseUsageFromA2AResponse([]byte{})
	if gotIn != 0 || gotOut != 0 {
		t.Errorf("expected (0, 0) for empty body, got (%d, %d)", gotIn, gotOut)
	}
}
// Malformed JSON must not panic or surface an error — just report no usage.
func TestParseUsage_InvalidJSON(t *testing.T) {
	gotIn, gotOut := parseUsageFromA2AResponse([]byte("not json"))
	if gotIn != 0 || gotOut != 0 {
		t.Errorf("expected (0, 0) for invalid JSON, got (%d, %d)", gotIn, gotOut)
	}
}
// When both a top-level usage object and result.usage are present, the
// nested result.usage must win.
func TestParseUsage_NestedResultPreferredOverTopLevel(t *testing.T) {
	raw := []byte(`{
	"usage": {"input_tokens": 999, "output_tokens": 999},
	"result": {
		"usage": {"input_tokens": 42, "output_tokens": 21}
	}
}`)
	gotIn, gotOut := parseUsageFromA2AResponse(raw)
	if gotIn != 42 {
		t.Errorf("expected result.usage.input_tokens=42, got %d", gotIn)
	}
	if gotOut != 21 {
		t.Errorf("expected result.usage.output_tokens=21, got %d", gotOut)
	}
}

View File

@ -146,10 +146,12 @@ func TestWorkspaceCreate_DBInsertError(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
// Workspace INSERT fails
// Transaction begins, workspace INSERT fails, transaction is rolled back.
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Failing Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WillReturnError(sql.ErrConnDone)
mock.ExpectRollback()
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -175,10 +177,13 @@ func TestWorkspaceCreate_DefaultsApplied(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
// Transaction wraps the workspace INSERT (no secrets in this request).
mock.ExpectBegin()
// Expect workspace INSERT with defaulted tier=1, runtime="langgraph"
mock.ExpectExec("INSERT INTO workspaces").
WithArgs(sqlmock.AnyArg(), "Default Agent", nil, 1, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
// Expect canvas_layouts INSERT (x=0, y=0 — defaults)
mock.ExpectExec("INSERT INTO canvas_layouts").
@ -215,6 +220,117 @@ func TestWorkspaceCreate_DefaultsApplied(t *testing.T) {
}
}
// TestWorkspaceCreate_WithSecrets_Persists asserts that secrets in the create
// payload are written to workspace_secrets inside the same transaction as the
// workspace row, and that the handler returns 201.
func TestWorkspaceCreate_WithSecrets_Persists(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	// External workspace: simplest code path — no provisioner goroutine.
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	// Ordered protocol: Begin → workspace INSERT → secret INSERT → Commit.
	mock.ExpectBegin()
	mock.ExpectExec("INSERT INTO workspaces").
		WithArgs(sqlmock.AnyArg(), "Hermes Agent", nil, 1, "hermes", sqlmock.AnyArg(), (*string)(nil), nil, "none").
		WillReturnResult(sqlmock.NewResult(0, 1))
	// Secret inserted inside the same transaction.
	mock.ExpectExec("INSERT INTO workspace_secrets").
		WithArgs(sqlmock.AnyArg(), "HERMES_API_KEY", sqlmock.AnyArg(), sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectCommit()
	// canvas_layouts (non-fatal, outside tx)
	mock.ExpectExec("INSERT INTO canvas_layouts").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	// Plaintext secret in the create payload — persisted via the same path as
	// POST /workspaces/:id/secrets (see CreateWorkspacePayload.Secrets).
	body := `{"name":"Hermes Agent","runtime":"hermes","external":true,"secrets":{"HERMES_API_KEY":"sk-test-123"}}`
	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Create(c)
	if w.Code != http.StatusCreated {
		t.Errorf("expected status 201, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestWorkspaceCreate_SecretPersistFails_RollsBack asserts that a DB error
// while persisting a secret causes the entire transaction to roll back and
// the handler to return 500. The workspace row must NOT be committed.
func TestWorkspaceCreate_SecretPersistFails_RollsBack(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	// Ordered protocol: Begin → workspace INSERT (succeeds) → secret INSERT
	// (fails) → Rollback. No Commit may ever be observed by the mock.
	mock.ExpectBegin()
	mock.ExpectExec("INSERT INTO workspaces").
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectExec("INSERT INTO workspace_secrets").
		WillReturnError(sql.ErrConnDone) // DB failure while writing secret
	mock.ExpectRollback() // workspace insert must be rolled back
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"name":"Rollback Agent","secrets":{"OPENAI_API_KEY":"sk-fail"}}`
	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Create(c)
	if w.Code != http.StatusInternalServerError {
		t.Errorf("expected status 500, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestWorkspaceCreate_EmptySecrets_OK asserts that an empty secrets map (or
// no secrets key at all) creates the workspace normally without touching
// workspace_secrets.
func TestWorkspaceCreate_EmptySecrets_OK(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	mock.ExpectBegin()
	mock.ExpectExec("INSERT INTO workspaces").
		WillReturnResult(sqlmock.NewResult(0, 1))
	// No ExpectExec for workspace_secrets — empty map must be a no-op.
	// Any unexpected INSERT would surface via ExpectationsWereMet below.
	mock.ExpectCommit()
	mock.ExpectExec("INSERT INTO canvas_layouts").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	// Explicit empty map — exercises the "secrets key present but empty" path.
	body := `{"name":"No Secrets Agent","external":true,"secrets":{}}`
	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.Create(c)
	if w.Code != http.StatusCreated {
		t.Errorf("expected status 201, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// ==================== GET /workspaces (List) ====================
func TestWorkspaceList_Empty(t *testing.T) {

View File

@ -81,6 +81,13 @@ func TenantGuardWithOrgID(configuredOrgID string) gin.HandlerFunc {
c.Next()
return
}
// Tertiary: same-origin Canvas requests on tenant EC2 instances where
// Caddy serves Canvas (:3000) and API (:8080) under the same domain.
// CANVAS_PROXY_URL is set → Referer/Origin matches Host → trusted.
if isSameOriginCanvas(c) {
c.Next()
return
}
// 404 not 403 — existence of this tenant must not be inferable by
// probing other orgs' machines.
c.AbortWithStatus(404)

View File

@ -133,6 +133,64 @@ func TestOrgIDFromReplaySrc(t *testing.T) {
}
}
// Same-origin Canvas bypass: with CANVAS_PROXY_URL active and a Referer that
// matches the request Host, the co-served Canvas must be let through.
func TestTenantGuard_SameOriginCanvasBypass(t *testing.T) {
	saved := canvasProxyActive
	canvasProxyActive = true
	defer func() { canvasProxyActive = saved }()
	router := newGuardedRouter("org-abc")
	rec := httptest.NewRecorder()
	req := httptest.NewRequest("GET", "/workspaces", nil)
	req.Host = "molecule1.moleculesai.app"
	req.Header.Set("Referer", "https://molecule1.moleculesai.app/")
	router.ServeHTTP(rec, req)
	if rec.Code != 200 {
		t.Errorf("same-origin canvas: expected 200, got %d", rec.Code)
	}
}
// Same-origin Canvas bypass must also trigger on a matching Origin header
// (the WebSocket upgrade path, which may carry no Referer).
func TestTenantGuard_SameOriginCanvasViaOrigin(t *testing.T) {
	saved := canvasProxyActive
	canvasProxyActive = true
	defer func() { canvasProxyActive = saved }()
	router := newGuardedRouter("org-abc")
	rec := httptest.NewRecorder()
	req := httptest.NewRequest("GET", "/workspaces", nil)
	req.Host = "molecule1.moleculesai.app"
	req.Header.Set("Origin", "https://molecule1.moleculesai.app")
	router.ServeHTTP(rec, req)
	if rec.Code != 200 {
		t.Errorf("same-origin canvas via Origin: expected 200, got %d", rec.Code)
	}
}
// With CANVAS_PROXY_URL unset the same-origin bypass must stay disabled:
// a matching Referer still yields the tenant guard's 404.
func TestTenantGuard_SameOriginCanvasInactiveWithoutEnv(t *testing.T) {
	saved := canvasProxyActive
	canvasProxyActive = false
	defer func() { canvasProxyActive = saved }()
	router := newGuardedRouter("org-abc")
	rec := httptest.NewRecorder()
	req := httptest.NewRequest("GET", "/workspaces", nil)
	req.Host = "molecule1.moleculesai.app"
	req.Header.Set("Referer", "https://molecule1.moleculesai.app/")
	router.ServeHTTP(rec, req)
	if rec.Code != 404 {
		t.Errorf("same-origin canvas without CANVAS_PROXY_URL: expected 404, got %d", rec.Code)
	}
}
// The allowlist is exact-match, not prefix. "/health/debug" must NOT bypass.
func TestTenantGuard_AllowlistIsExactMatch(t *testing.T) {
gin.SetMode(gin.TestMode)

View File

@ -67,10 +67,17 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc {
// Same lazy-bootstrap contract as WorkspaceAuth: if no live token exists
// anywhere on the platform (fresh install / pre-Phase-30 upgrade), requests
// are let through so existing deployments keep working. Once any workspace
// has a live token every request to these routes MUST present a valid one.
// has a live token every request to these routes MUST present a valid bearer
// token — no Origin-based bypass. (#623)
//
// Any valid workspace bearer token is accepted — the route is not scoped to
// a specific workspace so we only verify the token is live and unrevoked.
//
// NOTE: canvasOriginAllowed / isSameOriginCanvas are intentionally NOT called
// here. The Origin header is trivially forgeable by any container on the
// Docker network; using it as an auth bypass would let an attacker reach
// /settings/secrets, /bundles/import, /events, etc. without a bearer token.
// Those short-circuits belong ONLY in CanvasOrBearer (cosmetic routes).
func AdminAuth(database *sql.DB) gin.HandlerFunc {
return func(c *gin.Context) {
ctx := c.Request.Context()
@ -82,7 +89,7 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
return
}
if hasLive {
// Bearer token path — agents, CLI, and API clients.
// Bearer token is the ONLY accepted credential for admin routes.
tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization"))
if tok != "" {
if err := wsauth.ValidateAnyToken(ctx, database, tok); err != nil {
@ -92,16 +99,6 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
c.Next()
return
}
// Canvas origin path — cross-origin canvas (CORS_ORIGINS match).
if canvasOriginAllowed(c.GetHeader("Origin")) {
c.Next()
return
}
// Same-origin canvas path — tenant image where canvas + API share a host.
if isSameOriginCanvas(c) {
c.Next()
return
}
c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "admin auth required"})
return
}
@ -220,19 +217,25 @@ func isSameOriginCanvas(c *gin.Context) bool {
if !canvasProxyActive {
return false
}
referer := c.GetHeader("Referer")
if referer == "" {
return false
}
host := c.Request.Host
if host == "" {
return false
}
// Referer must start with https://<host>/ or http://<host>/ (trailing
// slash required to prevent hongming-wang.moleculesai.app.evil.com from
// matching hongming-wang.moleculesai.app).
return strings.HasPrefix(referer, "https://"+host+"/") ||
strings.HasPrefix(referer, "http://"+host+"/") ||
referer == "https://"+host ||
referer == "http://"+host
// Check Referer first (standard browser requests).
referer := c.GetHeader("Referer")
if referer != "" {
// Referer must start with https://<host>/ or http://<host>/ (trailing
// slash required to prevent hongming-wang.moleculesai.app.evil.com from
// matching hongming-wang.moleculesai.app).
if strings.HasPrefix(referer, "https://"+host+"/") ||
strings.HasPrefix(referer, "http://"+host+"/") ||
referer == "https://"+host ||
referer == "http://"+host {
return true
}
}
// Fallback: check Origin header (WebSocket upgrade requests may not have
// Referer but always send Origin).
origin := c.GetHeader("Origin")
return origin == "https://"+host || origin == "http://"+host
}

View File

@ -778,3 +778,116 @@ func TestCanvasOriginAllowed_LocalhostDefault(t *testing.T) {
t.Error("random origin should not be allowed")
}
}
// ── Issue #623 regression ─────────────────────────────────────────────────────
// AdminAuth must NOT accept forged Origin headers. Any container on the Docker
// network can set Origin: http://localhost:3000 without a bearer token, which
// previously bypassed AdminAuth on ALL admin-gated routes. (#623, dup #626)
// TestAdminAuth_623_ForgedOrigin_Returns401 — the main regression test:
// a request with a matching CORS origin but no bearer token must be rejected.
func TestAdminAuth_623_ForgedOrigin_Returns401(t *testing.T) {
	mockDB, mock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock: %v", err)
	}
	defer mockDB.Close()
	// Platform has live tokens — AdminAuth is active.
	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
	// CORS_ORIGINS deliberately matches the forged Origin below — the pre-fix
	// canvasOriginAllowed short-circuit would have accepted this request.
	t.Setenv("CORS_ORIGINS", "http://localhost:3000")
	r := gin.New()
	r.GET("/settings/secrets", AdminAuth(mockDB), func(c *gin.Context) {
		c.JSON(http.StatusOK, gin.H{"secrets": []string{"OPENAI_API_KEY"}})
	})
	w := httptest.NewRecorder()
	// #623 attack: forge the canvas Origin header — no bearer token.
	req, _ := http.NewRequest(http.MethodGet, "/settings/secrets", nil)
	req.Header.Set("Origin", "http://localhost:3000")
	r.ServeHTTP(w, req)
	if w.Code != http.StatusUnauthorized {
		t.Errorf("#623 forged Origin bypass: expected 401, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestAdminAuth_623_ForgedCORSOrigin_Returns401 — variant: attacker uses the
// tenant-domain CORS origin from CORS_ORIGINS (not just localhost).
func TestAdminAuth_623_ForgedCORSOrigin_Returns401(t *testing.T) {
	mockDB, mock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock: %v", err)
	}
	defer mockDB.Close()
	// Live tokens exist → AdminAuth enforces bearer auth.
	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
	t.Setenv("CORS_ORIGINS", "https://acme.moleculesai.app")
	r := gin.New()
	r.GET("/admin/secrets", AdminAuth(mockDB), func(c *gin.Context) {
		c.JSON(http.StatusOK, gin.H{"ok": true})
	})
	w := httptest.NewRecorder()
	// Forged tenant-domain Origin, no Authorization header.
	req, _ := http.NewRequest(http.MethodGet, "/admin/secrets", nil)
	req.Header.Set("Origin", "https://acme.moleculesai.app")
	r.ServeHTTP(w, req)
	if w.Code != http.StatusUnauthorized {
		t.Errorf("#623 forged tenant Origin: expected 401, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestAdminAuth_623_ValidBearer_WithOrigin_Passes — bearer + matching Origin
// should still work (the Origin is irrelevant once the bearer validates).
func TestAdminAuth_623_ValidBearer_WithOrigin_Passes(t *testing.T) {
	mockDB, mock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock: %v", err)
	}
	defer mockDB.Close()
	goodToken := "valid-bearer-token-xyz"
	// Tokens are stored hashed — the validation query is keyed on
	// SHA-256(bearer), matching the WithArgs expectation below.
	tokenHash := sha256.Sum256([]byte(goodToken))
	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
	mock.ExpectQuery(validateAnyTokenSelectQuery).
		WithArgs(tokenHash[:]).
		WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("tok-1"))
	// Successful validation also issues an UPDATE on the token row
	// (presumably last-used bookkeeping — confirm in wsauth).
	mock.ExpectExec(validateTokenUpdateQuery).
		WithArgs("tok-1").
		WillReturnResult(sqlmock.NewResult(0, 1))
	t.Setenv("CORS_ORIGINS", "http://localhost:3000")
	r := gin.New()
	r.GET("/settings/secrets", AdminAuth(mockDB), func(c *gin.Context) {
		c.JSON(http.StatusOK, gin.H{"ok": true})
	})
	w := httptest.NewRecorder()
	req, _ := http.NewRequest(http.MethodGet, "/settings/secrets", nil)
	req.Header.Set("Authorization", "Bearer "+goodToken)
	req.Header.Set("Origin", "http://localhost:3000") // present but irrelevant
	r.ServeHTTP(w, req)
	if w.Code != http.StatusOK {
		t.Errorf("bearer+origin: expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}

View File

@ -63,6 +63,10 @@ type CreateWorkspacePayload struct {
WorkspaceDir string `json:"workspace_dir"` // host path to mount as /workspace (empty = isolated volume)
WorkspaceAccess string `json:"workspace_access"` // "none" (default), "read_only", or "read_write" — see #65
ParentID *string `json:"parent_id"`
// Secrets is an optional map of key→plaintext-value pairs to persist as
// workspace secrets at creation time. Stored encrypted (same path as
// POST /workspaces/:id/secrets). Nil/empty map is a no-op.
Secrets map[string]string `json:"secrets"`
Canvas struct {
X float64 `json:"x"`
Y float64 `json:"y"`

View File

@ -0,0 +1,101 @@
package router
import (
"net/http"
"net/http/httptest"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware"
"github.com/gin-gonic/gin"
)
// buildTestTokenEngine returns a minimal Gin engine exposing only the
// test-token route behind AdminAuth — mirroring the registration router.go
// now performs. This lets the HTTP-layer auth gate be exercised without the
// full Setup() dependency graph.
func buildTestTokenEngine(t *testing.T) gin.IRouter {
	t.Helper()
	gin.SetMode(gin.TestMode)
	engine := gin.New()
	handler := handlers.NewAdminTestTokenHandler()
	engine.GET("/admin/workspaces/:id/test-token", middleware.AdminAuth(db.DB), handler.GetTestToken)
	return engine
}
// setupRouterTestDB swaps the package-level db.DB for a sqlmock connection
// and returns its controller. The previous db.DB is restored (and the mock
// connection closed) automatically on test cleanup.
func setupRouterTestDB(t *testing.T) sqlmock.Sqlmock {
	t.Helper()
	conn, ctrl, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock.New: %v", err)
	}
	saved := db.DB
	db.DB = conn
	t.Cleanup(func() {
		db.DB = saved
		conn.Close()
	})
	return ctrl
}
// TestTestTokenRoute_RequiresAdminAuth_WhenTokensExist verifies that once the
// platform has at least one live token, the test-token endpoint returns 401
// for callers that provide no Authorization header. This is the core security
// property added by the fix — without AdminAuth in the router the request
// would reach the handler and mint a new bearer for any workspace UUID.
func TestTestTokenRoute_RequiresAdminAuth_WhenTokensExist(t *testing.T) {
	t.Setenv("MOLECULE_ENV", "development") // enable the handler itself
	mock := setupRouterTestDB(t)
	// HasAnyLiveTokenGlobal: platform has one enrolled workspace.
	mock.ExpectQuery("SELECT COUNT.*FROM workspace_auth_tokens").
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
	r := buildTestTokenEngine(t)
	w := httptest.NewRecorder()
	req := httptest.NewRequest("GET", "/admin/workspaces/ws-target/test-token", nil)
	// No Authorization header — should be rejected by AdminAuth.
	// Assert the engine to http.Handler to serve the request directly.
	r.(http.Handler).ServeHTTP(w, req)
	if w.Code != http.StatusUnauthorized {
		t.Errorf("expected 401 when tokens exist and no auth header, got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}
// TestTestTokenRoute_FailOpenOnFreshInstall verifies that AdminAuth is
// fail-open on a fresh install (HasAnyLiveTokenGlobal == 0), so the test-token
// bootstrap path still works before the first workspace has registered.
func TestTestTokenRoute_FailOpenOnFreshInstall(t *testing.T) {
	t.Setenv("MOLECULE_ENV", "development")
	mock := setupRouterTestDB(t)
	// HasAnyLiveTokenGlobal: no tokens yet — fresh install.
	mock.ExpectQuery("SELECT COUNT.*FROM workspace_auth_tokens").
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
	// Handler's own DB queries: workspace existence check + token insert.
	// These run only because AdminAuth let the request through.
	mock.ExpectQuery("SELECT id FROM workspaces WHERE id =").
		WithArgs("ws-bootstrap").
		WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-bootstrap"))
	mock.ExpectExec("INSERT INTO workspace_auth_tokens").
		WillReturnResult(sqlmock.NewResult(0, 1))
	r := buildTestTokenEngine(t)
	w := httptest.NewRecorder()
	req := httptest.NewRequest("GET", "/admin/workspaces/ws-bootstrap/test-token", nil)
	r.(http.Handler).ServeHTTP(w, req)
	if w.Code != http.StatusOK {
		t.Errorf("expected 200 on fresh install (fail-open), got %d: %s", w.Code, w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations not met: %v", err)
	}
}

View File

@ -279,6 +279,11 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
wsAuth.PUT("/secrets", sech.Set)
wsAuth.DELETE("/secrets/:key", sech.Delete)
wsAuth.GET("/model", sech.GetModel)
// Token usage metrics — cost transparency (#593).
// WorkspaceAuth middleware (on wsAuth) binds the bearer to :id.
mtrh := handlers.NewMetricsHandler()
wsAuth.GET("/metrics", mtrh.GetMetrics)
}
// Global secrets — /settings/secrets is the canonical path; /admin/secrets kept for backward compat.
@ -297,11 +302,24 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
}
// Admin — test token minting (issue #6). Hidden in production via TestTokensEnabled().
// Registered at root (not inside AdminAuth) because it is itself the bootstrap for
// acquiring a token, and it's gated on MOLECULE_ENV / MOLECULE_ENABLE_TEST_TOKENS.
// AdminAuth is a second defence-in-depth layer: on a fresh install with no tokens yet,
// AdminAuth is fail-open (HasAnyLiveTokenGlobal == 0), so the bootstrap still works.
// Once any token exists, callers must present a valid bearer — unauthenticated workspace-
// UUID enumeration is blocked even on non-production instances.
{
tokh := handlers.NewAdminTestTokenHandler()
r.GET("/admin/workspaces/:id/test-token", tokh.GetTestToken)
r.GET("/admin/workspaces/:id/test-token", middleware.AdminAuth(db.DB), tokh.GetTestToken)
}
// Admin — GitHub App installation token refresh (issue #547).
// Long-running workspaces (>60 min) use this endpoint to refresh
// GH_TOKEN without restarting. Returns the current installation token
// from the github-app-auth plugin's in-process cache (which proactively
// refreshes 5 min before expiry). 404 when no GitHub App is configured
// (dev / self-hosted without GITHUB_APP_ID).
{
ghTokH := handlers.NewGitHubTokenHandler(wh.TokenRegistry())
r.GET("/admin/github-installation-token", middleware.AdminAuth(db.DB), ghTokH.GetInstallationToken)
}
// Terminal — shares Docker client with provisioner
@ -390,6 +408,16 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
// depth keeps the route behind AdminAuth regardless.
r.POST("/org/import", middleware.AdminAuth(db.DB), orgh.Import)
// Org plugin allowlist — tool governance (#591).
// Both endpoints are admin-gated: reading the allowlist reveals approved
// tooling policy; writing it enforces org-level install governance.
{
allowlistAdmin := r.Group("", middleware.AdminAuth(db.DB))
aplh := handlers.NewOrgPluginAllowlistHandler()
allowlistAdmin.GET("/orgs/:id/plugins/allowlist", aplh.GetAllowlist)
allowlistAdmin.PUT("/orgs/:id/plugins/allowlist", aplh.PutAllowlist)
}
// Channels (social integrations — Telegram, Slack, Discord, etc.)
chh := handlers.NewChannelHandler(channelMgr)
r.GET("/channels/adapters", chh.ListAdapters)
@ -408,6 +436,11 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
r.POST("/channels/discover", middleware.AdminAuth(db.DB), chh.Discover)
r.POST("/webhooks/:type", chh.Webhook)
// SSE — AG-UI compatible event stream per workspace (#590).
// WorkspaceAuth middleware (on wsAuth) binds the bearer token to :id.
sseh := handlers.NewSSEHandler(broadcaster)
wsAuth.GET("/events/stream", sseh.StreamEvents)
// WebSocket
sh := handlers.NewSocketHandler(hub)
r.GET("/ws", sh.HandleConnect)

View File

@ -0,0 +1 @@
-- Down migration for #593: drop the per-workspace token usage aggregates.
DROP TABLE IF EXISTS workspace_token_usage;

View File

@ -0,0 +1,17 @@
-- Per-workspace LLM token usage tracking (#593 — canvas cost transparency).
-- Stores UTC-day aggregates upserted by the A2A proxy after each LLM call.
-- estimated_cost_usd is computed server-side using fixed per-model rates
-- (default: Claude Sonnet input $3/1M, output $15/1M).
CREATE TABLE IF NOT EXISTS workspace_token_usage (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    workspace_id TEXT NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, -- usage rows die with their workspace
    period_start TIMESTAMPTZ NOT NULL,                   -- start of the UTC-day aggregation bucket
    input_tokens BIGINT NOT NULL DEFAULT 0,
    output_tokens BIGINT NOT NULL DEFAULT 0,
    call_count INTEGER NOT NULL DEFAULT 0,               -- LLM calls folded into this bucket
    estimated_cost_usd NUMERIC(12,6) NOT NULL DEFAULT 0, -- 6-decimal precision matches the API's formatting
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- One row per (workspace, day) — the upsert target for the A2A proxy.
CREATE UNIQUE INDEX IF NOT EXISTS workspace_token_usage_ws_period
    ON workspace_token_usage(workspace_id, period_start);

View File

@ -0,0 +1 @@
-- Down migration for #591: drop the per-org plugin allowlist.
DROP TABLE IF EXISTS org_plugin_allowlist;

View File

@ -0,0 +1,17 @@
-- Per-org plugin allowlist for tool governance (#591).
-- When an org has at least one entry in this table, workspace agents may only
-- install plugins listed here. An empty allowlist means "allow all" (backward
-- compatible with existing deployments).
--
-- org_id references the root/parent workspace that acts as the org anchor.
-- enabled_by records the workspace ID of the admin who added the entry.
CREATE TABLE IF NOT EXISTS org_plugin_allowlist (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    org_id TEXT NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, -- allowlist dies with the org anchor
    plugin_name TEXT NOT NULL,
    enabled_by TEXT NOT NULL, -- workspace ID of the granting admin
    enabled_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- One entry per (org, plugin); duplicate inserts are rejected by this index.
CREATE UNIQUE INDEX IF NOT EXISTS org_plugin_allowlist_org_plugin
    ON org_plugin_allowlist(org_id, plugin_name);

View File

@ -48,6 +48,7 @@ import (
"context"
"fmt"
"sync"
"time"
)
// EnvMutator is implemented by plugins that want to inject env vars
@ -64,6 +65,34 @@ type EnvMutator interface {
MutateEnv(ctx context.Context, workspaceID string, env map[string]string) error
}
// TokenProvider is an optional interface that EnvMutator implementations
// may also satisfy. When a mutator implements TokenProvider the platform
// can serve GET /admin/github-installation-token, allowing long-running
// workspaces to fetch a fresh GitHub token without restarting.
//
// # Why a separate interface?
//
// EnvMutator.MutateEnv is called once at provision time and writes into
// an env map. Calling it again just to read the current token would be
// semantically wrong and potentially unsafe (the env map is a live
// workspace struct). TokenProvider cleanly separates "what do I inject
// at boot?" from "what is the live token right now?".
//
// # Plugin contract
//
// Token must return the current valid token and the time at which it
// will expire. If the plugin's internal cache is past its refresh
// threshold it must block until a new token is obtained before
// returning. Token should never return an expired token — callers rely
// on this guarantee and do not do their own expiry check.
//
// Returning a non-nil error causes the HTTP handler to respond 500 and
// log "[github] token refresh failed: <err>". The workspace will retry
// on its next credential-helper invocation.
type TokenProvider interface {
	// Token returns the current live token and its expiry time, per the
	// plugin contract documented above (never returns an expired token).
	Token(ctx context.Context) (token string, expiresAt time.Time, err error)
}
// Registry holds the ordered list of EnvMutator instances the
// provisioner runs before each workspace boot. Safe for concurrent
// registration + execution.
@ -112,6 +141,26 @@ func (r *Registry) Names() []string {
return names
}
// FirstTokenProvider returns the first registered mutator that also
// implements TokenProvider, or nil when none does. It backs the
// GET /admin/github-installation-token endpoint so long-running workspaces
// can refresh GITHUB_TOKEN without a container restart.
//
// Calling this on a nil registry is safe and yields nil.
func (r *Registry) FirstTokenProvider() TokenProvider {
	if r == nil {
		return nil
	}
	r.mu.RLock()
	defer r.mu.RUnlock()
	for _, mut := range r.mutators {
		if provider, ok := mut.(TokenProvider); ok {
			return provider
		}
	}
	return nil
}
// Run calls every registered mutator in order. The first one to return
// a non-nil error aborts the chain — subsequent mutators do NOT run,
// and the error is returned to the caller (which marks the workspace

View File

@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""Deduplicate hook entries in .claude/settings.json across all workspace containers.
Root cause: molecule_runtime's _deep_merge_hooks() uses unconditional list.extend()
when merging plugin settings-fragment.json files. On every plugin install/reinstall
each hook handler is appended again, producing 3-4x duplicates that cause every
hook to fire 3-4x per event.
This script fixes the live settings.json in every running workspace container via
the shared /proc/<PID>/root filesystem (no docker CLI required), then validates the
output is clean JSON. Safe to re-run — the operation is idempotent (already-clean files are skipped).
Upstream fix needed: molecule_runtime.plugins_registry.builtins._deep_merge_hooks()
should deduplicate by (matcher, frozenset(commands)) before writing. Tracked in
molecule-core issue (filed separately).
Usage:
python3 scripts/dedup_settings_hooks.py [--dry-run]
"""
from __future__ import annotations
import glob
import json
import sys
DRY_RUN = "--dry-run" in sys.argv
def dedup_settings(data: dict) -> tuple[dict, dict[str, tuple[int, int]]]:
    """Return (deduped_data, stats) where stats[event] = (before_count, after_count)."""
    # Settings files without a "hooks" section have nothing to deduplicate.
    if "hooks" not in data:
        return data, {}
    cleaned_hooks: dict = {}
    counts: dict[str, tuple[int, int]] = {}
    for event_name, entries in data["hooks"].items():
        kept: list = []
        observed: set = set()
        for entry in entries:
            # Identity of a hook entry = its matcher plus the (unordered) set
            # of commands it runs; the first occurrence wins.
            identity = (
                entry.get("matcher", ""),
                frozenset(h.get("command", "") for h in entry.get("hooks", [])),
            )
            if identity in observed:
                continue
            observed.add(identity)
            kept.append(entry)
        counts[event_name] = (len(entries), len(kept))
        cleaned_hooks[event_name] = kept
    # Shallow-copy so non-hook keys pass through untouched.
    return {**data, "hooks": cleaned_hooks}, counts
def main() -> None:
    """Dedup settings.json hooks in every running workspace container.

    Walks /proc/<PID>/root to reach each container's filesystem (no docker
    CLI needed), rewrites files whose hook lists shrank, and reports a
    per-PID summary. Exits 1 if any file could not be read or parsed.
    """
    pattern = "/proc/*/root/configs/.claude/settings.json"
    paths = sorted(glob.glob(pattern))
    fixed: list[tuple[str, dict]] = []
    already_clean: list[str] = []
    errors: list[tuple[str, str]] = []
    for path in paths:
        try:
            with open(path) as f:
                data = json.load(f)
            deduped, stats = dedup_settings(data)
            # Only rewrite files where at least one event list actually shrank.
            changed = any(before != after for before, after in stats.values())
            if changed:
                if not DRY_RUN:
                    with open(path, "w") as f:
                        json.dump(deduped, f, indent=2)
                        f.write("\n")
                fixed.append((path, stats))
            else:
                already_clean.append(path)
        except PermissionError as e:
            errors.append((path, f"PermissionError: {e}"))
        except json.JSONDecodeError as e:
            errors.append((path, f"JSONDecodeError: {e}"))
        except Exception as e:
            errors.append((path, str(e)))
    mode = "[DRY RUN] " if DRY_RUN else ""
    print(f"{mode}Fixed: {len(fixed)}")
    for path, stats in fixed:
        # Path shape is /proc/<PID>/root/... — component index 2 is the PID.
        pid = path.split("/")[2]
        # Fix: before/after counts were concatenated with no separator
        # ("{b}{a}" printed e.g. "42" for 4→2); use an explicit arrow.
        summary = ", ".join(f"{ev}: {b}→{a}" for ev, (b, a) in stats.items() if b != a)
        print(f" PID {pid}: {summary}")
    print(f"{mode}Already clean: {len(already_clean)}")
    if errors:
        print(f"Errors: {len(errors)}")
        for path, err in errors:
            print(f" {path}: {err}", file=sys.stderr)
        sys.exit(1)

View File

@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""Verify settings.json hook deduplication across all workspace containers.
Exits 0 if all containers have clean (no-duplicate) hook lists.
Exits 1 if any container still has duplicate hook entries.
Usage:
python3 scripts/verify_settings_hooks.py
"""
from __future__ import annotations
import glob
import json
import sys
def has_duplicates(data: dict) -> tuple[bool, dict[str, tuple[int, int]]]:
    """Report whether any hook event list contains duplicate handlers.

    A handler's identity is its ``matcher`` string plus the frozenset of
    its inner hook commands. Returns ``(found, stats)`` where ``stats``
    maps each event name to ``(total_handlers, unique_handlers)``.
    """
    stats: dict[str, tuple[int, int]] = {}
    found = False
    for event, handlers in data.get("hooks", {}).items():
        unique_keys: set = set()
        for entry in handlers:
            commands = frozenset(
                inner.get("command", "") for inner in entry.get("hooks", [])
            )
            identity = (entry.get("matcher", ""), commands)
            found = found or identity in unique_keys
            unique_keys.add(identity)
        stats[event] = (len(handlers), len(unique_keys))
    return found, stats
def main() -> None:
    """Scan all workspace containers' settings.json files and report whether
    any still contain duplicate hook handlers.

    Exits 1 when any file is dirty or unreadable; exits 0 otherwise.
    """
    settings_glob = "/proc/*/root/configs/.claude/settings.json"
    dirty: list[tuple[str, dict]] = []
    clean_count = 0
    errors: list[tuple[str, str]] = []
    for settings_path in sorted(glob.glob(settings_glob)):
        try:
            with open(settings_path) as fh:
                parsed = json.load(fh)
            found, stats = has_duplicates(parsed)
            if found:
                dirty.append((settings_path, stats))
            else:
                clean_count += 1
        except Exception as exc:
            errors.append((settings_path, str(exc)))
    print(f"Clean: {clean_count} Dirty: {len(dirty)} Errors: {len(errors)}")
    for settings_path, stats in dirty:
        # Path layout is /proc/<pid>/root/... so component index 2 is the PID.
        pid = settings_path.split("/")[2]
        summary = ", ".join(
            f"{ev}: {total} total/{unique} unique"
            for ev, (total, unique) in stats.items()
        )
        print(f" DIRTY PID {pid}: {summary}")
    for settings_path, err in errors:
        print(f" ERROR {settings_path}: {err}", file=sys.stderr)
    if dirty or errors:
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,130 @@
# Google ADK Adapter
Molecule AI workspace adapter for [Google Agent Development Kit (ADK)](https://github.com/google/adk-python) — Google's official multi-agent Python SDK (~19k ⭐, Apache-2.0).
## Overview
This adapter bridges the A2A protocol used by the Molecule AI platform to Google ADK's runner/session model. Agents are backed by Google Gemini models via AI Studio or Vertex AI. Each workspace gets an `LlmAgent` wrapped in a `Runner` with an `InMemorySessionService`; sessions are tied to A2A task context IDs for stable, isolated per-conversation state.
**Runtime key:** `google-adk`
## Installation
The adapter dependencies are installed automatically by `entrypoint.sh` from this directory's `requirements.txt`:
```bash
pip install -r adapters/google-adk/requirements.txt
```
You'll also need a Google API key (AI Studio) or Vertex AI credentials.
## Configuration
### `config.yaml`
```yaml
runtime: google-adk
model: google:gemini-2.0-flash # or gemini-1.5-pro, gemini-2.5-flash, etc.
runtime_config:
agent_name: my-agent # optional, default: molecule-adk-agent
max_output_tokens: 8192 # optional, default: 8192
temperature: 1.0 # optional, default: 1.0
```
### Environment Variables
| Variable | Required | Description |
|----------|----------|-------------|
| `GOOGLE_API_KEY` | Yes (unless Vertex AI) | Google AI Studio API key |
| `GOOGLE_GENAI_USE_VERTEXAI` | No | Set to `"1"` to use Vertex AI instead of AI Studio |
| `GOOGLE_CLOUD_PROJECT` | When using Vertex AI | GCP project ID |
| `GOOGLE_CLOUD_LOCATION` | When using Vertex AI | GCP region, e.g. `"us-central1"` |
## Usage Example
```python
import asyncio
from adapter_base import AdapterConfig
from adapters.google_adk.adapter import GoogleADKAdapter
async def main():
config = AdapterConfig(
model="google:gemini-2.0-flash",
system_prompt="You are a helpful assistant.",
runtime_config={
"agent_name": "demo-agent",
"max_output_tokens": 1024,
"temperature": 0.7,
},
workspace_id="ws-demo",
)
adapter = GoogleADKAdapter()
await adapter.setup(config) # validates keys, loads plugins/skills
executor = await adapter.create_executor(config) # returns GoogleADKA2AExecutor
# executor.execute(context, event_queue) is called by the A2A server per turn
print(f"Adapter: {adapter.display_name()} — model {config.model}")
asyncio.run(main())
```
### Running via A2A
Once the workspace is provisioned, send A2A messages as normal:
```bash
curl -X POST http://localhost:8000 \
-H 'Content-Type: application/json' \
-d '{
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"kind": "text", "text": "What is 2 + 2?"}]
}
}
}'
```
## Supported Models
Any model supported by Google ADK and available through your credential path:
| Model | Notes |
|-------|-------|
| `gemini-2.0-flash` | Recommended — fast, cost-effective |
| `gemini-2.5-flash` | Latest preview, strong reasoning |
| `gemini-1.5-pro` | Higher capability, higher latency |
| `gemini-1.5-flash` | Fast, lower cost |
Use the `google:` prefix in `config.yaml` — the adapter strips it before passing the model name to ADK.
## Architecture
```
A2A Request
    ↓
GoogleADKA2AExecutor.execute()
    ├── extract_message_text()   ← shared_runtime helper
    ├── _ensure_session()        ← create/reuse InMemorySessionService session
    └── _build_content()         ← wrap text in google.genai.types.Content
    ↓
runner.run_async(session_id, user_id, new_message)
    ↓
ADK Event stream → filter is_final_response() → extract text
    ↓
event_queue.enqueue_event(new_agent_text_message(reply))
    ↓
A2A Response
```
## License
Apache-2.0 — same as [google/adk-python](https://github.com/google/adk-python).

View File

@ -0,0 +1,392 @@
"""Google ADK adapter for Molecule AI workspace runtime.
Wraps Google's Agent Development Kit (google-adk v1.x) as a Molecule AI
WorkspaceAdapter, bridging the A2A protocol to Google ADK's runner/session
model.
Google ADK concepts used
------------------------
- ``google.adk.agents.LlmAgent`` An LLM-backed agent with instructions and
optional tools. Declared with ``model``, ``name``, and ``instruction``.
- ``google.adk.runners.Runner`` Drives one or more agents inside a session;
``run_async()`` streams ``Event`` objects, including the final response text.
- ``google.adk.sessions.InMemorySessionService`` Manages session state in
memory. Each ``Runner`` owns a single ``InMemorySessionService`` instance.
Runtime-config keys (all optional)
------------------------------------
``max_output_tokens`` int, default 8192. Forwarded to the ADK ``GenerateContentConfig``.
``temperature`` float, default 1.0.
``agent_name`` str, default ``"molecule-adk-agent"``.
Environment variables
---------------------
``GOOGLE_API_KEY`` Google AI Studio key (required for ``gemini-*`` models).
``GOOGLE_GENAI_USE_VERTEXAI`` set to ``"1"`` to use Vertex AI instead of AI
Studio. In that case supply
``GOOGLE_CLOUD_PROJECT`` and
``GOOGLE_CLOUD_LOCATION`` as well.
"""
from __future__ import annotations
import logging
import os
from typing import TYPE_CHECKING, Any
from a2a.server.agent_execution import AgentExecutor, RequestContext
from a2a.server.events import EventQueue
from a2a.utils import new_agent_text_message
from adapter_base import AdapterConfig, BaseAdapter
if TYPE_CHECKING:
pass
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
_DEFAULT_AGENT_NAME = "molecule-adk-agent"
_DEFAULT_MAX_OUTPUT_TOKENS = 8192
_DEFAULT_TEMPERATURE = 1.0
_NO_TEXT_MSG = "Error: message contained no text content."
_NO_RESPONSE_MSG = "(no response generated)"
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor
# ---------------------------------------------------------------------------
class GoogleADKA2AExecutor(AgentExecutor):
    """A2A executor backed by a Google ADK ``Runner``.

    Each executor instance owns a single ``Runner`` and ``InMemorySessionService``.
    Sessions are created on first use and reused across subsequent turns
    (the session_id is derived from the A2A context_id so each task gets a
    stable, isolated session).

    Parameters
    ----------
    model:
        ADK model identifier, e.g. ``"gemini-2.0-flash"`` or
        ``"gemini-1.5-pro"``.
    system_prompt:
        Optional instruction prepended to every conversation. Passed to
        ``LlmAgent(instruction=...)``.
    agent_name:
        Internal ADK agent name. Defaults to ``_DEFAULT_AGENT_NAME``.
    max_output_tokens:
        Token cap forwarded to ``GenerateContentConfig``.
    temperature:
        Sampling temperature forwarded to ``GenerateContentConfig``.
    heartbeat:
        Optional ``HeartbeatLoop`` instance (unused directly but stored for
        future heartbeat integration).
    _runner:
        Inject a pre-built ``Runner`` for testing only. When provided,
        the real ADK ``Runner`` is never constructed.
    """

    # NOTE(review): max_output_tokens and temperature are stored below but
    # _build_runner never builds a GenerateContentConfig from them, so the
    # "forwarded to GenerateContentConfig" claims above are not yet true in
    # this file — confirm and wire them through, or amend the docstring.

    def __init__(
        self,
        model: str,
        system_prompt: str | None = None,
        agent_name: str = _DEFAULT_AGENT_NAME,
        max_output_tokens: int = _DEFAULT_MAX_OUTPUT_TOKENS,
        temperature: float = _DEFAULT_TEMPERATURE,
        heartbeat: Any = None,
        _runner: Any = None,
    ) -> None:
        self.model = model
        self.system_prompt = system_prompt
        self.agent_name = agent_name
        self.max_output_tokens = max_output_tokens
        self.temperature = temperature
        self._heartbeat = heartbeat
        # Session ids already created/verified in the session service.
        self._sessions_created: set[str] = set()
        if _runner is not None:
            # Test injection — skip building the real ADK objects.
            self._runner = _runner
        else:
            self._runner = self._build_runner()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _build_runner(self) -> Any:  # pragma: no cover — requires real ADK
        """Construct a Google ADK ``Runner`` with an ``LlmAgent``.

        Lazy-imports ``google.adk`` so the rest of the workspace runtime
        doesn't pull in google-adk on startup (it's only needed when this
        executor is actually instantiated by ``GoogleADKAdapter.create_executor``).
        """
        from google.adk.agents import LlmAgent
        from google.adk.runners import Runner
        from google.adk.sessions import InMemorySessionService

        agent = LlmAgent(
            name=self.agent_name,
            model=self.model,
            instruction=self.system_prompt or "",
        )
        session_service = InMemorySessionService()
        # app_name intentionally mirrors agent_name; _ensure_session must use
        # the same value or session lookups will miss.
        runner = Runner(
            agent=agent,
            app_name=self.agent_name,
            session_service=session_service,
        )
        return runner

    async def _ensure_session(self, session_id: str, user_id: str) -> None:
        """Create a session in the service if it doesn't exist yet."""
        # Fast path: this executor already created/verified the session.
        if session_id in self._sessions_created:
            return
        session_service = self._runner.session_service
        existing = await session_service.get_session(
            app_name=self.agent_name,
            user_id=user_id,
            session_id=session_id,
        )
        if existing is None:
            await session_service.create_session(
                app_name=self.agent_name,
                user_id=user_id,
                session_id=session_id,
            )
        self._sessions_created.add(session_id)

    def _extract_text(self, context: RequestContext) -> str:
        """Pull plain text out of the A2A message parts."""
        from shared_runtime import extract_message_text

        return extract_message_text(context)

    def _build_content(self, user_text: str) -> Any:
        """Wrap user text in an ADK-compatible ``Content`` object."""
        from google.genai.types import Content, Part

        return Content(role="user", parts=[Part(text=user_text)])

    # ------------------------------------------------------------------
    # AgentExecutor interface
    # ------------------------------------------------------------------
    async def execute(self, context: RequestContext, event_queue: EventQueue) -> None:
        """Run a single ADK turn and enqueue the reply as an A2A Message.

        Sequence:

        1. Extract user text from A2A message parts.
        2. Ensure an ADK session exists for this context_id.
        3. Call ``runner.run_async()`` and collect all response events.
        4. Concatenate final-response text; fall back to ``_NO_RESPONSE_MSG``
           when the model produces no output.
        5. Enqueue the reply via ``event_queue``.
        """
        user_text = self._extract_text(context)
        if not user_text:
            parts = getattr(getattr(context, "message", None), "parts", None)
            logger.warning("GoogleADKA2AExecutor: no text in message parts: %s", parts)
            await event_queue.enqueue_event(new_agent_text_message(_NO_TEXT_MSG))
            return
        # One ADK session per A2A context; falls back to a shared default when
        # the request carries no context_id.
        session_id = getattr(context, "context_id", None) or "default-session"
        user_id = "molecule-user"
        try:
            await self._ensure_session(session_id, user_id)
            content = self._build_content(user_text)
            response_parts: list[str] = []
            async for event in self._runner.run_async(
                session_id=session_id,
                user_id=user_id,
                new_message=content,
            ):
                # Collect text from final-response events
                if not getattr(event, "is_final_response", lambda: False)():
                    continue
                # NOTE(review): this reads event.response.content.parts; the
                # google-adk Event API commonly exposes final text via
                # event.content.parts — confirm against the pinned SDK version.
                candidate_response = getattr(event, "response", None)
                if candidate_response is None:
                    continue
                # MissingContent() supplies an empty .parts so a None content
                # iterates zero times instead of raising AttributeError.
                for part in getattr(
                    getattr(candidate_response, "content", None) or MissingContent(),
                    "parts", []
                ):
                    text = getattr(part, "text", None)
                    if text:
                        response_parts.append(text)
            final_text = "".join(response_parts).strip() or _NO_RESPONSE_MSG
            await event_queue.enqueue_event(new_agent_text_message(final_text))
        except Exception as exc:
            logger.error(
                "GoogleADKA2AExecutor: execution error [model=%s]: %s",
                self.model,
                type(exc).__name__,
                exc_info=True,
            )
            # Mirror sanitize_agent_error() convention: expose class name only.
            await event_queue.enqueue_event(
                new_agent_text_message(f"Agent error: {type(exc).__name__}")
            )

    async def cancel(self, context: RequestContext, event_queue: EventQueue) -> None:
        """Cancel a running task — emits canceled state per A2A protocol."""
        from a2a.types import TaskState, TaskStatus, TaskStatusUpdateEvent

        await event_queue.enqueue_event(
            TaskStatusUpdateEvent(
                status=TaskStatus(state=TaskState.canceled),
                final=True,
            )
        )
class MissingContent:
    """Sentinel standing in for a ``None`` response content so the
    part-extraction loop in ``GoogleADKA2AExecutor.execute`` iterates zero
    times instead of raising ``AttributeError``.

    ``parts`` is created per instance: the previous class-level mutable
    default (``parts: list = []``) was shared by every instance, so a single
    accidental mutation would have leaked into all future sentinels.
    """

    def __init__(self) -> None:
        # Always empty — consumers only ever iterate over it.
        self.parts: list = []
# ---------------------------------------------------------------------------
# GoogleADKAdapter
# ---------------------------------------------------------------------------
class GoogleADKAdapter(BaseAdapter):
    """Molecule AI workspace adapter for Google ADK (google-adk v1.x).

    Implements the full ``BaseAdapter`` lifecycle:

    - ``setup()`` validates config and runs ``_common_setup()``.
    - ``create_executor()`` returns a ``GoogleADKA2AExecutor`` configured
      from ``AdapterConfig``.
    """

    # Stored by setup(); consumed by create_executor()
    _setup_result: Any = None

    # ------------------------------------------------------------------
    # Identity
    # ------------------------------------------------------------------
    @staticmethod
    def name() -> str:
        """Runtime identifier — matches the ``runtime`` field in config.yaml."""
        return "google-adk"

    @staticmethod
    def display_name() -> str:
        """Human-readable name shown in the Molecule AI UI."""
        return "Google ADK"

    @staticmethod
    def description() -> str:
        """Short description of this adapter's capabilities."""
        return (
            "Google Agent Development Kit (ADK) adapter. "
            "Runs LLM agents via Google Gemini models using the official "
            "google-adk Python SDK (Apache-2.0)."
        )

    @staticmethod
    def get_config_schema() -> dict:
        """JSON Schema for runtime_config fields rendered in the Config tab."""
        return {
            "type": "object",
            "properties": {
                "agent_name": {
                    "type": "string",
                    "default": _DEFAULT_AGENT_NAME,
                    "description": "Internal ADK agent name",
                },
                "max_output_tokens": {
                    "type": "integer",
                    "default": _DEFAULT_MAX_OUTPUT_TOKENS,
                    "description": "Maximum output tokens for the Gemini model",
                },
                "temperature": {
                    "type": "number",
                    "default": _DEFAULT_TEMPERATURE,
                    "minimum": 0.0,
                    "maximum": 2.0,
                    "description": "Sampling temperature",
                },
            },
            "additionalProperties": False,
        }

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    async def setup(self, config: AdapterConfig) -> None:
        """Validate config and run the shared platform setup pipeline.

        Raises ``RuntimeError`` if the required API key is not set and
        Vertex AI mode is not active.

        Args:
            config: ``AdapterConfig`` populated by the workspace runtime.
        """
        # Case-insensitive truthy parse: accepts "1", "true", "True", "TRUE", …
        # (the previous exact-match tuple ("1", "true", "True") silently
        # rejected other casings such as "TRUE").
        use_vertex = (
            os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "").strip().lower()
            in ("1", "true")
        )
        api_key = os.environ.get("GOOGLE_API_KEY", "").strip()
        if not use_vertex and not api_key:
            raise RuntimeError(
                "GoogleADKAdapter requires GOOGLE_API_KEY (for AI Studio) or "
                "GOOGLE_GENAI_USE_VERTEXAI=1 with GOOGLE_CLOUD_PROJECT set."
            )
        logger.info(
            "GoogleADKAdapter.setup: model=%s vertex=%s", config.model, use_vertex
        )
        self._setup_result = await self._common_setup(config)

    async def create_executor(self, config: AdapterConfig) -> GoogleADKA2AExecutor:
        """Build and return a ``GoogleADKA2AExecutor`` for A2A integration.

        Uses the system prompt assembled by ``_common_setup()`` in ``setup()``.
        Runtime-config keys ``agent_name``, ``max_output_tokens``, and
        ``temperature`` are respected when present.

        Args:
            config: ``AdapterConfig`` populated by the workspace runtime.

        Returns:
            A ready-to-use ``GoogleADKA2AExecutor`` instance.
        """
        rc = config.runtime_config or {}
        # Strip provider prefix from model, e.g. "google:gemini-2.0-flash" → "gemini-2.0-flash"
        model = config.model
        if ":" in model:
            model = model.split(":", 1)[1]
        # Prefer the fully assembled prompt from setup(); fall back to the raw
        # config prompt when create_executor is called without setup().
        system_prompt = (
            self._setup_result.system_prompt
            if self._setup_result is not None
            else config.system_prompt or ""
        )
        return GoogleADKA2AExecutor(
            model=model,
            system_prompt=system_prompt,
            agent_name=rc.get("agent_name", _DEFAULT_AGENT_NAME),
            max_output_tokens=int(rc.get("max_output_tokens", _DEFAULT_MAX_OUTPUT_TOKENS)),
            temperature=float(rc.get("temperature", _DEFAULT_TEMPERATURE)),
            heartbeat=config.heartbeat,
        )
# ---------------------------------------------------------------------------
# Module-level alias required by the adapter autodiscovery loader
# ---------------------------------------------------------------------------
# The loader imports each adapter module and looks up the `Adapter` symbol.
Adapter = GoogleADKAdapter

View File

@ -0,0 +1,7 @@
# Google ADK adapter dependencies
# Pin to the latest stable release — update when a new version is verified.
google-adk==1.30.0
# google-adk transitively requires google-genai; declare an explicit minimum
# version for visibility (note: `>=` sets a floor, not a hard pin — tighten to
# `==` if fully reproducible installs are required, matching other adapters).
google-genai>=1.16.0

View File

@ -0,0 +1,996 @@
"""Unit tests for adapters/google-adk/adapter.py.
Coverage targets (100%)
-----------------------
- Module constants: _DEFAULT_AGENT_NAME, _DEFAULT_MAX_OUTPUT_TOKENS, etc.
- MissingContent sentinel class
- GoogleADKA2AExecutor.__init__ field assignment + runner injection
- GoogleADKA2AExecutor._extract_text
- GoogleADKA2AExecutor._build_content
- GoogleADKA2AExecutor._ensure_session first call (create), subsequent call (skip)
- GoogleADKA2AExecutor.execute happy path, empty input, API error,
no final_response events, partial text
- GoogleADKA2AExecutor.cancel TaskStatusUpdateEvent emitted
- GoogleADKAdapter.name / display_name / description / get_config_schema
- GoogleADKAdapter.setup success, missing key, vertex override
- GoogleADKAdapter.create_executor model stripping, defaults, rc overrides
- Adapter alias
All google-adk, google-genai, and shared_runtime calls are mocked.
No live API calls are made.
"""
from __future__ import annotations
import sys
from types import ModuleType
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Stub heavy external modules BEFORE the adapter is imported.
# conftest.py already stubs: a2a, builtin_tools, langchain_core.
# We need to additionally stub: google.adk, google.genai, shared_runtime.
# ---------------------------------------------------------------------------
def _make_a2a_stubs() -> None:
"""Register minimal a2a SDK stubs in sys.modules.
Mirrors what workspace-template/tests/conftest.py does; needed because
this test file lives outside the ``tests/`` directory and conftest.py
is not automatically loaded for it.
"""
if "a2a" in sys.modules:
# Already mocked by conftest — just ensure new_agent_text_message is passthrough
a2a_utils = sys.modules.get("a2a.utils")
if a2a_utils and callable(getattr(a2a_utils, "new_agent_text_message", None)):
a2a_utils.new_agent_text_message = lambda text, **kwargs: text
return
agent_execution_mod = ModuleType("a2a.server.agent_execution")
class AgentExecutor:
pass
class RequestContext:
pass
agent_execution_mod.AgentExecutor = AgentExecutor
agent_execution_mod.RequestContext = RequestContext
events_mod = ModuleType("a2a.server.events")
class EventQueue:
pass
events_mod.EventQueue = EventQueue
tasks_mod = ModuleType("a2a.server.tasks")
types_mod = ModuleType("a2a.types")
class TextPart:
def __init__(self, text=""):
self.text = text
class Part:
def __init__(self, root=None):
self.root = root
types_mod.TextPart = TextPart
types_mod.Part = Part
utils_mod = ModuleType("a2a.utils")
# Passthrough so tests can assert on the plain text string, matching the
# hermes_executor test convention from conftest.py.
utils_mod.new_agent_text_message = lambda text, **kwargs: text
a2a_mod = ModuleType("a2a")
a2a_server_mod = ModuleType("a2a.server")
sys.modules["a2a"] = a2a_mod
sys.modules["a2a.server"] = a2a_server_mod
sys.modules["a2a.server.agent_execution"] = agent_execution_mod
sys.modules["a2a.server.events"] = events_mod
sys.modules["a2a.server.tasks"] = tasks_mod
sys.modules["a2a.types"] = types_mod
sys.modules["a2a.utils"] = utils_mod
def _make_google_adk_stubs() -> None:
"""Register minimal google.adk and google.genai stubs in sys.modules."""
# google (top-level namespace package)
google_mod = sys.modules.get("google") or ModuleType("google")
google_mod.__path__ = []
sys.modules.setdefault("google", google_mod)
# google.genai
google_genai_mod = ModuleType("google.genai")
google_genai_mod.__path__ = []
google_genai_types_mod = ModuleType("google.genai.types")
class _Content:
def __init__(self, role="user", parts=None):
self.role = role
self.parts = parts or []
class _Part:
def __init__(self, text=""):
self.text = text
google_genai_types_mod.Content = _Content
google_genai_types_mod.Part = _Part
sys.modules["google.genai"] = google_genai_mod
sys.modules["google.genai.types"] = google_genai_types_mod
# google.adk
google_adk_mod = ModuleType("google.adk")
google_adk_mod.__path__ = []
# google.adk.agents
google_adk_agents_mod = ModuleType("google.adk.agents")
class _LlmAgent:
def __init__(self, name="", model="", instruction="", tools=None):
self.name = name
self.model = model
self.instruction = instruction
self.tools = tools or []
google_adk_agents_mod.LlmAgent = _LlmAgent
# google.adk.runners
google_adk_runners_mod = ModuleType("google.adk.runners")
class _Runner:
def __init__(self, agent=None, app_name="", session_service=None):
self.agent = agent
self.app_name = app_name
self.session_service = session_service
async def run_async(self, session_id, user_id, new_message):
# Stub — tests override this via mock runner
return
yield # make it an async generator
google_adk_runners_mod.Runner = _Runner
# google.adk.sessions
google_adk_sessions_mod = ModuleType("google.adk.sessions")
class _InMemorySessionService:
def __init__(self):
self._sessions: dict = {}
async def get_session(self, app_name, user_id, session_id):
return self._sessions.get((app_name, user_id, session_id))
async def create_session(self, app_name, user_id, session_id):
self._sessions[(app_name, user_id, session_id)] = {"id": session_id}
return self._sessions[(app_name, user_id, session_id)]
google_adk_sessions_mod.InMemorySessionService = _InMemorySessionService
sys.modules["google.adk"] = google_adk_mod
sys.modules["google.adk.agents"] = google_adk_agents_mod
sys.modules["google.adk.runners"] = google_adk_runners_mod
sys.modules["google.adk.sessions"] = google_adk_sessions_mod
def _make_shared_runtime_stub() -> None:
"""Register shared_runtime stub with extract_message_text."""
if "shared_runtime" not in sys.modules:
mod = ModuleType("shared_runtime")
def _extract_message_text(ctx) -> str:
parts = getattr(getattr(ctx, "message", None), "parts", None)
if parts is None:
parts = ctx
texts = []
for p in parts or []:
t = getattr(p, "text", None) or getattr(
getattr(p, "root", None), "text", None
) or ""
if t:
texts.append(t)
return " ".join(texts).strip()
mod.extract_message_text = _extract_message_text
sys.modules["shared_runtime"] = mod
def _make_adapter_base_stub() -> None:
"""Register adapter_base stub in sys.modules."""
if "adapter_base" not in sys.modules:
mod = ModuleType("adapter_base")
from dataclasses import dataclass, field
from abc import ABC, abstractmethod
@dataclass
class AdapterConfig:
model: str = "google:gemini-2.0-flash"
system_prompt: str | None = None
tools: list = field(default_factory=list)
runtime_config: dict = field(default_factory=dict)
config_path: str = "/configs"
workspace_id: str = ""
prompt_files: list = field(default_factory=list)
a2a_port: int = 8000
heartbeat: object = None
class BaseAdapter(ABC):
@staticmethod
@abstractmethod
def name() -> str: ... # pragma: no cover
@staticmethod
@abstractmethod
def display_name() -> str: ... # pragma: no cover
@staticmethod
@abstractmethod
def description() -> str: ... # pragma: no cover
@staticmethod
def get_config_schema() -> dict:
return {}
def memory_filename(self) -> str:
return "CLAUDE.md"
def register_tool_hook(self, name, fn): return None # noqa
async def transcript_lines(self, since=0, limit=100): return {"supported": False} # noqa
def register_subagent_hook(self, name, spec): return None # noqa
def append_to_memory_hook(self, config, filename, content): pass # noqa
async def install_plugins_via_registry(self, config, plugins): return [] # noqa
async def inject_plugins(self, config, plugins):
await self.install_plugins_via_registry(config, plugins)
async def _common_setup(self, config):
from types import SimpleNamespace
return SimpleNamespace(
system_prompt="mocked system prompt",
loaded_skills=[],
langchain_tools=[],
is_coordinator=False,
children=[],
)
@abstractmethod
async def setup(self, config) -> None: ... # pragma: no cover
@abstractmethod
async def create_executor(self, config): ... # pragma: no cover
mod.AdapterConfig = AdapterConfig
mod.BaseAdapter = BaseAdapter
mod.SetupResult = None
sys.modules["adapter_base"] = mod
# Install all stubs before importing the module under test
# Order matters: a2a must be stubbed before adapter.py is imported so that
# `from a2a.utils import new_agent_text_message` resolves to the passthrough.
_make_a2a_stubs()
_make_google_adk_stubs()
_make_shared_runtime_stub()
_make_adapter_base_stub()
# Now safe to import the adapter
import sys as _sys
import os as _os

# Make this file's directory importable so `import adapter` resolves no matter
# which working directory pytest was launched from.
_adapter_dir = _os.path.dirname(_os.path.abspath(__file__))
if _adapter_dir not in _sys.path:
    _sys.path.insert(0, _adapter_dir)
from adapter import (  # noqa: E402
    Adapter,
    GoogleADKA2AExecutor,
    GoogleADKAdapter,
    MissingContent,
    _DEFAULT_AGENT_NAME,
    _DEFAULT_MAX_OUTPUT_TOKENS,
    _DEFAULT_TEMPERATURE,
    _NO_RESPONSE_MSG,
    _NO_TEXT_MSG,
)
# ---------------------------------------------------------------------------
# Fixtures and helpers
# ---------------------------------------------------------------------------
def _make_context(text: str, context_id: str = "ctx-test") -> MagicMock:
"""Return a mock RequestContext with the given text in message.parts."""
part = MagicMock()
part.text = text
ctx = MagicMock()
ctx.message.parts = [part]
ctx.context_id = context_id
return ctx
def _make_empty_context() -> MagicMock:
"""Return a context whose message parts contain no text."""
part = MagicMock(spec=[])
part.root = MagicMock(spec=[])
ctx = MagicMock()
ctx.message.parts = [part]
ctx.context_id = "ctx-empty"
return ctx
def _make_event(is_final: bool, text: str | None = None) -> MagicMock:
"""Build a mock ADK Event that optionally is a final response."""
event = MagicMock()
event.is_final_response = MagicMock(return_value=is_final)
if text is not None:
part = MagicMock()
part.text = text
event.response = MagicMock()
event.response.content = MagicMock()
event.response.content.parts = [part]
else:
event.response = None
return event
async def _async_gen(*events):
"""Yield events one by one as an async generator."""
for e in events:
yield e
def _make_runner(events=None) -> MagicMock:
    """Return a mock Runner whose run_async yields ``events``.

    The session service reports no existing session and records
    create_session calls; run_async returns a single (one-shot) async
    generator over the given events.
    """
    mock_runner = MagicMock()
    mock_runner.session_service = AsyncMock()
    mock_runner.session_service.get_session = AsyncMock(return_value=None)
    mock_runner.session_service.create_session = AsyncMock(return_value={"id": "s1"})
    mock_runner.run_async = MagicMock(return_value=_async_gen(*(events or [])))
    return mock_runner
def _make_executor(
    model: str = "gemini-2.0-flash",
    system_prompt: str | None = "You are helpful.",
    runner: MagicMock | None = None,
) -> GoogleADKA2AExecutor:
    """Create a GoogleADKA2AExecutor wired to an injected mock runner."""
    # Falsy-or fallback kept deliberately: a missing runner gets a fresh mock.
    return GoogleADKA2AExecutor(
        model=model,
        system_prompt=system_prompt,
        _runner=runner or _make_runner(),
    )
def _make_adapter_config(**kwargs) -> object:
    """Return an AdapterConfig with sensible defaults, overridable via kwargs."""
    from adapter_base import AdapterConfig

    base = {
        "model": "google:gemini-2.0-flash",
        "system_prompt": "Test prompt.",
        "runtime_config": {},
        "workspace_id": "ws-test",
    }
    base.update(kwargs)
    return AdapterConfig(**base)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
def test_default_agent_name():
    # Pin the default agent name — it doubles as the ADK app_name.
    assert _DEFAULT_AGENT_NAME == "molecule-adk-agent"


def test_default_max_output_tokens():
    assert _DEFAULT_MAX_OUTPUT_TOKENS == 8192


def test_default_temperature():
    assert _DEFAULT_TEMPERATURE == 1.0


def test_no_text_msg_constant():
    # Loose substring checks so the exact wording can evolve without churn.
    assert "no text" in _NO_TEXT_MSG.lower()


def test_no_response_msg_constant():
    assert "no response" in _NO_RESPONSE_MSG.lower()


# ---------------------------------------------------------------------------
# MissingContent sentinel
# ---------------------------------------------------------------------------
def test_missing_content_has_empty_parts():
    # The sentinel must expose an iterable, empty `parts`.
    mc = MissingContent()
    assert mc.parts == []
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — construction
# ---------------------------------------------------------------------------
def test_constructor_stores_fields():
    runner = _make_runner()
    executor = GoogleADKA2AExecutor(
        model="gemini-1.5-pro",
        system_prompt="Hello",
        agent_name="my-agent",
        max_output_tokens=4096,
        temperature=0.5,
        _runner=runner,
    )
    # Every constructor argument must land on the matching attribute.
    assert executor.model == "gemini-1.5-pro"
    assert executor.system_prompt == "Hello"
    assert executor.agent_name == "my-agent"
    assert executor.max_output_tokens == 4096
    assert executor.temperature == 0.5
    assert executor._runner is runner
    assert executor._sessions_created == set()


def test_constructor_defaults():
    # Omitting optional kwargs falls back to the module-level defaults.
    executor = GoogleADKA2AExecutor(model="gemini-2.0-flash", _runner=_make_runner())
    assert executor.system_prompt is None
    assert executor.agent_name == _DEFAULT_AGENT_NAME
    assert executor.max_output_tokens == _DEFAULT_MAX_OUTPUT_TOKENS
    assert executor.temperature == _DEFAULT_TEMPERATURE
    assert executor._heartbeat is None


def test_constructor_uses_injected_runner():
    # _runner injection must bypass _build_runner (no real ADK objects built).
    stub = MagicMock()
    stub.session_service = MagicMock()
    executor = GoogleADKA2AExecutor(model="gemini-2.0-flash", _runner=stub)
    assert executor._runner is stub
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — _extract_text
# ---------------------------------------------------------------------------
def test_extract_text_returns_message_text():
    executor = _make_executor()
    ctx = _make_context("Hello world")
    result = executor._extract_text(ctx)
    assert result == "Hello world"


def test_extract_text_empty_context():
    # Parts exposing neither .text nor .root.text yield "" — not an error.
    executor = _make_executor()
    ctx = _make_empty_context()
    result = executor._extract_text(ctx)
    assert result == ""


# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — _build_content
# ---------------------------------------------------------------------------
def test_build_content_creates_content_object():
    # ADK expects role="user" and exactly one text part per user turn.
    executor = _make_executor()
    content = executor._build_content("test message")
    assert content.role == "user"
    assert len(content.parts) == 1
    assert content.parts[0].text == "test message"


def test_build_content_empty_string():
    executor = _make_executor()
    content = executor._build_content("")
    assert content.parts[0].text == ""
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — _ensure_session
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_ensure_session_creates_when_not_exists():
    # When the session service has no session yet, one is created and tracked.
    stub_runner = _make_runner()
    stub_runner.session_service.get_session = AsyncMock(return_value=None)
    ex = GoogleADKA2AExecutor(
        model="gemini-2.0-flash", agent_name="test-agent", _runner=stub_runner
    )
    await ex._ensure_session("session-1", "user-1")
    stub_runner.session_service.create_session.assert_called_once_with(
        app_name="test-agent",
        user_id="user-1",
        session_id="session-1",
    )
    assert "session-1" in ex._sessions_created
@pytest.mark.asyncio
async def test_ensure_session_skips_if_already_tracked():
    # A session id already in the local cache short-circuits all service calls.
    stub_runner = _make_runner()
    ex = GoogleADKA2AExecutor(model="gemini-2.0-flash", _runner=stub_runner)
    ex._sessions_created.add("session-x")
    await ex._ensure_session("session-x", "user-1")
    # Neither get_session nor create_session should be called
    stub_runner.session_service.get_session.assert_not_called()
    stub_runner.session_service.create_session.assert_not_called()
@pytest.mark.asyncio
async def test_ensure_session_skips_create_when_existing():
    # An already-existing backend session is tracked locally without re-creating it.
    stub_runner = _make_runner()
    stub_runner.session_service.get_session = AsyncMock(return_value={"id": "s1"})
    ex = GoogleADKA2AExecutor(
        model="gemini-2.0-flash", agent_name="test-agent", _runner=stub_runner
    )
    await ex._ensure_session("session-existing", "user-1")
    stub_runner.session_service.create_session.assert_not_called()
    assert "session-existing" in ex._sessions_created
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — execute: happy path
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_execute_returns_response_text():
    # The final event's text is enqueued verbatim.
    final_evt = _make_event(is_final=True, text="The answer is 42.")
    ex = _make_executor(runner=_make_runner(events=[final_evt]))
    queue = AsyncMock()
    await ex.execute(_make_context("What is 6×7?"), queue)
    queue.enqueue_event.assert_called_once_with("The answer is 42.")
@pytest.mark.asyncio
async def test_execute_concatenates_multiple_final_parts():
    # Text from every part of the final response is joined in order.
    first = MagicMock()
    second = MagicMock()
    first.text = "Hello "
    second.text = "world"
    evt = MagicMock()
    evt.is_final_response = MagicMock(return_value=True)
    evt.response = MagicMock()
    evt.response.content = MagicMock()
    evt.response.content.parts = [first, second]
    ex = _make_executor(runner=_make_runner(events=[evt]))
    queue = AsyncMock()
    await ex.execute(_make_context("Hi"), queue)
    queue.enqueue_event.assert_called_once_with("Hello world")
@pytest.mark.asyncio
async def test_execute_skips_non_final_events():
    # Intermediate (non-final) events contribute nothing to the reply.
    events = [
        _make_event(is_final=False, text="intermediate"),
        _make_event(is_final=True, text="final answer"),
    ]
    ex = _make_executor(runner=_make_runner(events=events))
    queue = AsyncMock()
    await ex.execute(_make_context("question"), queue)
    assert queue.enqueue_event.call_args[0][0] == "final answer"
@pytest.mark.asyncio
async def test_execute_fallback_when_no_final_response_events():
    # With no final event at all, the canned no-response message is sent.
    ex = _make_executor(runner=_make_runner(events=[_make_event(is_final=False)]))
    queue = AsyncMock()
    await ex.execute(_make_context("hello"), queue)
    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
@pytest.mark.asyncio
async def test_execute_fallback_when_response_is_none():
    # A final event whose response object is missing triggers the fallback.
    evt = MagicMock()
    evt.is_final_response = MagicMock(return_value=True)
    evt.response = None  # no response object
    ex = _make_executor(runner=_make_runner(events=[evt]))
    queue = AsyncMock()
    await ex.execute(_make_context("ping"), queue)
    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
@pytest.mark.asyncio
async def test_execute_fallback_when_parts_have_no_text():
    # Parts carrying no text collapse to the fallback message.
    textless = MagicMock()
    textless.text = None  # no text on the part
    evt = MagicMock()
    evt.is_final_response = MagicMock(return_value=True)
    evt.response = MagicMock()
    evt.response.content = MagicMock()
    evt.response.content.parts = [textless]
    ex = _make_executor(runner=_make_runner(events=[evt]))
    queue = AsyncMock()
    await ex.execute(_make_context("ping"), queue)
    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
@pytest.mark.asyncio
async def test_execute_fallback_when_response_content_is_none():
    # content=None on a final event also produces the fallback message.
    evt = MagicMock()
    evt.is_final_response = MagicMock(return_value=True)
    evt.response = MagicMock()
    evt.response.content = None  # content is None → MissingContent sentinel
    ex = _make_executor(runner=_make_runner(events=[evt]))
    queue = AsyncMock()
    await ex.execute(_make_context("ping"), queue)
    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
@pytest.mark.asyncio
async def test_execute_uses_context_id_as_session_id():
    # The A2A context id doubles as the ADK session id.
    stub_runner = _make_runner(events=[_make_event(is_final=True, text="ok")])
    ex = _make_executor(runner=stub_runner)
    await ex.execute(_make_context("hello", context_id="ctx-abc-123"), AsyncMock())
    stub_runner.run_async.assert_called_once()
    kwargs = stub_runner.run_async.call_args[1]
    assert kwargs["session_id"] == "ctx-abc-123"
    assert kwargs["user_id"] == "molecule-user"
@pytest.mark.asyncio
async def test_execute_falls_back_to_default_session_id_when_context_id_is_none():
    # A missing context id maps to the static default session id.
    stub_runner = _make_runner(events=[_make_event(is_final=True, text="ok")])
    ex = _make_executor(runner=stub_runner)
    request_ctx = _make_context("hello")
    request_ctx.context_id = None  # override
    await ex.execute(request_ctx, AsyncMock())
    assert stub_runner.run_async.call_args[1]["session_id"] == "default-session"
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — execute: empty input
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_execute_empty_input_returns_error():
    # Empty user input is rejected before the model is ever invoked.
    stub_runner = _make_runner()
    ex = _make_executor(runner=stub_runner)
    queue = AsyncMock()
    await ex.execute(_make_empty_context(), queue)
    queue.enqueue_event.assert_called_once_with(_NO_TEXT_MSG)
    stub_runner.run_async.assert_not_called()
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — execute: error handling
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_execute_api_error_returns_sanitized_message():
    # Exception details (which may embed secrets) are reduced to the class name.
    stub_runner = _make_runner()

    class _FakeAPIError(Exception):
        pass

    async def _boom(*args, **kwargs):
        raise _FakeAPIError("api_key=secret token_limit_exceeded")
        yield  # make it an async generator

    stub_runner.run_async = MagicMock(return_value=_boom())
    ex = _make_executor(runner=stub_runner)
    queue = AsyncMock()
    await ex.execute(_make_context("hello"), queue)
    sent = queue.enqueue_event.call_args[0][0]
    assert sent == "Agent error: _FakeAPIError"
    assert "secret" not in sent
@pytest.mark.asyncio
async def test_execute_api_error_is_logged(caplog):
    # Failures during execution are logged at ERROR on the "adapter" logger.
    import logging

    stub_runner = _make_runner()

    async def _boom(*args, **kwargs):
        raise ValueError("bad request")
        yield  # make it an async generator

    stub_runner.run_async = MagicMock(return_value=_boom())
    ex = _make_executor(runner=stub_runner)
    with caplog.at_level(logging.ERROR, logger="adapter"):
        await ex.execute(_make_context("hello"), AsyncMock())
    assert any("execution error" in rec.message.lower() for rec in caplog.records)
# ---------------------------------------------------------------------------
# GoogleADKA2AExecutor — cancel
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_cancel_emits_canceled_event():
    """cancel() must enqueue a final TaskStatusUpdateEvent in the canceled state.

    Fix over the original: the original assigned stub classes onto the
    a2a.types module and never restored them, leaking the stubs into every
    test that runs afterwards. The patching is now wrapped in try/finally
    with the prior attributes saved and restored.
    """
    executor = _make_executor()
    import a2a.types as a2a_types

    class _TaskState:
        canceled = "canceled"

    class _TaskStatus:
        def __init__(self, state):
            self.state = state

    class _TaskStatusUpdateEvent:
        def __init__(self, status, final):
            self.status = status
            self.final = final

    patched = ("TaskState", "TaskStatus", "TaskStatusUpdateEvent")
    _absent = object()  # sentinel: attribute did not exist before patching
    saved = {name: getattr(a2a_types, name, _absent) for name in patched}
    a2a_types.TaskState = _TaskState
    a2a_types.TaskStatus = _TaskStatus
    a2a_types.TaskStatusUpdateEvent = _TaskStatusUpdateEvent
    try:
        eq = AsyncMock()
        ctx = MagicMock()
        await executor.cancel(ctx, eq)
        eq.enqueue_event.assert_called_once()
        event = eq.enqueue_event.call_args[0][0]
        assert isinstance(event, _TaskStatusUpdateEvent)
        assert event.status.state == "canceled"
        assert event.final is True
    finally:
        # Restore a2a.types so the stubs do not leak into other tests.
        for name, value in saved.items():
            if value is _absent:
                delattr(a2a_types, name)
            else:
                setattr(a2a_types, name, value)
# ---------------------------------------------------------------------------
# GoogleADKAdapter — identity methods
# ---------------------------------------------------------------------------
def test_adapter_name():
    # Registry key for this adapter.
    assert GoogleADKAdapter.name() == "google-adk"
def test_adapter_display_name():
    # Human-readable name must mention Google ADK.
    assert "Google ADK" in GoogleADKAdapter.display_name()
def test_adapter_description():
    # Description should reference the underlying framework.
    text = GoogleADKAdapter.description()
    assert "ADK" in text or "Google" in text
def test_adapter_get_config_schema():
    # The JSON schema exposes every tunable runtime knob.
    schema = GoogleADKAdapter.get_config_schema()
    assert schema["type"] == "object"
    for prop in ("agent_name", "max_output_tokens", "temperature"):
        assert prop in schema["properties"]
# ---------------------------------------------------------------------------
# GoogleADKAdapter — setup
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_setup_succeeds_with_api_key(monkeypatch):
    # An API key alone (Vertex flag absent) is sufficient for setup.
    monkeypatch.setenv("GOOGLE_API_KEY", "fake-api-key")
    monkeypatch.delenv("GOOGLE_GENAI_USE_VERTEXAI", raising=False)
    adp = GoogleADKAdapter()
    await adp.setup(_make_adapter_config())
    assert adp._setup_result is not None
    assert adp._setup_result.system_prompt == "mocked system prompt"
@pytest.mark.asyncio
async def test_setup_succeeds_with_vertex_ai(monkeypatch):
    # The Vertex AI env flag ("1") works without any API key.
    monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
    monkeypatch.setenv("GOOGLE_GENAI_USE_VERTEXAI", "1")
    adp = GoogleADKAdapter()
    await adp.setup(_make_adapter_config())
    assert adp._setup_result is not None
@pytest.mark.asyncio
async def test_setup_succeeds_with_vertex_ai_true_string(monkeypatch):
    # The Vertex AI flag is also accepted as the string "True".
    monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
    monkeypatch.setenv("GOOGLE_GENAI_USE_VERTEXAI", "True")
    adp = GoogleADKAdapter()
    await adp.setup(_make_adapter_config())
    assert adp._setup_result is not None
@pytest.mark.asyncio
async def test_setup_raises_without_credentials(monkeypatch):
    # Neither credential source present → setup must fail loudly.
    monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
    monkeypatch.delenv("GOOGLE_GENAI_USE_VERTEXAI", raising=False)
    adp = GoogleADKAdapter()
    with pytest.raises(RuntimeError, match="GOOGLE_API_KEY"):
        await adp.setup(_make_adapter_config())
# ---------------------------------------------------------------------------
# GoogleADKAdapter — create_executor
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_create_executor_strips_google_prefix(monkeypatch):
    # A "google:" provider prefix is stripped from the model id.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config(model="google:gemini-2.0-flash")
    await adp.setup(cfg)
    assert (await adp.create_executor(cfg)).model == "gemini-2.0-flash"
@pytest.mark.asyncio
async def test_create_executor_no_prefix_passthrough(monkeypatch):
    # An unprefixed model id passes through unchanged.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config(model="gemini-1.5-pro")
    await adp.setup(cfg)
    assert (await adp.create_executor(cfg)).model == "gemini-1.5-pro"
@pytest.mark.asyncio
async def test_create_executor_uses_setup_system_prompt(monkeypatch):
    # The system prompt assembled during setup() flows into the executor.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config()
    await adp.setup(cfg)
    assert (await adp.create_executor(cfg)).system_prompt == "mocked system prompt"
@pytest.mark.asyncio
async def test_create_executor_runtime_config_overrides(monkeypatch):
    # Values in runtime_config override the executor defaults.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    overrides = {
        "agent_name": "custom-agent",
        "max_output_tokens": 512,
        "temperature": 0.3,
    }
    cfg = _make_adapter_config(runtime_config=overrides)
    await adp.setup(cfg)
    ex = await adp.create_executor(cfg)
    assert ex.agent_name == "custom-agent"
    assert ex.max_output_tokens == 512
    assert ex.temperature == 0.3
@pytest.mark.asyncio
async def test_create_executor_defaults_without_runtime_config(monkeypatch):
    # An empty runtime_config leaves every knob at its module default.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config(runtime_config={})
    await adp.setup(cfg)
    ex = await adp.create_executor(cfg)
    assert ex.agent_name == _DEFAULT_AGENT_NAME
    assert ex.max_output_tokens == _DEFAULT_MAX_OUTPUT_TOKENS
    assert ex.temperature == _DEFAULT_TEMPERATURE
@pytest.mark.asyncio
async def test_create_executor_without_setup_uses_config_system_prompt(monkeypatch):
    """create_executor without prior setup falls back to config.system_prompt."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config(system_prompt="fallback prompt")
    # Intentionally skip setup() — _setup_result remains None
    ex = await adp.create_executor(cfg)
    assert ex.system_prompt == "fallback prompt"
@pytest.mark.asyncio
async def test_create_executor_without_setup_no_system_prompt(monkeypatch):
    """create_executor without setup and no system_prompt → empty string."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    cfg = _make_adapter_config(system_prompt=None)
    # Skip setup()
    assert (await adp.create_executor(cfg)).system_prompt == ""
@pytest.mark.asyncio
async def test_create_executor_heartbeat_passed(monkeypatch):
    # The heartbeat object from the config reaches the executor untouched.
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adp = GoogleADKAdapter()
    beat = MagicMock()
    cfg = _make_adapter_config(heartbeat=beat)
    await adp.setup(cfg)
    assert (await adp.create_executor(cfg))._heartbeat is beat
# ---------------------------------------------------------------------------
# Adapter alias
# ---------------------------------------------------------------------------
def test_adapter_alias_is_google_adk_adapter():
    # The module-level Adapter alias points at the concrete class.
    assert Adapter is GoogleADKAdapter

View File

@ -55,6 +55,31 @@ else:
echo "=== Molecule AI Workspace ==="
echo "Runtime: $RUNTIME"
# ──────────────────────────────────────────────────────────
# GitHub credential helper — issue #547
# ──────────────────────────────────────────────────────────
# GitHub App installation tokens expire after ~60 min. The platform
# exposes GET /admin/github-installation-token (backed by the plugin's
# in-process refreshing cache) so workspaces can always get a valid
# token without restarting.
#
# Register molecule-git-token-helper.sh as the git credential helper for
# github.com. git calls it on every push/fetch; it hits the platform
# endpoint and emits a fresh token. Falls through to any existing
# credential helper (e.g. operator .env PAT) if the platform is
# unreachable.
#
# Idempotent — safe to re-run on restart.
HELPER_SCRIPT="/workspace-template/scripts/molecule-git-token-helper.sh"
if [ -f "${HELPER_SCRIPT}" ]; then
    # The leading "!" tells git to run the value as a shell command.
    # Errors from git config are swallowed (|| true) so a read-only
    # ~/.gitconfig cannot abort workspace startup.
    git config --global \
        "credential.https://github.com.helper" \
        "!${HELPER_SCRIPT}" 2>/dev/null || true
    echo "[entrypoint] git credential helper registered (molecule-git-token-helper)"
else
    # Helper missing: startup proceeds, but pushes/fetches will use stale tokens.
    echo "[entrypoint] WARNING: molecule-git-token-helper.sh not found at ${HELPER_SCRIPT} — GitHub tokens may expire after 60 min"
fi
# NOTE: Adapter-specific deps are now pre-installed in each adapter's Docker image
# (standalone template repos). Each image installs molecule-ai-workspace-runtime
# from PyPI plus the adapter-specific requirements. No per-runtime pip install needed here.

View File

@ -319,9 +319,25 @@ def _deep_merge_hooks(existing: dict, fragment: dict) -> dict:
out.setdefault("hooks", {})
for event, handlers in fragment.get("hooks", {}).items():
out["hooks"].setdefault(event, [])
out["hooks"][event].extend(handlers)
for key, val in fragment.items():
if key == "hooks":
# Build a set of already-present handler fingerprints so that
# re-installing the same plugin fragment does not append duplicates.
# Key: (matcher, frozenset-of-commands) — same logic the issue spec
# describes. Two handlers are considered identical when they watch the
# same matcher pattern and invoke exactly the same set of commands.
seen: set[tuple[str, frozenset[str]]] = {
(h.get("matcher", ""), frozenset(c.get("command", "") for c in h.get("hooks", [])))
for h in out["hooks"][event]
}
for handler in handlers:
hkey = (
handler.get("matcher", ""),
frozenset(c.get("command", "") for c in handler.get("hooks", [])),
)
if hkey not in seen:
seen.add(hkey)
out["hooks"][event].append(handler)
for top_key, val in fragment.items():
if top_key == "hooks":
continue
out.setdefault(key, val)
out.setdefault(top_key, val)
return out

View File

@ -0,0 +1,112 @@
#!/bin/bash
# molecule-git-token-helper.sh — git credential helper for GitHub App tokens
#
# Fetches a fresh GitHub App installation token from the Molecule AI
# platform endpoint GET /admin/github-installation-token on every git
# push/fetch, so workspace containers never use an expired GH_TOKEN after
# the ~60 min GitHub App token TTL.
#
# # Setup (called once at provision time or initial_prompt)
#
#     git config --global \
#         "credential.https://github.com.helper" \
#         "!/workspace-template/scripts/molecule-git-token-helper.sh"
#
# # How git calls this helper
#
# git passes the action as the first positional arg. The protocol is:
#     get   → output credentials on stdout (we handle this)
#     store → persist credentials (no-op — we never cache)
#     erase → revoke credentials (no-op — platform manages lifecycle)
#
# On `get`, git reads key=value pairs terminated by an empty line.
# We must emit at minimum:
#     username=x-access-token
#     password=<token>
#     (blank line)
#
# # Auth
#
# The platform endpoint requires a valid workspace bearer token. The
# token is stored at ${CONFIGS_DIR}/.auth_token (written by platform_auth.py
# on first /registry/register). Workspace env var PLATFORM_URL defaults
# to http://platform:8080.
#
# # Fallback
#
# If the platform endpoint is unreachable (e.g. network partition) or
# returns non-200, the script exits 1 without printing credentials so git
# will fall through to the next helper in the chain (if any). This
# preserves the operator's fallback PAT from .env if present.
#
# # gh CLI re-auth (30-min cron)
#
# To also fix `gh` CLI auth, run this from a workspace cron prompt:
#
#     token=$(bash /workspace-template/scripts/molecule-git-token-helper.sh _fetch_token)
#     echo "$token" | gh auth login --with-token
#
# (The _fetch_token private action returns only the raw token string.)
#
set -euo pipefail

PLATFORM_URL="${PLATFORM_URL:-http://platform:8080}"
CONFIGS_DIR="${CONFIGS_DIR:-/configs}"
TOKEN_FILE="${CONFIGS_DIR}/.auth_token"
ENDPOINT="${PLATFORM_URL}/admin/github-installation-token"

# _fetch_token — internal helper; also callable directly from cron.
# Outputs the raw token string on success; exits non-zero on failure.
_fetch_token() {
    if [ ! -f "${TOKEN_FILE}" ]; then
        echo "[molecule-git-token-helper] .auth_token not found at ${TOKEN_FILE}" >&2
        exit 1
    fi
    # Strip all whitespace (incl. trailing newline) from the stored token.
    bearer=$(tr -d '[:space:]' < "${TOKEN_FILE}")
    if [ -z "${bearer}" ]; then
        echo "[molecule-git-token-helper] .auth_token is empty" >&2
        exit 1
    fi
    # -f → non-2xx is a failure; -S → with -s, still print the error to
    # stderr, which 2>&1 folds into ${response} so the log line below is
    # actually informative (plain -s left it empty on failure).
    response=$(curl -sfS \
        -H "Authorization: Bearer ${bearer}" \
        -H "Accept: application/json" \
        --max-time 10 \
        "${ENDPOINT}" 2>&1) || {
        echo "[molecule-git-token-helper] platform request failed: ${response}" >&2
        exit 1
    }
    # Parse {"token":"ghs_...","expires_at":"..."} with sed (no jq dependency).
    token=$(echo "${response}" | sed -n 's/.*"token":"\([^"]*\)".*/\1/p')
    if [ -z "${token}" ]; then
        echo "[molecule-git-token-helper] empty token in platform response: ${response}" >&2
        exit 1
    fi
    echo "${token}"
}

ACTION="${1:-get}"
case "${ACTION}" in
    get)
        token=$(_fetch_token) || exit 1
        # Emit git credential protocol response.
        printf 'username=x-access-token\n'
        printf 'password=%s\n' "${token}"
        printf '\n'
        ;;
    store|erase)
        # No-op — the platform manages token lifecycle.
        ;;
    _fetch_token)
        # Private action for cron-based gh auth login --with-token.
        _fetch_token
        ;;
    *)
        echo "[molecule-git-token-helper] unknown action: ${ACTION}" >&2
        exit 1
        ;;
esac

View File

@ -7,6 +7,7 @@ Covers:
- Empty rules directory doesn't write an empty block
- README.md / CHANGELOG.md are skipped at the root (not treated as fragments)
- Uninstall is safe on a plugin that was never installed
- _deep_merge_hooks deduplication (issue #566)
"""
from __future__ import annotations
@ -393,3 +394,90 @@ async def test_setup_sh_absent_no_warning(tmp_path: Path):
result = await AgentskillsAdaptor("p", "claude_code").install(_make_ctx(configs, plugin))
assert result.warnings == []
# ---------------------------------------------------------------------------
# _deep_merge_hooks deduplication — issue #566
# ---------------------------------------------------------------------------
from plugins_registry.builtins import _deep_merge_hooks # noqa: E402
def _make_fragment(event: str, matcher: str, command: str) -> dict:
"""Build a minimal settings-fragment dict for one hook handler."""
return {
"hooks": {
event: [
{
"matcher": matcher,
"hooks": [{"type": "command", "command": command}],
}
]
}
}
def test_deep_merge_hooks_first_install_adds_handler():
    """Merging into an empty dict adds the handler exactly once."""
    merged = _deep_merge_hooks({}, _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh"))
    entries = merged["hooks"]["PreToolUse"]
    assert len(entries) == 1
    assert entries[0]["matcher"] == "Bash"
def test_deep_merge_hooks_dedup_on_reinstall():
    """Merging the same fragment twice must not duplicate the handler."""
    frag = _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh")
    merged = _deep_merge_hooks(_deep_merge_hooks({}, frag), frag)
    assert len(merged["hooks"]["PreToolUse"]) == 1, (
        "Re-installing the same fragment must not append a duplicate handler"
    )
def test_deep_merge_hooks_dedup_three_reinstalls():
    """Issue #566 reported 34× duplication — verify three installs still yield one entry."""
    frag = _make_fragment("PostToolUse", "Write", "/hooks/format.sh")
    merged: dict = {}
    for _ in range(3):
        merged = _deep_merge_hooks(merged, frag)
    assert len(merged["hooks"]["PostToolUse"]) == 1
def test_deep_merge_hooks_different_matchers_both_kept():
    """Two handlers with different matchers must co-exist — dedup must not over-filter."""
    merged = _deep_merge_hooks({}, _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh"))
    merged = _deep_merge_hooks(merged, _make_fragment("PreToolUse", "Edit", "/hooks/lint.sh"))
    assert len(merged["hooks"]["PreToolUse"]) == 2
def test_deep_merge_hooks_different_commands_both_kept():
    """Same matcher but different commands → both handlers must be kept."""
    merged = _deep_merge_hooks({}, _make_fragment("PreToolUse", "Bash", "/hooks/lint.sh"))
    merged = _deep_merge_hooks(merged, _make_fragment("PreToolUse", "Bash", "/hooks/security.sh"))
    assert len(merged["hooks"]["PreToolUse"]) == 2
def test_deep_merge_hooks_existing_user_hooks_preserved():
    """Existing hooks in settings.json that don't match the fragment must survive."""
    user_settings = {
        "hooks": {
            "PreToolUse": [
                {"matcher": "Bash", "hooks": [{"type": "command", "command": "/user/custom.sh"}]}
            ]
        }
    }
    fragment = _make_fragment("PreToolUse", "Edit", "/hooks/lint.sh")
    merged = _deep_merge_hooks(user_settings, fragment)
    assert {h["matcher"] for h in merged["hooks"]["PreToolUse"]} == {"Bash", "Edit"}
def test_deep_merge_hooks_top_level_keys_merged():
"""Non-hook top-level keys in the fragment are merged into the output."""
existing = {"someKey": "old"}
fragment = {"someKey": "new", "anotherKey": "value", "hooks": {}}
result = _deep_merge_hooks(existing, fragment)
# setdefault semantics: existing keys win, new keys are added
assert result["someKey"] == "old"
assert result["anotherKey"] == "value"