diff --git a/canvas/src/app/orgs/page.tsx b/canvas/src/app/orgs/page.tsx index 29a32632..e8163e24 100644 --- a/canvas/src/app/orgs/page.tsx +++ b/canvas/src/app/orgs/page.tsx @@ -154,7 +154,7 @@ function CheckoutBanner() {

✓ Payment confirmed. Your workspace is spinning up now — this page - refreshes automatically when it's ready. + refreshes automatically when it's ready.

); @@ -318,7 +318,7 @@ function EmptyState({ banner }: { banner?: React.ReactNode }) { {banner}

- You don't have any organizations yet. Create one to get started — your + You don't have any organizations yet. Create one to get started — your workspace spins up automatically once billing is set up.

diff --git a/canvas/src/components/Tooltip.tsx b/canvas/src/components/Tooltip.tsx index 29a32632..087fcd7c 100644 --- a/canvas/src/components/Tooltip.tsx +++ b/canvas/src/components/Tooltip.tsx @@ -48,6 +48,7 @@ export function Tooltip({ text, children }: Props) {
  }, []);
  const onBlur = useCallback(() => {
+   clearTimeout(timerRef.current);
    setShow(false);
  }, []);
diff --git a/docs/agent-runtime/workspace-runtime.md b/docs/agent-runtime/workspace-runtime.md index 72ad0dac..2ee03b6b 100644 --- a/docs/agent-runtime/workspace-runtime.md +++ b/docs/agent-runtime/workspace-runtime.md @@ -144,7 +144,7 @@ External workspaces run outside the platform's Docker infrastructure — on your
 | Liveness | Docker health sweep | Heartbeat TTL (90s offline threshold) |
 | Registration | Automatic at container start | Manual: `POST /workspaces` + `POST /registry/register` |
 | Token | Inherited from container env | Minted at registration, shown once |
-| Secrets | Baked in image or env var | Pulled from platform at boot via `GET /workspaces/:id/secrets/values` |
+| Secrets | Baked in image or env var | Pulled from platform at boot via `GET /workspaces/:id/secrets` |

 ### Registration flow

@@ -185,7 +185,7 @@ The platform returns a 256-bit bearer token — save it, it is shown only once.
 **3. Pull secrets at boot:**

 ```bash
-curl http://localhost:8080/workspaces/ws-xyz/secrets/values \
+curl http://localhost:8080/workspaces/ws-xyz/secrets \
   -H "Authorization: Bearer <token>"
 ```
diff --git a/docs/blog/2026-04-21-skills-vs-bundled-tools/index.md b/docs/blog/2026-04-21-skills-vs-bundled-tools/index.md index ff01cd97..17048281 100644 --- a/docs/blog/2026-04-21-skills-vs-bundled-tools/index.md +++ b/docs/blog/2026-04-21-skills-vs-bundled-tools/index.md @@ -95,7 +95,7 @@ Here's how the comparison lands:

 If you want to evaluate Molecule AI's skills coverage, start here:

-→ [MCP browser automation guide](/docs/blog/browser-automation-ai-agents-mcp) — browser tools via Chrome DevTools Protocol, same capability as Hermes' built-in browser
+→ [MCP browser automation guide](/blog/browser-automation-ai-agents-mcp) — browser tools via Chrome DevTools Protocol, same capability as Hermes' built-in browser
 → [TTS and image generation skills](/docs/guides/skill-catalog) — community-contributed, versioned, swappable
 → [Org-scoped API keys](/docs/guides/org-api-keys.md) — production auth and audit
diff --git a/docs/guides/skill-catalog.md b/docs/guides/skill-catalog.md new file mode 100644 index 00000000..337becc2 --- /dev/null +++ b/docs/guides/skill-catalog.md @@ -0,0 +1,196 @@
+# Skill Catalog
+
+Skills extend what a workspace agent can do — from browser automation
+and TTS to research tools and custom API integrations. This page covers
+available skill types, how to install them, and how to manage their
+versions.
+
+> **Note:** Molecule AI does not ship a hosted skill marketplace. All
+> skills are installed from local packages, GitHub URLs, or community
+> bundles. See [Skill Lifecycle](#skill-lifecycle) for how to publish and
+> distribute skills within your org.
+
+## Available Skill Types
+
+The skills ecosystem covers the same capabilities as Hermes Tool Gateway
+and more:
+
+| Category | Skill | What it does | Provider options |
+|----------|-------|-------------|-----------------|
+| **Browser** | `browser-automation` | Chrome DevTools Protocol via MCP — navigate, query DOM, screenshot, fill forms. Same engine as Hermes' built-in browser tool. | Built-in (CDP); swap via skill version |
+| **TTS** | `tts` | Text-to-speech generation. Streams audio to output. | OpenAI, ElevenLabs, or self-hosted |
+| **Image gen** | `image-generation` | Generates images from text prompts. | OpenAI DALL·E, Stability AI, or self-hosted |
+| **Web search** | `web-search` | Structured web search with result parsing. | Brave, SerpAPI, or custom |
+| **Research** | `arxiv-research` | Searches and summarizes arXiv papers. | Community bundle |
+| **Code** | `code-analysis` | Static analysis, diff review, complexity scoring. | Built-in |
+| **SEO** | `seo-audit` | Lighthouse audit + GSC keyword extraction. | Built-in |
+| **Social** | `social-post` | Formats and posts to social channels. | Built-in |
+
+All skills are open source. Source is visible — inspect the `SKILL.md`
+and `tools/` before installing.
+
+## Installing a Skill
+
+### From the built-in catalog
+
+```bash
+# Install browser automation
+molecule skills install browser-automation
+
+# Install TTS with a specific provider
+molecule skills install tts --provider openai
+
+# Install a specific version
+molecule skills install browser-automation --version 1.2.0
+```
+
+### From GitHub
+
+```bash
+molecule skills install \
+  https://github.com/acme/molecule-skills/tree/main/browser-automation
+```
+
+### From a community bundle
+
+Community skills are hosted on GitHub and referenced by slug:
+
+```bash
+molecule skills install arxiv-research --from community
+```
+
+Community skills are reviewed by the Molecule AI team before being
+listed. Submit a skill for review by opening a PR against
+[`molecule-ai/skills`](https://github.com/Molecule-AI/skills).
+
+## Installing via config.yaml
+
+Skills can also be declared in the workspace config file:
+
+```yaml
+skills:
+  - name: browser-automation
+    source: builtin
+  - name: tts
+    source: builtin
+    config:
+      provider: openai
+  - name: arxiv-research
+    source: community
+```
+
+On workspace boot, the runtime validates each skill and loads the
+`SKILL.md` + tools into the agent's context.
+
+## Version Management
+
+Skills are versioned with semantic versioning. Pin to a known-good
+release to prevent unexpected behavior changes:
+
+```bash
+# Pin to a specific version
+molecule skills install tts --version 1.1.0
+
+# Upgrade to latest
+molecule skills upgrade tts
+
+# View installed version
+molecule skills list
+```
+
+Upgrading is safe — the skill loader validates the new package on
+installation. If the new version has breaking changes, the workspace logs
+a warning and keeps the previous version active until you restart.
+
+## Custom Skills
+
+Write a skill for your team's specific workflow:
+
+```bash
+# Scaffold a new skill
+molecule skills init my-custom-skill
+```
+
+This creates:
+
+```
+skills/my-custom-skill/
++-- SKILL.md          # instructions + frontmatter
++-- tools/
+|   +-- my_tool.py    # MCP tool using @tool decorator
++-- examples/         # few-shot examples
++-- templates/        # reference files
+```
+
+See [Skills Reference](../agent-runtime/skills.md) for the full
+`SKILL.md` format and frontmatter schema.
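+
+Before publishing, it can help to sanity-check that a scaffolded package
+still matches the layout above. A minimal sketch of such a check
+(illustrative Go, not the platform's actual loader; the only invariants
+assumed are the ones the scaffold shows):
+
+```go
+package main
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+// validateSkillDir enforces the scaffold invariants shown above: a
+// SKILL.md file plus a tools/ directory with at least one Python tool.
+func validateSkillDir(dir string) error {
+	if _, err := os.Stat(filepath.Join(dir, "SKILL.md")); err != nil {
+		return fmt.Errorf("missing SKILL.md: %w", err)
+	}
+	tools, err := filepath.Glob(filepath.Join(dir, "tools", "*.py"))
+	if err != nil || len(tools) == 0 {
+		return fmt.Errorf("tools/ must contain at least one .py tool file")
+	}
+	return nil
+}
+
+func main() {
+	if err := validateSkillDir("skills/my-custom-skill"); err != nil {
+		fmt.Println("skill package invalid:", err)
+		os.Exit(1)
+	}
+	fmt.Println("skill package looks structurally sound")
+}
+```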
+ +## Skill Lifecycle + +``` +Author writes SKILL.md + tools/ + | + v +Install into workspace (local or GitHub) + | + v +Workspace loads skill on next boot / hot-reload + | + v +Agent sees skill in tool context + | + v +(Optional) Publish to org bundle or community +``` + +**Publishing to your org:** Bundle skills with workspace templates so +every new workspace in a role gets the same capability set: + +```bash +molecule skills bundle my-custom-skill --output ./org-templates/my-role/ +``` + +**Publishing to the community:** Open a PR against +[`molecule-ai/skills`](https://github.com/Molecule-AI/skills) with a +complete skill package. Community skills are reviewed for security and +correctness before listing. + +## Removing a Skill + +```bash +molecule skills uninstall browser-automation +``` + +Or remove from `config.yaml` and trigger a hot-reload by touching the +file: + +```bash +touch /configs/config.yaml +``` + +The workspace detects the change, rescans skills, and updates the Agent +Card within ~3 seconds. + +## Troubleshooting + +**Skill not found:** Check the skill name matches the catalog exactly. +Skill names are lowercase with hyphens (`browser-automation`, not +`browser_automation` or `BrowserAutomation`). + +**Skill loads but tools are missing:** Verify the `tools/` folder +contains valid Python files with `@tool`-decorated functions. See +[Skills Reference — Tool Interface](../agent-runtime/skills.md#tool-interface). + +**Provider auth error:** Ensure the required environment variable (e.g. +`OPENAI_API_KEY`) is set in the workspace config or secrets. + +## Related Docs + +- [Skills Reference](../agent-runtime/skills.md) — Full SKILL.md format, + frontmatter schema, and tool interface +- [Config Format](../agent-runtime/config-format.md) — How skills are + declared in `config.yaml` +- [Plugin System](../plugins/overview.md) — Installing full plugin + packages (skills + MCP servers + shared rules) +- [Remote Agent Tutorial](../tutorials/register-remote-agent.md) — + Installing skills on remote (external) agents \ No newline at end of file diff --git a/docs/marketing/discord-adapter-day2/announcement.md b/docs/marketing/discord-adapter-day2/announcement.md new file mode 100644 index 00000000..a75db641 --- /dev/null +++ b/docs/marketing/discord-adapter-day2/announcement.md @@ -0,0 +1,81 @@ +# Discord Adapter Day 2 — Community Copy + +> Posted 2026-04-21. Discord adapter launched Day 1; Day 2 covers Reddit, Hacker News. +> Blog URL: https://moleculesai.app/blog/discord-adapter-launch +> PR: https://github.com/Molecule-AI/molecule-core/pull/656 + +--- + +## Reddit r/LocalLLaMA + +**Title:** Molecule AI now connects to Discord via a webhook — no bot account, no Gateway, no OAuth + +``` +Molecule AI workspaces can now send messages to Discord and receive slash commands using only a webhook URL. No Discord Developer Portal, no intents, no bot token — just an inbound webhook and your agent is in the channel. + +Built it as a proof-of-concept to keep our own team workflow on Discord without the overhead of a full bot app. Figured other people might want the same thing. + +The adapter uses Discord's built-in webhook delivery for outbound + slash command reception. No polling. No Gateway connection. Works behind NAT — the agent initiates all outbound connections to the platform, which proxies to Discord. 
+Here's the architecture gist:
+- Outbound: POST to Discord webhook URL (standard, no auth beyond the URL token)
+- Inbound: Discord delivers slash command payloads to a platform endpoint; platform fans out to the relevant workspace via A2A
+- No Discord bot app required. No Developer Portal setup.
+
+If your team lives in Discord and you want an AI agent that can post summaries, respond to /ask commands, and route alerts — it's now a webhook URL and a config line.
+
+Demo repo and docs: https://github.com/Molecule-AI/molecule-core/tree/main/docs/blog/2026-04-21-discord-adapter
+
+Happy to answer questions about the adapter design.
+```
+
+**Tags:** `discord`, `mcp`, `molecule-ai`, `webhook`, `ai-agents`
+
+---
+
+## Reddit r/MachineLearning
+
+**Title:** Molecule AI Discord adapter — AI agents in Discord via webhook, no bot account needed
+
+```
+Molecule AI Discord adapter — webhook-only, no Gateway connection required
+
+Built a Discord integration for Molecule AI workspaces that requires zero bot app setup. It's just a webhook URL and an agent config.
+
+The problem: Discord bot integrations typically require a Developer Portal app, OAuth flow, Gateway connection management, intent configuration, and rate limit handling. That's a meaningful chunk of work before your agent can say hello.
+
+The approach: use Discord's native webhook delivery for inbound slash commands (no Gateway) and standard webhook POST for outbound messages. The platform acts as a proxy — Discord delivers to the platform endpoint, the platform routes to the relevant workspace via A2A. Works behind NAT since the agent initiates outbound connections.
+
+No bot token. No intents. No Gateway.
+
+Code: https://github.com/Molecule-AI/molecule-core/tree/main/docs/blog/2026-04-21-discord-adapter
+Launch post: https://moleculesai.app/blog/discord-adapter-launch
+```
+
+---
+
+## Hacker News
+
+**Title:** Molecule AI — Discord adapter via webhook (no bot account, no Gateway)
+
+**Body:**
+
+Built a Discord integration for Molecule AI workspaces that works with just a webhook URL — no Discord Developer Portal setup, no bot token, no Gateway connection.
+
+**Why**
+
+Our own team lives in Discord. We wanted a lightweight way to have an AI agent respond to slash commands and post updates without the overhead of a full bot app. Realized Discord's native webhook primitives cover both inbound (slash command delivery) and outbound (channel messages) if you proxy through a platform endpoint.
+
+**How it works**
+
+- Outbound: agent POSTs to a Discord webhook URL (standard, URL contains the auth token)
+- Inbound: Discord delivers slash command payloads to a platform endpoint; platform fans out to the relevant workspace via A2A
+- No bot account required. No Gateway. Works behind NAT — the agent only initiates outbound connections.
+
+The adapter lives in the MCP server (`mcp-server/src/tools/channels/discord.go`) alongside Telegram and other channel adapters. Each workspace configures its own Discord channel with a webhook URL.
+
+**Links**
+
+- Docs: https://moleculesai.app/blog/discord-adapter-launch
+- Code + examples: https://github.com/Molecule-AI/molecule-core/tree/main/docs/blog/2026-04-21-discord-adapter
+- PR: https://github.com/Molecule-AI/molecule-core/pull/656
diff --git a/docs/quickstart.md b/docs/quickstart.md index 337c168c..a0483d74 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -152,7 +152,7 @@ The response includes your bearer token — save it now. It is shown only once.
```bash AGENT_TOKEN="the-token-from-step-2" -curl "$PLATFORM/workspaces/$WORKSPACE_ID/secrets/values" \ +curl "$PLATFORM/workspaces/$WORKSPACE_ID/secrets" \ -H "Authorization: Bearer $AGENT_TOKEN" ``` diff --git a/workspace-server/internal/handlers/a2a_proxy.go b/workspace-server/internal/handlers/a2a_proxy.go index fd6cd50f..18991f38 100644 --- a/workspace-server/internal/handlers/a2a_proxy.go +++ b/workspace-server/internal/handlers/a2a_proxy.go @@ -1,5 +1,10 @@ package handlers +// a2a_proxy.go — A2A JSON-RPC proxy: routes canvas and agent-to-agent +// requests to workspace containers. Core proxy path, URL resolution, +// payload normalization, and HTTP dispatch. Error handling, logging, and +// SSRF helpers live in a2a_proxy_helpers.go. + import ( "bytes" "context" @@ -9,9 +14,7 @@ import ( "fmt" "io" "log" - "net" "net/http" - "net/url" "os" "strconv" "strings" @@ -20,7 +23,6 @@ import ( "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner" "github.com/Molecule-AI/molecule-monorepo/platform/internal/registry" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth" "github.com/gin-gonic/gin" "github.com/google/uuid" ) @@ -473,402 +475,3 @@ func (h *WorkspaceHandler) dispatchA2A(ctx context.Context, agentURL string, bod resp, doErr := a2aClient.Do(req) return resp, cancel, doErr } - -// proxyDispatchBuildError is a sentinel wrapper for failures inside -// http.NewRequestWithContext. handleA2ADispatchError unwraps it to emit the -// "failed to create proxy request" 500 instead of the standard 502/503 paths. -type proxyDispatchBuildError struct{ err error } - -func (e *proxyDispatchBuildError) Error() string { return e.err.Error() } - -// handleA2ADispatchError translates a forward-call failure into a proxyA2AError, -// runs the reactive container-health check, and (when `logActivity` is true) -// schedules a detached LogActivity goroutine for the failed attempt. -func (h *WorkspaceHandler) handleA2ADispatchError(ctx context.Context, workspaceID, callerID string, body []byte, a2aMethod string, err error, durationMs int, logActivity bool) (int, []byte, *proxyA2AError) { - // Build-time failure (couldn't even create the http.Request) — return - // a 500 without the reactive-health / busy-retry paths. - if buildErr, ok := err.(*proxyDispatchBuildError); ok { - _ = buildErr - return 0, nil, &proxyA2AError{ - Status: http.StatusInternalServerError, - Response: gin.H{"error": "failed to create proxy request"}, - } - } - - log.Printf("ProxyA2A forward error: %v", err) - - containerDead := h.maybeMarkContainerDead(ctx, workspaceID) - - if logActivity { - h.logA2AFailure(ctx, workspaceID, callerID, body, a2aMethod, err, durationMs) - } - if containerDead { - return 0, nil, &proxyA2AError{ - Status: http.StatusServiceUnavailable, - Response: gin.H{"error": "workspace agent unreachable — container restart triggered", "restarting": true}, - } - } - // Container is alive but upstream Do() failed with a timeout/EOF- - // shaped error — the agent is most likely mid-synthesis on a - // previous request (single-threaded main loop). Surface as 503 - // Busy with a Retry-After hint so callers can distinguish this - // from a real unreachable-agent (502) and retry with backoff. - // Issue #110. 
- if isUpstreamBusyError(err) { - return 0, nil, &proxyA2AError{ - Status: http.StatusServiceUnavailable, - Headers: map[string]string{"Retry-After": strconv.Itoa(busyRetryAfterSeconds)}, - Response: gin.H{ - "error": "workspace agent busy — retry after a short backoff", - "busy": true, - "retry_after": busyRetryAfterSeconds, - }, - } - } - return 0, nil, &proxyA2AError{ - Status: http.StatusBadGateway, - Response: gin.H{"error": "failed to reach workspace agent"}, - } -} - -// maybeMarkContainerDead runs the reactive health check after a forward error. -// If the workspace's Docker container is no longer running (and the workspace -// isn't external), it marks the workspace offline, clears Redis state, -// broadcasts WORKSPACE_OFFLINE, and triggers an async restart. Returns true -// when the container was found dead. -func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspaceID string) bool { - var wsRuntime string - db.DB.QueryRowContext(ctx, `SELECT COALESCE(runtime, 'langgraph') FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsRuntime) - if h.provisioner == nil || wsRuntime == "external" { - return false - } - running, inspectErr := h.provisioner.IsRunning(ctx, workspaceID) - if inspectErr != nil { - // Transient Docker-daemon error (timeout, socket EOF, etc.). Post- - // #386, IsRunning returns (true, err) in this case — caller stays - // on the alive path and does not trigger a restart cascade. Log - // so the defect is visible without being destructive. - log.Printf("ProxyA2A: IsRunning for %s returned transient error (assuming alive): %v", workspaceID, inspectErr) - } - if running { - return false - } - log.Printf("ProxyA2A: container for %s is dead — marking offline and triggering restart", workspaceID) - if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = 'offline', updated_at = now() WHERE id = $1 AND status NOT IN ('removed', 'provisioning')`, workspaceID); err != nil { - log.Printf("ProxyA2A: failed to mark workspace %s offline: %v", workspaceID, err) - } - db.ClearWorkspaceKeys(ctx, workspaceID) - h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_OFFLINE", workspaceID, map[string]interface{}{}) - go h.RestartByID(workspaceID) - return true -} - -// logA2AFailure records a failed A2A attempt to activity_logs in a detached -// goroutine (the request context may already be done by the time it runs). -func (h *WorkspaceHandler) logA2AFailure(ctx context.Context, workspaceID, callerID string, body []byte, a2aMethod string, err error, durationMs int) { - errMsg := err.Error() - var errWsName string - db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&errWsName) - if errWsName == "" { - errWsName = workspaceID - } - summary := "A2A request to " + errWsName + " failed: " + errMsg - go func(parent context.Context) { - logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second) - defer cancel() - LogActivity(logCtx, h.broadcaster, ActivityParams{ - WorkspaceID: workspaceID, - ActivityType: "a2a_receive", - SourceID: nilIfEmpty(callerID), - TargetID: &workspaceID, - Method: &a2aMethod, - Summary: &summary, - RequestBody: json.RawMessage(body), - DurationMs: &durationMs, - Status: "error", - ErrorDetail: &errMsg, - }) - }(ctx) -} - -// logA2ASuccess records a successful A2A round-trip and (for canvas-initiated -// 2xx/3xx responses) broadcasts an A2A_RESPONSE event so the frontend can -// receive the reply without polling. 
-func (h *WorkspaceHandler) logA2ASuccess(ctx context.Context, workspaceID, callerID string, body, respBody []byte, a2aMethod string, statusCode, durationMs int) { - logStatus := "ok" - if statusCode >= 400 { - logStatus = "error" - } - var wsNameForLog string - db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsNameForLog) - if wsNameForLog == "" { - wsNameForLog = workspaceID - } - - // #817: track outbound activity on the CALLER so orchestrators can detect - // silent workspaces. Only update when callerID is a real workspace (not - // canvas, not a system caller) and the target returned 2xx/3xx. - if callerID != "" && !isSystemCaller(callerID) && statusCode < 400 { - go func() { - bgCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - if _, err := db.DB.ExecContext(bgCtx, - `UPDATE workspaces SET last_outbound_at = NOW() WHERE id = $1`, callerID); err != nil { - log.Printf("last_outbound_at update failed for %s: %v", callerID, err) - } - }() - } - summary := a2aMethod + " → " + wsNameForLog - go func(parent context.Context) { - logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second) - defer cancel() - LogActivity(logCtx, h.broadcaster, ActivityParams{ - WorkspaceID: workspaceID, - ActivityType: "a2a_receive", - SourceID: nilIfEmpty(callerID), - TargetID: &workspaceID, - Method: &a2aMethod, - Summary: &summary, - RequestBody: json.RawMessage(body), - ResponseBody: json.RawMessage(respBody), - DurationMs: &durationMs, - Status: logStatus, - }) - }(ctx) - - if callerID == "" && statusCode < 400 { - h.broadcaster.BroadcastOnly(workspaceID, "A2A_RESPONSE", map[string]interface{}{ - "response_body": json.RawMessage(respBody), - "method": a2aMethod, - "duration_ms": durationMs, - }) - } -} - -func nilIfEmpty(s string) *string { - if s == "" { - return nil - } - return &s -} - -// validateCallerToken enforces the Phase 30.5 auth-token contract on the -// caller of an A2A proxy request. Same lazy-bootstrap shape as -// registry.requireWorkspaceToken: if the caller workspace has any live -// token on file, the Authorization header is mandatory and must match; -// if the caller has zero live tokens, they're grandfathered through -// (their next /registry/register will mint their first token, after -// which this branch never fires again for them). -// -// On auth failure this writes the 401 via c and returns an error so the -// handler aborts without running the proxy. -func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) error { - hasLive, err := wsauth.HasAnyLiveToken(ctx, db.DB, callerID) - if err != nil { - // Fail-open here matches the heartbeat path — A2A caller auth is - // defense-in-depth on top of access-control hierarchy, not the - // sole gate on the secret material. A DB hiccup shouldn't take - // the whole A2A path down. 
- log.Printf("wsauth: caller HasAnyLiveToken(%s) failed: %v — allowing A2A", callerID, err) - return nil - } - if !hasLive { - return nil // legacy / pre-upgrade caller - } - tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")) - if tok == "" { - c.JSON(http.StatusUnauthorized, gin.H{"error": "missing caller auth token"}) - return errInvalidCallerToken - } - if err := wsauth.ValidateToken(ctx, db.DB, callerID, tok); err != nil { - c.JSON(http.StatusUnauthorized, gin.H{"error": "invalid caller auth token"}) - return err - } - return nil -} - -// errInvalidCallerToken is a sentinel for validateCallerToken's "missing -// token" branch so the handler-level guard can detect it without string -// matching (the wsauth errors are typed for the invalid case). -var errInvalidCallerToken = errors.New("missing caller auth token") - -// extractAndUpsertTokenUsage parses LLM usage from a raw A2A response body -// and persists it via upsertTokenUsage. Safe to call in a goroutine — logs -// errors but never panics. ctx must already be detached from the request. -func extractAndUpsertTokenUsage(ctx context.Context, workspaceID string, respBody []byte) { - in, out := parseUsageFromA2AResponse(respBody) - if in > 0 || out > 0 { - upsertTokenUsage(ctx, workspaceID, in, out) - } -} - -// parseUsageFromA2AResponse extracts input_tokens / output_tokens from an A2A -// JSON-RPC response. Inspects two locations in order of preference: -// 1. result.usage — the JSON-RPC 2.0 result envelope from workspace agents. -// 2. usage — top-level, for non-JSON-RPC or direct Anthropic-shaped payloads. -// -// Returns (0, 0) when no recognisable usage data is found. -func parseUsageFromA2AResponse(body []byte) (inputTokens, outputTokens int64) { - if len(body) == 0 { - return 0, 0 - } - var top map[string]json.RawMessage - if err := json.Unmarshal(body, &top); err != nil { - return 0, 0 - } - - // 1. result.usage (JSON-RPC 2.0 wrapper produced by workspace agents). - if rawResult, ok := top["result"]; ok { - var result map[string]json.RawMessage - if err := json.Unmarshal(rawResult, &result); err == nil { - if in, out, ok := readUsageMap(result); ok { - return in, out - } - } - } - - // 2. Fallback: top-level usage (direct Anthropic or non-JSON-RPC response). - if in, out, ok := readUsageMap(top); ok { - return in, out - } - return 0, 0 -} - -// isSafeURL validates that a URL resolves to a publicly-routable address, -// preventing A2A requests from being redirected to internal/cloud-metadata -// infrastructure (SSRF, CWE-918). Workspace URLs come from DB/Redis caches -// so we validate before making any outbound HTTP call. -func isSafeURL(rawURL string) error { - u, err := url.Parse(rawURL) - if err != nil { - return fmt.Errorf("invalid URL: %w", err) - } - // Reject non-HTTP(S) schemes. - if u.Scheme != "http" && u.Scheme != "https" { - return fmt.Errorf("forbidden scheme: %s (only http/https allowed)", u.Scheme) - } - host := u.Hostname() - if host == "" { - return fmt.Errorf("empty hostname") - } - // Block direct IP addresses. - if ip := net.ParseIP(host); ip != nil { - if ip.IsLoopback() || ip.IsUnspecified() || ip.IsLinkLocalUnicast() { - return fmt.Errorf("forbidden loopback/unspecified IP: %s", ip) - } - if isPrivateOrMetadataIP(ip) { - return fmt.Errorf("forbidden private/metadata IP: %s", ip) - } - return nil - } - // For hostnames, resolve and validate each returned IP. - addrs, err := net.LookupHost(host) - if err != nil { - // DNS resolution failure — block it. Could be an internal hostname. 
- return fmt.Errorf("DNS resolution blocked for hostname: %s (%v)", host, err) - } - if len(addrs) == 0 { - return fmt.Errorf("DNS returned no addresses for: %s", host) - } - for _, addr := range addrs { - ip := net.ParseIP(addr) - if ip != nil && (ip.IsLoopback() || ip.IsUnspecified() || ip.IsLinkLocalUnicast() || isPrivateOrMetadataIP(ip)) { - return fmt.Errorf("hostname %s resolves to forbidden IP: %s", host, ip) - } - } - return nil -} - -// isPrivateOrMetadataIP returns true for cloud-metadata / loopback / link-local -// ranges (always) and RFC-1918 / IPv6 ULA ranges (self-hosted only). -// -// In SaaS cross-EC2 mode (see saasMode() in registry.go) the tenant platform -// and its workspaces share a VPC, so workspaces register with their -// VPC-private IP — typically 172.31.x.x on AWS default VPCs. Blocking RFC-1918 -// unconditionally would reject every legitimate registration. Cloud metadata -// (169.254.0.0/16, fe80::/10), loopback, and TEST-NET ranges stay blocked in -// both modes; they are never a legitimate agent URL. -// -// Both IPv4 and IPv6 are checked. The previous implementation returned false -// for every non-IPv4 input, which meant a registered `[::1]` or `[fe80::…]` -// URL would bypass the SSRF gate entirely. -func isPrivateOrMetadataIP(ip net.IP) bool { - // Always blocked — IPv4 cloud metadata + network-test ranges. - metadataRangesV4 := []string{ - "169.254.0.0/16", // link-local / IMDSv1-v2 - "100.64.0.0/10", // CGNAT — reachable via some VPC configs, not a legit agent URL - "192.0.2.0/24", // TEST-NET-1 - "198.51.100.0/24", // TEST-NET-2 - "203.0.113.0/24", // TEST-NET-3 - } - // Always blocked — IPv6 cloud-metadata / loopback equivalents. - metadataRangesV6 := []string{ - "::1/128", // loopback - "fe80::/10", // link-local (IMDS analogue) - "::ffff:0:0/96", // IPv4-mapped loopback (defence-in-depth; To4() below usually normalises first) - } - // RFC-1918 private — blocked in self-hosted, allowed in SaaS. - rfc1918RangesV4 := []string{ - "10.0.0.0/8", - "172.16.0.0/12", - "192.168.0.0/16", - } - // RFC-4193 ULA — IPv6 analogue of RFC-1918. Same SaaS-mode treatment. - ulaRangesV6 := []string{ - "fc00::/7", - } - - contains := func(cidrs []string, target net.IP) bool { - for _, c := range cidrs { - _, n, err := net.ParseCIDR(c) - if err != nil { - continue - } - if n.Contains(target) { - return true - } - } - return false - } - - // Prefer IPv4 semantics when the input is an IPv4 address encoded in any - // form (raw v4, ::ffff:a.b.c.d, etc.) — To4() normalises all of them. - if ip4 := ip.To4(); ip4 != nil { - if contains(metadataRangesV4, ip4) { - return true - } - if saasMode() { - return false - } - return contains(rfc1918RangesV4, ip4) - } - - // True IPv6 path. - if contains(metadataRangesV6, ip) { - return true - } - if saasMode() { - return false - } - return contains(ulaRangesV6, ip) -} - -// readUsageMap extracts input_tokens / output_tokens from the "usage" key of m. -// Returns (0, 0, false) when the key is absent or contains no non-zero values. 
-func readUsageMap(m map[string]json.RawMessage) (inputTokens, outputTokens int64, ok bool) {
-	rawUsage, has := m["usage"]
-	if !has {
-		return 0, 0, false
-	}
-	var usage struct {
-		InputTokens  int64 `json:"input_tokens"`
-		OutputTokens int64 `json:"output_tokens"`
-	}
-	if err := json.Unmarshal(rawUsage, &usage); err != nil {
-		return 0, 0, false
-	}
-	if usage.InputTokens == 0 && usage.OutputTokens == 0 {
-		return 0, 0, false
-	}
-	return usage.InputTokens, usage.OutputTokens, true
-}
diff --git a/workspace-server/internal/handlers/a2a_proxy_helpers.go b/workspace-server/internal/handlers/a2a_proxy_helpers.go new file mode 100644 index 00000000..1a87071a --- /dev/null +++ b/workspace-server/internal/handlers/a2a_proxy_helpers.go @@ -0,0 +1,419 @@
+package handlers
+
+// a2a_proxy_helpers.go — A2A proxy error handling, activity logging,
+// caller auth validation, token usage tracking, and SSRF safety checks.
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"log"
+	"net"
+	"net/http"
+	"net/url"
+	"strconv"
+	"time"
+
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
+	"github.com/gin-gonic/gin"
+)
+
+// proxyDispatchBuildError is a sentinel wrapper for failures inside
+// http.NewRequestWithContext. handleA2ADispatchError unwraps it to emit the
+// "failed to create proxy request" 500 instead of the standard 502/503 paths.
+type proxyDispatchBuildError struct{ err error }
+
+func (e *proxyDispatchBuildError) Error() string { return e.err.Error() }
+
+// handleA2ADispatchError translates a forward-call failure into a proxyA2AError,
+// runs the reactive container-health check, and (when `logActivity` is true)
+// schedules a detached LogActivity goroutine for the failed attempt.
+func (h *WorkspaceHandler) handleA2ADispatchError(ctx context.Context, workspaceID, callerID string, body []byte, a2aMethod string, err error, durationMs int, logActivity bool) (int, []byte, *proxyA2AError) {
+	// Build-time failure (couldn't even create the http.Request) — return
+	// a 500 without the reactive-health / busy-retry paths.
+	if buildErr, ok := err.(*proxyDispatchBuildError); ok {
+		_ = buildErr
+		return 0, nil, &proxyA2AError{
+			Status:   http.StatusInternalServerError,
+			Response: gin.H{"error": "failed to create proxy request"},
+		}
+	}
+
+	log.Printf("ProxyA2A forward error: %v", err)
+
+	containerDead := h.maybeMarkContainerDead(ctx, workspaceID)
+
+	if logActivity {
+		h.logA2AFailure(ctx, workspaceID, callerID, body, a2aMethod, err, durationMs)
+	}
+	if containerDead {
+		return 0, nil, &proxyA2AError{
+			Status:   http.StatusServiceUnavailable,
+			Response: gin.H{"error": "workspace agent unreachable — container restart triggered", "restarting": true},
+		}
+	}
+	// Container is alive but upstream Do() failed with a timeout/EOF-
+	// shaped error — the agent is most likely mid-synthesis on a
+	// previous request (single-threaded main loop). Surface as 503
+	// Busy with a Retry-After hint so callers can distinguish this
+	// from a real unreachable-agent (502) and retry with backoff.
+	// Issue #110.
+ if isUpstreamBusyError(err) { + return 0, nil, &proxyA2AError{ + Status: http.StatusServiceUnavailable, + Headers: map[string]string{"Retry-After": strconv.Itoa(busyRetryAfterSeconds)}, + Response: gin.H{ + "error": "workspace agent busy — retry after a short backoff", + "busy": true, + "retry_after": busyRetryAfterSeconds, + }, + } + } + return 0, nil, &proxyA2AError{ + Status: http.StatusBadGateway, + Response: gin.H{"error": "failed to reach workspace agent"}, + } +} + +// maybeMarkContainerDead runs the reactive health check after a forward error. +// If the workspace's Docker container is no longer running (and the workspace +// isn't external), it marks the workspace offline, clears Redis state, +// broadcasts WORKSPACE_OFFLINE, and triggers an async restart. Returns true +// when the container was found dead. +func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspaceID string) bool { + var wsRuntime string + db.DB.QueryRowContext(ctx, `SELECT COALESCE(runtime, 'langgraph') FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsRuntime) + if h.provisioner == nil || wsRuntime == "external" { + return false + } + running, inspectErr := h.provisioner.IsRunning(ctx, workspaceID) + if inspectErr != nil { + // Transient Docker-daemon error (timeout, socket EOF, etc.). Post- + // #386, IsRunning returns (true, err) in this case — caller stays + // on the alive path and does not trigger a restart cascade. Log + // so the defect is visible without being destructive. + log.Printf("ProxyA2A: IsRunning for %s returned transient error (assuming alive): %v", workspaceID, inspectErr) + } + if running { + return false + } + log.Printf("ProxyA2A: container for %s is dead — marking offline and triggering restart", workspaceID) + if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = 'offline', updated_at = now() WHERE id = $1 AND status NOT IN ('removed', 'provisioning')`, workspaceID); err != nil { + log.Printf("ProxyA2A: failed to mark workspace %s offline: %v", workspaceID, err) + } + db.ClearWorkspaceKeys(ctx, workspaceID) + h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_OFFLINE", workspaceID, map[string]interface{}{}) + go h.RestartByID(workspaceID) + return true +} + +// logA2AFailure records a failed A2A attempt to activity_logs in a detached +// goroutine (the request context may already be done by the time it runs). +func (h *WorkspaceHandler) logA2AFailure(ctx context.Context, workspaceID, callerID string, body []byte, a2aMethod string, err error, durationMs int) { + errMsg := err.Error() + var errWsName string + db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&errWsName) + if errWsName == "" { + errWsName = workspaceID + } + summary := "A2A request to " + errWsName + " failed: " + errMsg + go func(parent context.Context) { + logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second) + defer cancel() + LogActivity(logCtx, h.broadcaster, ActivityParams{ + WorkspaceID: workspaceID, + ActivityType: "a2a_receive", + SourceID: nilIfEmpty(callerID), + TargetID: &workspaceID, + Method: &a2aMethod, + Summary: &summary, + RequestBody: json.RawMessage(body), + DurationMs: &durationMs, + Status: "error", + ErrorDetail: &errMsg, + }) + }(ctx) +} + +// logA2ASuccess records a successful A2A round-trip and (for canvas-initiated +// 2xx/3xx responses) broadcasts an A2A_RESPONSE event so the frontend can +// receive the reply without polling. 
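+// An illustrative A2A_RESPONSE payload (field names match the BroadcastOnly
+// call below; values invented for the example):
+//
+//	{"response_body": {"jsonrpc": "2.0", "result": {...}}, "method": "message/send", "duration_ms": 1240}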
+func (h *WorkspaceHandler) logA2ASuccess(ctx context.Context, workspaceID, callerID string, body, respBody []byte, a2aMethod string, statusCode, durationMs int) { + logStatus := "ok" + if statusCode >= 400 { + logStatus = "error" + } + var wsNameForLog string + db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsNameForLog) + if wsNameForLog == "" { + wsNameForLog = workspaceID + } + + // #817: track outbound activity on the CALLER so orchestrators can detect + // silent workspaces. Only update when callerID is a real workspace (not + // canvas, not a system caller) and the target returned 2xx/3xx. + if callerID != "" && !isSystemCaller(callerID) && statusCode < 400 { + go func() { + bgCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if _, err := db.DB.ExecContext(bgCtx, + `UPDATE workspaces SET last_outbound_at = NOW() WHERE id = $1`, callerID); err != nil { + log.Printf("last_outbound_at update failed for %s: %v", callerID, err) + } + }() + } + summary := a2aMethod + " → " + wsNameForLog + go func(parent context.Context) { + logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second) + defer cancel() + LogActivity(logCtx, h.broadcaster, ActivityParams{ + WorkspaceID: workspaceID, + ActivityType: "a2a_receive", + SourceID: nilIfEmpty(callerID), + TargetID: &workspaceID, + Method: &a2aMethod, + Summary: &summary, + RequestBody: json.RawMessage(body), + ResponseBody: json.RawMessage(respBody), + DurationMs: &durationMs, + Status: logStatus, + }) + }(ctx) + + if callerID == "" && statusCode < 400 { + h.broadcaster.BroadcastOnly(workspaceID, "A2A_RESPONSE", map[string]interface{}{ + "response_body": json.RawMessage(respBody), + "method": a2aMethod, + "duration_ms": durationMs, + }) + } +} + +func nilIfEmpty(s string) *string { + if s == "" { + return nil + } + return &s +} + +// validateCallerToken enforces the Phase 30.5 auth-token contract on the +// caller of an A2A proxy request. Same lazy-bootstrap shape as +// registry.requireWorkspaceToken: if the caller workspace has any live +// token on file, the Authorization header is mandatory and must match; +// if the caller has zero live tokens, they're grandfathered through +// (their next /registry/register will mint their first token, after +// which this branch never fires again for them). +// +// On auth failure this writes the 401 via c and returns an error so the +// handler aborts without running the proxy. +func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) error { + hasLive, err := wsauth.HasAnyLiveToken(ctx, db.DB, callerID) + if err != nil { + // Fail-open here matches the heartbeat path — A2A caller auth is + // defense-in-depth on top of access-control hierarchy, not the + // sole gate on the secret material. A DB hiccup shouldn't take + // the whole A2A path down. 
+ log.Printf("wsauth: caller HasAnyLiveToken(%s) failed: %v — allowing A2A", callerID, err) + return nil + } + if !hasLive { + return nil // legacy / pre-upgrade caller + } + tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")) + if tok == "" { + c.JSON(http.StatusUnauthorized, gin.H{"error": "missing caller auth token"}) + return errInvalidCallerToken + } + if err := wsauth.ValidateToken(ctx, db.DB, callerID, tok); err != nil { + c.JSON(http.StatusUnauthorized, gin.H{"error": "invalid caller auth token"}) + return err + } + return nil +} + +// errInvalidCallerToken is a sentinel for validateCallerToken's "missing +// token" branch so the handler-level guard can detect it without string +// matching (the wsauth errors are typed for the invalid case). +var errInvalidCallerToken = errors.New("missing caller auth token") + +// extractAndUpsertTokenUsage parses LLM usage from a raw A2A response body +// and persists it via upsertTokenUsage. Safe to call in a goroutine — logs +// errors but never panics. ctx must already be detached from the request. +func extractAndUpsertTokenUsage(ctx context.Context, workspaceID string, respBody []byte) { + in, out := parseUsageFromA2AResponse(respBody) + if in > 0 || out > 0 { + upsertTokenUsage(ctx, workspaceID, in, out) + } +} + +// parseUsageFromA2AResponse extracts input_tokens / output_tokens from an A2A +// JSON-RPC response. Inspects two locations in order of preference: +// 1. result.usage — the JSON-RPC 2.0 result envelope from workspace agents. +// 2. usage — top-level, for non-JSON-RPC or direct Anthropic-shaped payloads. +// +// Returns (0, 0) when no recognisable usage data is found. +func parseUsageFromA2AResponse(body []byte) (inputTokens, outputTokens int64) { + if len(body) == 0 { + return 0, 0 + } + var top map[string]json.RawMessage + if err := json.Unmarshal(body, &top); err != nil { + return 0, 0 + } + + // 1. result.usage (JSON-RPC 2.0 wrapper produced by workspace agents). + if rawResult, ok := top["result"]; ok { + var result map[string]json.RawMessage + if err := json.Unmarshal(rawResult, &result); err == nil { + if in, out, ok := readUsageMap(result); ok { + return in, out + } + } + } + + // 2. Fallback: top-level usage (direct Anthropic or non-JSON-RPC response). + if in, out, ok := readUsageMap(top); ok { + return in, out + } + return 0, 0 +} + +// isSafeURL validates that a URL resolves to a publicly-routable address, +// preventing A2A requests from being redirected to internal/cloud-metadata +// infrastructure (SSRF, CWE-918). Workspace URLs come from DB/Redis caches +// so we validate before making any outbound HTTP call. +func isSafeURL(rawURL string) error { + u, err := url.Parse(rawURL) + if err != nil { + return fmt.Errorf("invalid URL: %w", err) + } + // Reject non-HTTP(S) schemes. + if u.Scheme != "http" && u.Scheme != "https" { + return fmt.Errorf("forbidden scheme: %s (only http/https allowed)", u.Scheme) + } + host := u.Hostname() + if host == "" { + return fmt.Errorf("empty hostname") + } + // Block direct IP addresses. + if ip := net.ParseIP(host); ip != nil { + if ip.IsLoopback() || ip.IsUnspecified() || ip.IsLinkLocalUnicast() { + return fmt.Errorf("forbidden loopback/unspecified IP: %s", ip) + } + if isPrivateOrMetadataIP(ip) { + return fmt.Errorf("forbidden private/metadata IP: %s", ip) + } + return nil + } + // For hostnames, resolve and validate each returned IP. + addrs, err := net.LookupHost(host) + if err != nil { + // DNS resolution failure — block it. Could be an internal hostname. 
+ return fmt.Errorf("DNS resolution blocked for hostname: %s (%v)", host, err) + } + if len(addrs) == 0 { + return fmt.Errorf("DNS returned no addresses for: %s", host) + } + for _, addr := range addrs { + ip := net.ParseIP(addr) + if ip != nil && (ip.IsLoopback() || ip.IsUnspecified() || ip.IsLinkLocalUnicast() || isPrivateOrMetadataIP(ip)) { + return fmt.Errorf("hostname %s resolves to forbidden IP: %s", host, ip) + } + } + return nil +} + +// isPrivateOrMetadataIP returns true for cloud-metadata / loopback / link-local +// ranges (always) and RFC-1918 / IPv6 ULA ranges (self-hosted only). +// +// In SaaS cross-EC2 mode (see saasMode() in registry.go) the tenant platform +// and its workspaces share a VPC, so workspaces register with their +// VPC-private IP — typically 172.31.x.x on AWS default VPCs. Blocking RFC-1918 +// unconditionally would reject every legitimate registration. Cloud metadata +// (169.254.0.0/16, fe80::/10), loopback, and TEST-NET ranges stay blocked in +// both modes; they are never a legitimate agent URL. +// +// Both IPv4 and IPv6 are checked. The previous implementation returned false +// for every non-IPv4 input, which meant a registered [::1] or [fe80::…] +// URL would bypass the SSRF gate entirely. +func isPrivateOrMetadataIP(ip net.IP) bool { + // Always blocked — IPv4 cloud metadata + network-test ranges. + metadataRangesV4 := []string{ + "169.254.0.0/16", // link-local / IMDSv1-v2 + "100.64.0.0/10", // CGNAT — reachable via some VPC configs, not a legit agent URL + "192.0.2.0/24", // TEST-NET-1 + "198.51.100.0/24", // TEST-NET-2 + "203.0.113.0/24", // TEST-NET-3 + } + // Always blocked — IPv6 cloud-metadata / loopback equivalents. + metadataRangesV6 := []string{ + "::1/128", // loopback + "fe80::/10", // link-local (IMDS analogue) + "::ffff:0:0/96", // IPv4-mapped loopback (defence-in-depth; To4() below usually normalises first) + } + // RFC-1918 private — blocked in self-hosted, allowed in SaaS. + rfc1918RangesV4 := []string{ + "10.0.0.0/8", + "172.16.0.0/12", + "192.168.0.0/16", + } + // RFC-4193 ULA — IPv6 analogue of RFC-1918. Same SaaS-mode treatment. + ulaRangesV6 := []string{ + "fc00::/7", + } + + contains := func(cidrs []string, target net.IP) bool { + for _, c := range cidrs { + _, n, err := net.ParseCIDR(c) + if err != nil { + continue + } + if n.Contains(target) { + return true + } + } + return false + } + + // Prefer IPv4 semantics when the input is an IPv4 address encoded in any + // form (raw v4, ::ffff:a.b.c.d, etc.) — To4() normalises all of them. + if ip4 := ip.To4(); ip4 != nil { + if contains(metadataRangesV4, ip4) { + return true + } + if saasMode() { + return false + } + return contains(rfc1918RangesV4, ip4) + } + + // True IPv6 path. + if contains(metadataRangesV6, ip) { + return true + } + if saasMode() { + return false + } + return contains(ulaRangesV6, ip) +} + +// readUsageMap extracts input_tokens / output_tokens from the "usage" key of m. +// Returns (0, 0, false) when the key is absent or contains no non-zero values. 
+func readUsageMap(m map[string]json.RawMessage) (inputTokens, outputTokens int64, ok bool) { + rawUsage, has := m["usage"] + if !has { + return 0, 0, false + } + var usage struct { + InputTokens int64 `json:"input_tokens"` + OutputTokens int64 `json:"output_tokens"` + } + if err := json.Unmarshal(rawUsage, &usage); err != nil { + return 0, 0, false + } + if usage.InputTokens == 0 && usage.OutputTokens == 0 { + return 0, 0, false + } + return usage.InputTokens, usage.OutputTokens, true +} diff --git a/workspace-server/internal/handlers/mcp.go b/workspace-server/internal/handlers/mcp.go index 8d7ac598..bc0b9668 100644 --- a/workspace-server/internal/handlers/mcp.go +++ b/workspace-server/internal/handlers/mcp.go @@ -1,6 +1,10 @@ package handlers -// Package handlers — MCP bridge for opencode integration (#800, #809, #810). +// mcp.go — MCP bridge protocol handling: JSON-RPC types, handler struct, +// tool definitions, HTTP endpoints (Call, Stream), and RPC dispatch. +// Tool implementations live in mcp_tools.go. +// +// MCP bridge for opencode integration (#800, #809, #810). // // Exposes the same 8 A2A tools as workspace/a2a_mcp_server.py but // served directly from the platform over HTTP so CLI runtimes running @@ -20,24 +24,16 @@ package handlers // MOLECULE_MCP_ALLOW_SEND_MESSAGE=true. import ( - "bytes" "context" "database/sql" "encoding/json" "fmt" - "io" - "log" "net/http" "os" - "strings" "time" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" "github.com/Molecule-AI/molecule-monorepo/platform/internal/events" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/registry" "github.com/gin-gonic/gin" - "github.com/google/uuid" ) // mcpProtocolVersion is the MCP spec version this server implements. 
@@ -389,546 +385,3 @@ func (h *MCPHandler) dispatch(ctx context.Context, workspaceID, toolName string, return "", fmt.Errorf("unknown tool: %s", toolName) } } - -// ───────────────────────────────────────────────────────────────────────────── -// Tool implementations -// ───────────────────────────────────────────────────────────────────────────── - -func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (string, error) { - var parentID sql.NullString - err := h.database.QueryRowContext(ctx, - `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID, - ).Scan(&parentID) - if err == sql.ErrNoRows { - return "", fmt.Errorf("workspace not found") - } - if err != nil { - return "", fmt.Errorf("lookup failed: %w", err) - } - - type peer struct { - ID string `json:"id"` - Name string `json:"name"` - Role string `json:"role"` - Status string `json:"status"` - Tier int `json:"tier"` - } - - var peers []peer - - scanPeers := func(rows *sql.Rows) error { - defer rows.Close() - for rows.Next() { - var p peer - if err := rows.Scan(&p.ID, &p.Name, &p.Role, &p.Status, &p.Tier); err != nil { - return err - } - peers = append(peers, p) - } - return rows.Err() - } - - const cols = `SELECT w.id, w.name, COALESCE(w.role,''), w.status, w.tier` - - // Siblings - if parentID.Valid { - rows, err := h.database.QueryContext(ctx, - cols+` FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`, - parentID.String, workspaceID) - if err == nil { - _ = scanPeers(rows) - } - } else { - rows, err := h.database.QueryContext(ctx, - cols+` FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`, - workspaceID) - if err == nil { - _ = scanPeers(rows) - } - } - - // Children - { - rows, err := h.database.QueryContext(ctx, - cols+` FROM workspaces w WHERE w.parent_id = $1 AND w.status != 'removed'`, - workspaceID) - if err == nil { - _ = scanPeers(rows) - } - } - - // Parent - if parentID.Valid { - rows, err := h.database.QueryContext(ctx, - cols+` FROM workspaces w WHERE w.id = $1 AND w.status != 'removed'`, - parentID.String) - if err == nil { - _ = scanPeers(rows) - } - } - - if len(peers) == 0 { - return "No peers found.", nil - } - - b, _ := json.MarshalIndent(peers, "", " ") - return string(b), nil -} - -func (h *MCPHandler) toolGetWorkspaceInfo(ctx context.Context, workspaceID string) (string, error) { - var id, name, role, status string - var tier int - var parentID sql.NullString - - err := h.database.QueryRowContext(ctx, ` - SELECT id, name, COALESCE(role,''), tier, status, parent_id - FROM workspaces WHERE id = $1 - `, workspaceID).Scan(&id, &name, &role, &tier, &status, &parentID) - if err == sql.ErrNoRows { - return "", fmt.Errorf("workspace not found") - } - if err != nil { - return "", fmt.Errorf("lookup failed: %w", err) - } - - info := map[string]interface{}{ - "id": id, - "name": name, - "role": role, - "tier": tier, - "status": status, - } - if parentID.Valid { - info["parent_id"] = parentID.String - } - b, _ := json.MarshalIndent(info, "", " ") - return string(b), nil -} - -func (h *MCPHandler) toolDelegateTask(ctx context.Context, callerID string, args map[string]interface{}, timeout time.Duration) (string, error) { - targetID, _ := args["workspace_id"].(string) - task, _ := args["task"].(string) - if targetID == "" { - return "", fmt.Errorf("workspace_id is required") - } - if task == "" { - return "", fmt.Errorf("task is required") - } - - if !registry.CanCommunicate(callerID, targetID) { - return "", 
fmt.Errorf("workspace %s is not authorised to communicate with %s", callerID, targetID) - } - - agentURL, err := mcpResolveURL(ctx, h.database, targetID) - if err != nil { - return "", err - } - // SSRF defence: reject private/metadata URLs before making outbound call. - if err := isSafeURL(agentURL); err != nil { - return "", fmt.Errorf("invalid workspace URL: %w", err) - } - - a2aBody, err := json.Marshal(map[string]interface{}{ - "jsonrpc": "2.0", - "id": uuid.New().String(), - "method": "message/send", - "params": map[string]interface{}{ - "message": map[string]interface{}{ - "role": "user", - "parts": []map[string]interface{}{{"type": "text", "text": task}}, - "messageId": uuid.New().String(), - }, - }, - }) - if err != nil { - return "", fmt.Errorf("failed to build A2A request: %w", err) - } - - reqCtx, cancel := context.WithTimeout(ctx, timeout) - defer cancel() - - httpReq, err := http.NewRequestWithContext(reqCtx, "POST", agentURL+"/a2a", bytes.NewReader(a2aBody)) - if err != nil { - return "", fmt.Errorf("failed to create request: %w", err) - } - httpReq.Header.Set("Content-Type", "application/json") - // X-Workspace-ID identifies this caller to the A2A proxy. The /workspaces/:id/a2a - // endpoint is intentionally outside WorkspaceAuth (agents do not hold bearer tokens - // to peer workspaces). Access control is enforced by CanCommunicate above, which - // already validated callerID → targetID before this request is constructed. - // callerID was authenticated by WorkspaceAuth on the MCP bridge entry point, - // so this header reflects a verified caller identity, not a spoofable value. - httpReq.Header.Set("X-Workspace-ID", callerID) - - resp, err := http.DefaultClient.Do(httpReq) - if err != nil { - return "", fmt.Errorf("A2A call failed: %w", err) - } - defer func() { _ = resp.Body.Close() }() - - body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) - if err != nil { - return "", fmt.Errorf("failed to read response: %w", err) - } - - return extractA2AText(body), nil -} - -func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string, args map[string]interface{}) (string, error) { - targetID, _ := args["workspace_id"].(string) - task, _ := args["task"].(string) - if targetID == "" { - return "", fmt.Errorf("workspace_id is required") - } - if task == "" { - return "", fmt.Errorf("task is required") - } - - if !registry.CanCommunicate(callerID, targetID) { - return "", fmt.Errorf("workspace %s is not authorised to communicate with %s", callerID, targetID) - } - - taskID := uuid.New().String() - - // Fire and forget in a detached goroutine. Use a background context so - // the call is not cancelled when the HTTP request completes. - go func() { - bgCtx, cancel := context.WithTimeout(context.Background(), mcpAsyncCallTimeout) - defer cancel() - - agentURL, err := mcpResolveURL(bgCtx, h.database, targetID) - if err != nil { - log.Printf("MCPHandler.delegate_task_async: resolve URL for %s: %v", targetID, err) - return - } - // SSRF defence: reject private/metadata URLs before making outbound call. 
- if err := isSafeURL(agentURL); err != nil { - log.Printf("MCPHandler.delegate_task_async: unsafe URL for %s: %v", targetID, err) - return - } - - a2aBody, _ := json.Marshal(map[string]interface{}{ - "jsonrpc": "2.0", - "id": taskID, - "method": "message/send", - "params": map[string]interface{}{ - "message": map[string]interface{}{ - "role": "user", - "parts": []map[string]interface{}{{"type": "text", "text": task}}, - "messageId": uuid.New().String(), - }, - }, - }) - - httpReq, err := http.NewRequestWithContext(bgCtx, "POST", agentURL+"/a2a", bytes.NewReader(a2aBody)) - if err != nil { - log.Printf("MCPHandler.delegate_task_async: create request: %v", err) - return - } - httpReq.Header.Set("Content-Type", "application/json") - httpReq.Header.Set("X-Workspace-ID", callerID) - - resp, err := http.DefaultClient.Do(httpReq) - if err != nil { - log.Printf("MCPHandler.delegate_task_async: A2A call to %s: %v", targetID, err) - return - } - defer func() { _ = resp.Body.Close() }() - // Drain response so the connection can be reused. - _, _ = io.Copy(io.Discard, resp.Body) - }() - - return fmt.Sprintf(`{"task_id":%q,"status":"dispatched","target_id":%q}`, taskID, targetID), nil -} - -func (h *MCPHandler) toolCheckTaskStatus(ctx context.Context, callerID string, args map[string]interface{}) (string, error) { - targetID, _ := args["workspace_id"].(string) - taskID, _ := args["task_id"].(string) - if targetID == "" { - return "", fmt.Errorf("workspace_id is required") - } - if taskID == "" { - return "", fmt.Errorf("task_id is required") - } - - var status, errorDetail sql.NullString - var responseBody []byte - - err := h.database.QueryRowContext(ctx, ` - SELECT status, error_detail, response_body - FROM activity_logs - WHERE workspace_id = $1 - AND target_id = $2 - AND request_body->>'delegation_id' = $3 - ORDER BY created_at DESC - LIMIT 1 - `, callerID, targetID, taskID).Scan(&status, &errorDetail, &responseBody) - if err == sql.ErrNoRows { - return fmt.Sprintf(`{"task_id":%q,"status":"not_found","note":"task not tracked or not yet dispatched"}`, taskID), nil - } - if err != nil { - return "", fmt.Errorf("status lookup failed: %w", err) - } - - result := map[string]interface{}{ - "task_id": taskID, - "status": status.String, - "target_id": targetID, - } - if errorDetail.Valid && errorDetail.String != "" { - result["error"] = errorDetail.String - } - if len(responseBody) > 0 { - result["result"] = extractA2AText(responseBody) - } - b, _ := json.MarshalIndent(result, "", " ") - return string(b), nil -} - -func (h *MCPHandler) toolSendMessageToUser(ctx context.Context, workspaceID string, args map[string]interface{}) (string, error) { - message, _ := args["message"].(string) - if message == "" { - return "", fmt.Errorf("message is required") - } - - // Check send_message_to_user is enabled (C3). 
- if os.Getenv("MOLECULE_MCP_ALLOW_SEND_MESSAGE") != "true" { - return "", fmt.Errorf("send_message_to_user is not enabled on this MCP bridge (set MOLECULE_MCP_ALLOW_SEND_MESSAGE=true)") - } - - var wsName string - err := h.database.QueryRowContext(ctx, - `SELECT name FROM workspaces WHERE id = $1 AND status != 'removed'`, workspaceID, - ).Scan(&wsName) - if err != nil { - return "", fmt.Errorf("workspace not found") - } - - h.broadcaster.BroadcastOnly(workspaceID, "AGENT_MESSAGE", map[string]interface{}{ - "message": message, - "workspace_id": workspaceID, - "name": wsName, - }) - - return "Message sent.", nil -} - - -func (h *MCPHandler) toolCommitMemory(ctx context.Context, workspaceID string, args map[string]interface{}) (string, error) { - content, _ := args["content"].(string) - scope, _ := args["scope"].(string) - if content == "" { - return "", fmt.Errorf("content is required") - } - if scope == "" { - scope = "LOCAL" - } - - // C3: GLOBAL scope is blocked on the MCP bridge. - if scope == "GLOBAL" { - return "", fmt.Errorf("GLOBAL scope is not permitted via the MCP bridge — use LOCAL or TEAM") - } - if scope != "LOCAL" && scope != "TEAM" { - return "", fmt.Errorf("scope must be LOCAL or TEAM") - } - - memoryID := uuid.New().String() - // SAFE-T1201 (#838): scrub known credential patterns before persistence so - // plain-text API keys pulled in via tool responses can't land in the - // memories table (and leak into shared TEAM scope). Reuses redactSecrets - // already shipped for the HTTP path in PR #881 — this was the MCP-bridge - // sibling the original fix missed. Runs on every write regardless of scope. - content, _ = redactSecrets(workspaceID, content) - _, err := h.database.ExecContext(ctx, ` - INSERT INTO agent_memories (id, workspace_id, content, scope, namespace) - VALUES ($1, $2, $3, $4, $5) - `, memoryID, workspaceID, content, scope, workspaceID) - if err != nil { - log.Printf("MCPHandler.commit_memory workspace=%s: %v", workspaceID, err) - return "", fmt.Errorf("failed to save memory") - } - - return fmt.Sprintf(`{"id":%q,"scope":%q}`, memoryID, scope), nil -} - -func (h *MCPHandler) toolRecallMemory(ctx context.Context, workspaceID string, args map[string]interface{}) (string, error) { - query, _ := args["query"].(string) - scope, _ := args["scope"].(string) - - // C3: GLOBAL scope is blocked on the MCP bridge. - if scope == "GLOBAL" { - return "", fmt.Errorf("GLOBAL scope is not permitted via the MCP bridge — use LOCAL, TEAM, or empty") - } - - var rows *sql.Rows - var err error - - switch scope { - case "LOCAL": - rows, err = h.database.QueryContext(ctx, ` - SELECT id, content, scope, created_at - FROM agent_memories - WHERE workspace_id = $1 AND scope = 'LOCAL' - AND ($2 = '' OR content ILIKE '%' || $2 || '%') - ORDER BY created_at DESC LIMIT 50 - `, workspaceID, query) - case "TEAM": - // Team scope: parent + all siblings. - rows, err = h.database.QueryContext(ctx, ` - SELECT m.id, m.content, m.scope, m.created_at - FROM agent_memories m - JOIN workspaces w ON w.id = m.workspace_id - WHERE m.scope = 'TEAM' - AND w.status != 'removed' - AND (w.id = $1 OR w.parent_id = (SELECT parent_id FROM workspaces WHERE id = $1 AND parent_id IS NOT NULL)) - AND ($2 = '' OR m.content ILIKE '%' || $2 || '%') - ORDER BY m.created_at DESC LIMIT 50 - `, workspaceID, query) - default: - // Empty scope → LOCAL only for the MCP bridge (GLOBAL excluded per C3). 
- rows, err = h.database.QueryContext(ctx, ` - SELECT id, content, scope, created_at - FROM agent_memories - WHERE workspace_id = $1 AND scope IN ('LOCAL', 'TEAM') - AND ($2 = '' OR content ILIKE '%' || $2 || '%') - ORDER BY created_at DESC LIMIT 50 - `, workspaceID, query) - } - if err != nil { - return "", fmt.Errorf("memory search failed: %w", err) - } - defer rows.Close() - - type memEntry struct { - ID string `json:"id"` - Content string `json:"content"` - Scope string `json:"scope"` - CreatedAt string `json:"created_at"` - } - var results []memEntry - for rows.Next() { - var e memEntry - if err := rows.Scan(&e.ID, &e.Content, &e.Scope, &e.CreatedAt); err != nil { - continue - } - results = append(results, e) - } - if err := rows.Err(); err != nil { - return "", fmt.Errorf("memory scan error: %w", err) - } - - if len(results) == 0 { - return "No memories found.", nil - } - b, _ := json.MarshalIndent(results, "", " ") - return string(b), nil -} - -// isSafeURL and isPrivateOrMetadataIP live in a2a_proxy.go -- same package, -// shared across MCP + A2A proxy call sites. Keeping a single copy avoids -// drift between the two SSRF gates when one is tightened and the other -// isn't. - -// ───────────────────────────────────────────────────────────────────────────── -// Helpers -// ───────────────────────────────────────────────────────────────────────────── - -// mcpResolveURL returns a routable URL for a workspace's A2A server. -// -// Resolution order: -// 1. Docker-internal URL cache (set by provisioner; correct when platform is in Docker) -// 2. Redis URL cache -// 3. DB `url` column fallback, with 127.0.0.1→Docker bridge rewrite when in Docker -// -// SECURITY (F1083 / #1130): all three paths run the returned URL through -// validateAgentURL to block SSRF targets (private IPs, loopback, cloud metadata). -func mcpResolveURL(ctx context.Context, database *sql.DB, workspaceID string) (string, error) { - if platformInDocker { - if url, err := db.GetCachedInternalURL(ctx, workspaceID); err == nil && url != "" { - if err := validateAgentURL(url); err != nil { - return "", fmt.Errorf("workspace %s: forbidden URL from internal cache: %w", workspaceID, err) - } - return url, nil - } - } - if url, err := db.GetCachedURL(ctx, workspaceID); err == nil && url != "" { - if platformInDocker && strings.HasPrefix(url, "http://127.0.0.1:") { - return provisioner.InternalURL(workspaceID), nil - } - if err := validateAgentURL(url); err != nil { - return "", fmt.Errorf("workspace %s: forbidden URL from Redis cache: %w", workspaceID, err) - } - return url, nil - } - - var urlStr sql.NullString - var status string - if err := database.QueryRowContext(ctx, - `SELECT url, status FROM workspaces WHERE id = $1`, workspaceID, - ).Scan(&urlStr, &status); err != nil { - if err == sql.ErrNoRows { - return "", fmt.Errorf("workspace %s not found", workspaceID) - } - return "", fmt.Errorf("workspace lookup failed: %w", err) - } - if !urlStr.Valid || urlStr.String == "" { - return "", fmt.Errorf("workspace %s has no URL (status: %s)", workspaceID, status) - } - if platformInDocker && strings.HasPrefix(urlStr.String, "http://127.0.0.1:") { - return provisioner.InternalURL(workspaceID), nil - } - if err := validateAgentURL(urlStr.String); err != nil { - return "", fmt.Errorf("workspace %s: forbidden URL from DB: %w", workspaceID, err) - } - return urlStr.String, nil -} - -// extractA2AText extracts human-readable text from an A2A JSON-RPC response body. -// Falls back to the raw JSON when no text part can be found. 
-func extractA2AText(body []byte) string { - var resp map[string]interface{} - if err := json.Unmarshal(body, &resp); err != nil { - return string(body) - } - - // Propagate A2A errors. - if errObj, ok := resp["error"].(map[string]interface{}); ok { - if msg, ok := errObj["message"].(string); ok { - return "[error] " + msg - } - } - - result, ok := resp["result"].(map[string]interface{}) - if !ok { - return string(body) - } - - // Format 1: result.artifacts[0].parts[0].text - if artifacts, ok := result["artifacts"].([]interface{}); ok && len(artifacts) > 0 { - if art, ok := artifacts[0].(map[string]interface{}); ok { - if parts, ok := art["parts"].([]interface{}); ok && len(parts) > 0 { - if part, ok := parts[0].(map[string]interface{}); ok { - if text, ok := part["text"].(string); ok && text != "" { - return text - } - } - } - } - } - - // Format 2: result.message.parts[0].text - if msg, ok := result["message"].(map[string]interface{}); ok { - if parts, ok := msg["parts"].([]interface{}); ok && len(parts) > 0 { - if part, ok := parts[0].(map[string]interface{}); ok { - if text, ok := part["text"].(string); ok && text != "" { - return text - } - } - } - } - - // Fallback: marshal result as JSON. - b, _ := json.Marshal(result) - return string(b) -} - diff --git a/workspace-server/internal/handlers/mcp_tools.go b/workspace-server/internal/handlers/mcp_tools.go new file mode 100644 index 00000000..26df4fdd --- /dev/null +++ b/workspace-server/internal/handlers/mcp_tools.go @@ -0,0 +1,635 @@ +package handlers + +// mcp_tools.go — MCP bridge tool implementations. +// Each tool* method handles one A2A tool: list_peers, get_workspace_info, +// delegate_task, delegate_task_async, check_task_status, send_message_to_user, +// commit_memory, recall_memory. Also contains URL resolution, SSRF checks, +// and A2A response parsing helpers. 
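+//
+// For reference, the JSON-RPC body that delegate_task / delegate_task_async
+// send to the target agent has this shape (values illustrative, mirroring
+// the json.Marshal calls below):
+//
+//	{
+//	  "jsonrpc": "2.0",
+//	  "id": "<uuid>",
+//	  "method": "message/send",
+//	  "params": {
+//	    "message": {
+//	      "role": "user",
+//	      "parts": [{"type": "text", "text": "<task>"}],
+//	      "messageId": "<uuid>"
+//	    }
+//	  }
+//	}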
+ +import ( + "bytes" + "context" + "database/sql" + "encoding/json" + "fmt" + "io" + "log" + "net" + "net/http" + "net/url" + "os" + "strings" + "time" + + "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/registry" + "github.com/google/uuid" +) +// ───────────────────────────────────────────────────────────────────────────── +// Tool implementations +// ───────────────────────────────────────────────────────────────────────────── + +func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (string, error) { + var parentID sql.NullString + err := h.database.QueryRowContext(ctx, + `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID, + ).Scan(&parentID) + if err == sql.ErrNoRows { + return "", fmt.Errorf("workspace not found") + } + if err != nil { + return "", fmt.Errorf("lookup failed: %w", err) + } + + type peer struct { + ID string `json:"id"` + Name string `json:"name"` + Role string `json:"role"` + Status string `json:"status"` + Tier int `json:"tier"` + } + + var peers []peer + + scanPeers := func(rows *sql.Rows) error { + defer rows.Close() + for rows.Next() { + var p peer + if err := rows.Scan(&p.ID, &p.Name, &p.Role, &p.Status, &p.Tier); err != nil { + return err + } + peers = append(peers, p) + } + return rows.Err() + } + + const cols = `SELECT w.id, w.name, COALESCE(w.role,''), w.status, w.tier` + + // Siblings + if parentID.Valid { + rows, err := h.database.QueryContext(ctx, + cols+` FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`, + parentID.String, workspaceID) + if err == nil { + _ = scanPeers(rows) + } + } else { + rows, err := h.database.QueryContext(ctx, + cols+` FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`, + workspaceID) + if err == nil { + _ = scanPeers(rows) + } + } + + // Children + { + rows, err := h.database.QueryContext(ctx, + cols+` FROM workspaces w WHERE w.parent_id = $1 AND w.status != 'removed'`, + workspaceID) + if err == nil { + _ = scanPeers(rows) + } + } + + // Parent + if parentID.Valid { + rows, err := h.database.QueryContext(ctx, + cols+` FROM workspaces w WHERE w.id = $1 AND w.status != 'removed'`, + parentID.String) + if err == nil { + _ = scanPeers(rows) + } + } + + if len(peers) == 0 { + return "No peers found.", nil + } + + b, _ := json.MarshalIndent(peers, "", " ") + return string(b), nil +} + +func (h *MCPHandler) toolGetWorkspaceInfo(ctx context.Context, workspaceID string) (string, error) { + var id, name, role, status string + var tier int + var parentID sql.NullString + + err := h.database.QueryRowContext(ctx, ` + SELECT id, name, COALESCE(role,''), tier, status, parent_id + FROM workspaces WHERE id = $1 + `, workspaceID).Scan(&id, &name, &role, &tier, &status, &parentID) + if err == sql.ErrNoRows { + return "", fmt.Errorf("workspace not found") + } + if err != nil { + return "", fmt.Errorf("lookup failed: %w", err) + } + + info := map[string]interface{}{ + "id": id, + "name": name, + "role": role, + "tier": tier, + "status": status, + } + if parentID.Valid { + info["parent_id"] = parentID.String + } + b, _ := json.MarshalIndent(info, "", " ") + return string(b), nil +} + +func (h *MCPHandler) toolDelegateTask(ctx context.Context, callerID string, args map[string]interface{}, timeout time.Duration) (string, error) { + targetID, _ := args["workspace_id"].(string) + task, _ := 
args["task"].(string) + if targetID == "" { + return "", fmt.Errorf("workspace_id is required") + } + if task == "" { + return "", fmt.Errorf("task is required") + } + + if !registry.CanCommunicate(callerID, targetID) { + return "", fmt.Errorf("workspace %s is not authorised to communicate with %s", callerID, targetID) + } + + agentURL, err := mcpResolveURL(ctx, h.database, targetID) + if err != nil { + return "", err + } + // SSRF defence: reject private/metadata URLs before making outbound call. + if err := isSafeURL(agentURL); err != nil { + return "", fmt.Errorf("invalid workspace URL: %w", err) + } + + a2aBody, err := json.Marshal(map[string]interface{}{ + "jsonrpc": "2.0", + "id": uuid.New().String(), + "method": "message/send", + "params": map[string]interface{}{ + "message": map[string]interface{}{ + "role": "user", + "parts": []map[string]interface{}{{"type": "text", "text": task}}, + "messageId": uuid.New().String(), + }, + }, + }) + if err != nil { + return "", fmt.Errorf("failed to build A2A request: %w", err) + } + + reqCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + httpReq, err := http.NewRequestWithContext(reqCtx, "POST", agentURL+"/a2a", bytes.NewReader(a2aBody)) + if err != nil { + return "", fmt.Errorf("failed to create request: %w", err) + } + httpReq.Header.Set("Content-Type", "application/json") + // X-Workspace-ID identifies this caller to the A2A proxy. The /workspaces/:id/a2a + // endpoint is intentionally outside WorkspaceAuth (agents do not hold bearer tokens + // to peer workspaces). Access control is enforced by CanCommunicate above, which + // already validated callerID → targetID before this request is constructed. + // callerID was authenticated by WorkspaceAuth on the MCP bridge entry point, + // so this header reflects a verified caller identity, not a spoofable value. + httpReq.Header.Set("X-Workspace-ID", callerID) + + resp, err := http.DefaultClient.Do(httpReq) + if err != nil { + return "", fmt.Errorf("A2A call failed: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) + if err != nil { + return "", fmt.Errorf("failed to read response: %w", err) + } + + return extractA2AText(body), nil +} + +func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string, args map[string]interface{}) (string, error) { + targetID, _ := args["workspace_id"].(string) + task, _ := args["task"].(string) + if targetID == "" { + return "", fmt.Errorf("workspace_id is required") + } + if task == "" { + return "", fmt.Errorf("task is required") + } + + if !registry.CanCommunicate(callerID, targetID) { + return "", fmt.Errorf("workspace %s is not authorised to communicate with %s", callerID, targetID) + } + + taskID := uuid.New().String() + + // Fire and forget in a detached goroutine. Use a background context so + // the call is not cancelled when the HTTP request completes. + go func() { + bgCtx, cancel := context.WithTimeout(context.Background(), mcpAsyncCallTimeout) + defer cancel() + + agentURL, err := mcpResolveURL(bgCtx, h.database, targetID) + if err != nil { + log.Printf("MCPHandler.delegate_task_async: resolve URL for %s: %v", targetID, err) + return + } + // SSRF defence: reject private/metadata URLs before making outbound call. 
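+		// In this async path a rejected URL is logged and the task is
+		// silently dropped; the caller already received
+		// {"status":"dispatched"}, so the failure typically only surfaces
+		// as "not_found" from a later check_task_status call.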
+ if err := isSafeURL(agentURL); err != nil { + log.Printf("MCPHandler.delegate_task_async: unsafe URL for %s: %v", targetID, err) + return + } + + a2aBody, _ := json.Marshal(map[string]interface{}{ + "jsonrpc": "2.0", + "id": taskID, + "method": "message/send", + "params": map[string]interface{}{ + "message": map[string]interface{}{ + "role": "user", + "parts": []map[string]interface{}{{"type": "text", "text": task}}, + "messageId": uuid.New().String(), + }, + }, + }) + + httpReq, err := http.NewRequestWithContext(bgCtx, "POST", agentURL+"/a2a", bytes.NewReader(a2aBody)) + if err != nil { + log.Printf("MCPHandler.delegate_task_async: create request: %v", err) + return + } + httpReq.Header.Set("Content-Type", "application/json") + httpReq.Header.Set("X-Workspace-ID", callerID) + + resp, err := http.DefaultClient.Do(httpReq) + if err != nil { + log.Printf("MCPHandler.delegate_task_async: A2A call to %s: %v", targetID, err) + return + } + defer func() { _ = resp.Body.Close() }() + // Drain response so the connection can be reused. + _, _ = io.Copy(io.Discard, resp.Body) + }() + + return fmt.Sprintf(`{"task_id":%q,"status":"dispatched","target_id":%q}`, taskID, targetID), nil +} + +func (h *MCPHandler) toolCheckTaskStatus(ctx context.Context, callerID string, args map[string]interface{}) (string, error) { + targetID, _ := args["workspace_id"].(string) + taskID, _ := args["task_id"].(string) + if targetID == "" { + return "", fmt.Errorf("workspace_id is required") + } + if taskID == "" { + return "", fmt.Errorf("task_id is required") + } + + var status, errorDetail sql.NullString + var responseBody []byte + + err := h.database.QueryRowContext(ctx, ` + SELECT status, error_detail, response_body + FROM activity_logs + WHERE workspace_id = $1 + AND target_id = $2 + AND request_body->>'delegation_id' = $3 + ORDER BY created_at DESC + LIMIT 1 + `, callerID, targetID, taskID).Scan(&status, &errorDetail, &responseBody) + if err == sql.ErrNoRows { + return fmt.Sprintf(`{"task_id":%q,"status":"not_found","note":"task not tracked or not yet dispatched"}`, taskID), nil + } + if err != nil { + return "", fmt.Errorf("status lookup failed: %w", err) + } + + result := map[string]interface{}{ + "task_id": taskID, + "status": status.String, + "target_id": targetID, + } + if errorDetail.Valid && errorDetail.String != "" { + result["error"] = errorDetail.String + } + if len(responseBody) > 0 { + result["result"] = extractA2AText(responseBody) + } + b, _ := json.MarshalIndent(result, "", " ") + return string(b), nil +} + +func (h *MCPHandler) toolSendMessageToUser(ctx context.Context, workspaceID string, args map[string]interface{}) (string, error) { + message, _ := args["message"].(string) + if message == "" { + return "", fmt.Errorf("message is required") + } + + // Check send_message_to_user is enabled (C3). 
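+	// The gate is process-wide (platform env), not per-workspace config:
+	// user-facing pushes stay off unless the operator opts in explicitly.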
+ if os.Getenv("MOLECULE_MCP_ALLOW_SEND_MESSAGE") != "true" { + return "", fmt.Errorf("send_message_to_user is not enabled on this MCP bridge (set MOLECULE_MCP_ALLOW_SEND_MESSAGE=true)") + } + + var wsName string + err := h.database.QueryRowContext(ctx, + `SELECT name FROM workspaces WHERE id = $1 AND status != 'removed'`, workspaceID, + ).Scan(&wsName) + if err != nil { + return "", fmt.Errorf("workspace not found") + } + + h.broadcaster.BroadcastOnly(workspaceID, "AGENT_MESSAGE", map[string]interface{}{ + "message": message, + "workspace_id": workspaceID, + "name": wsName, + }) + + return "Message sent.", nil +} + + +func (h *MCPHandler) toolCommitMemory(ctx context.Context, workspaceID string, args map[string]interface{}) (string, error) { + content, _ := args["content"].(string) + scope, _ := args["scope"].(string) + if content == "" { + return "", fmt.Errorf("content is required") + } + if scope == "" { + scope = "LOCAL" + } + + // C3: GLOBAL scope is blocked on the MCP bridge. + if scope == "GLOBAL" { + return "", fmt.Errorf("GLOBAL scope is not permitted via the MCP bridge — use LOCAL or TEAM") + } + if scope != "LOCAL" && scope != "TEAM" { + return "", fmt.Errorf("scope must be LOCAL or TEAM") + } + + memoryID := uuid.New().String() + // SAFE-T1201 (#838): scrub known credential patterns before persistence so + // plain-text API keys pulled in via tool responses can't land in the + // memories table (and leak into shared TEAM scope). Reuses redactSecrets + // already shipped for the HTTP path in PR #881 — this was the MCP-bridge + // sibling the original fix missed. Runs on every write regardless of scope. + content, _ = redactSecrets(workspaceID, content) + _, err := h.database.ExecContext(ctx, ` + INSERT INTO agent_memories (id, workspace_id, content, scope, namespace) + VALUES ($1, $2, $3, $4, $5) + `, memoryID, workspaceID, content, scope, workspaceID) + if err != nil { + log.Printf("MCPHandler.commit_memory workspace=%s: %v", workspaceID, err) + return "", fmt.Errorf("failed to save memory") + } + + return fmt.Sprintf(`{"id":%q,"scope":%q}`, memoryID, scope), nil +} + +func (h *MCPHandler) toolRecallMemory(ctx context.Context, workspaceID string, args map[string]interface{}) (string, error) { + query, _ := args["query"].(string) + scope, _ := args["scope"].(string) + + // C3: GLOBAL scope is blocked on the MCP bridge. + if scope == "GLOBAL" { + return "", fmt.Errorf("GLOBAL scope is not permitted via the MCP bridge — use LOCAL, TEAM, or empty") + } + + var rows *sql.Rows + var err error + + switch scope { + case "LOCAL": + rows, err = h.database.QueryContext(ctx, ` + SELECT id, content, scope, created_at + FROM agent_memories + WHERE workspace_id = $1 AND scope = 'LOCAL' + AND ($2 = '' OR content ILIKE '%' || $2 || '%') + ORDER BY created_at DESC LIMIT 50 + `, workspaceID, query) + case "TEAM": + // Team scope: parent + all siblings. + rows, err = h.database.QueryContext(ctx, ` + SELECT m.id, m.content, m.scope, m.created_at + FROM agent_memories m + JOIN workspaces w ON w.id = m.workspace_id + WHERE m.scope = 'TEAM' + AND w.status != 'removed' + AND (w.id = $1 OR w.parent_id = (SELECT parent_id FROM workspaces WHERE id = $1 AND parent_id IS NOT NULL)) + AND ($2 = '' OR m.content ILIKE '%' || $2 || '%') + ORDER BY m.created_at DESC LIMIT 50 + `, workspaceID, query) + default: + // Empty scope → LOCAL only for the MCP bridge (GLOBAL excluded per C3). 
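+	// More precisely: the query below returns this workspace's own LOCAL
+	// and TEAM rows; siblings' TEAM memories require an explicit
+	// scope=TEAM call.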
+ rows, err = h.database.QueryContext(ctx, ` + SELECT id, content, scope, created_at + FROM agent_memories + WHERE workspace_id = $1 AND scope IN ('LOCAL', 'TEAM') + AND ($2 = '' OR content ILIKE '%' || $2 || '%') + ORDER BY created_at DESC LIMIT 50 + `, workspaceID, query) + } + if err != nil { + return "", fmt.Errorf("memory search failed: %w", err) + } + defer rows.Close() + + type memEntry struct { + ID string `json:"id"` + Content string `json:"content"` + Scope string `json:"scope"` + CreatedAt string `json:"created_at"` + } + var results []memEntry + for rows.Next() { + var e memEntry + if err := rows.Scan(&e.ID, &e.Content, &e.Scope, &e.CreatedAt); err != nil { + continue + } + results = append(results, e) + } + if err := rows.Err(); err != nil { + return "", fmt.Errorf("memory scan error: %w", err) + } + + if len(results) == 0 { + return "No memories found.", nil + } + b, _ := json.MarshalIndent(results, "", " ") + return string(b), nil +} + +// isSafeURL validates that a URL resolves to a publicly-routable address, +// preventing A2A requests from being redirected to internal/cloud-metadata +// infrastructure (SSRF, CWE-918). Workspace URLs come from DB/Redis caches +// so we validate before making any outbound HTTP call. +func isSafeURL(rawURL string) error { + u, err := url.Parse(rawURL) + if err != nil { + return fmt.Errorf("invalid URL: %w", err) + } + // Reject non-HTTP(S) schemes. + if u.Scheme != "http" && u.Scheme != "https" { + return fmt.Errorf("forbidden scheme: %s (only http/https allowed)", u.Scheme) + } + host := u.Hostname() + if host == "" { + return fmt.Errorf("empty hostname") + } + // Block direct IP addresses. + if ip := net.ParseIP(host); ip != nil { + if ip.IsLoopback() || ip.IsUnspecified() || ip.IsLinkLocalUnicast() { + return fmt.Errorf("forbidden loopback/unspecified IP: %s", ip) + } + if isPrivateOrMetadataIP(ip) { + return fmt.Errorf("forbidden private/metadata IP: %s", ip) + } + return nil + } + // For hostnames, resolve and validate each returned IP. + addrs, err := net.LookupHost(host) + if err != nil { + // DNS resolution failure — block it. Could be an internal hostname. + return fmt.Errorf("DNS resolution blocked for hostname: %s (%v)", host, err) + } + if len(addrs) == 0 { + return fmt.Errorf("DNS returned no addresses for: %s", host) + } + for _, addr := range addrs { + ip := net.ParseIP(addr) + if ip != nil && (ip.IsLoopback() || ip.IsUnspecified() || ip.IsLinkLocalUnicast() || isPrivateOrMetadataIP(ip)) { + return fmt.Errorf("hostname %s resolves to forbidden IP: %s", host, ip) + } + } + return nil +} + +// isPrivateOrMetadataIP returns true for RFC-1918 private, carrier-grade NAT, +// link-local, and cloud metadata ranges. 
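+// Note: the check is IPv4-only (To4() returns nil for IPv6, so any IPv6
+// address yields false). isSafeURL separately rejects IPv6 loopback and
+// link-local via IsLoopback/IsLinkLocalUnicast, but IPv6 unique-local
+// (fc00::/7) ranges are not covered here; deployments with IPv6-internal
+// services should extend these ranges.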
+func isPrivateOrMetadataIP(ip net.IP) bool {
+	var privateRanges = []net.IPNet{
+		{IP: net.ParseIP("10.0.0.0"), Mask: net.CIDRMask(8, 32)},
+		{IP: net.ParseIP("172.16.0.0"), Mask: net.CIDRMask(12, 32)},
+		{IP: net.ParseIP("192.168.0.0"), Mask: net.CIDRMask(16, 32)},
+		{IP: net.ParseIP("169.254.0.0"), Mask: net.CIDRMask(16, 32)},
+		{IP: net.ParseIP("100.64.0.0"), Mask: net.CIDRMask(10, 32)},
+		{IP: net.ParseIP("192.0.2.0"), Mask: net.CIDRMask(24, 32)},
+		{IP: net.ParseIP("198.51.100.0"), Mask: net.CIDRMask(24, 32)},
+		{IP: net.ParseIP("203.0.113.0"), Mask: net.CIDRMask(24, 32)},
+	}
+	ip = ip.To4()
+	if ip == nil {
+		return false
+	}
+	for _, r := range privateRanges {
+		if r.Contains(ip) {
+			return true
+		}
+	}
+	return false
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Helpers
+// ─────────────────────────────────────────────────────────────────────────────
+
+// mcpResolveURL returns a routable URL for a workspace's A2A server.
+//
+// Resolution order:
+// 1. Docker-internal URL cache (set by provisioner; correct when platform is in Docker)
+// 2. Redis URL cache
+// 3. DB `url` column fallback, with 127.0.0.1→Docker bridge rewrite when in Docker
+//
+// SECURITY (F1083 / #1130): returned URLs are run through validateAgentURL
+// to block SSRF targets (private IPs, loopback, cloud metadata). The one
+// exception is the 127.0.0.1→Docker-bridge rewrite, which skips validation
+// because provisioner.InternalURL constructs the URL itself rather than
+// trusting a cached or stored value.
+func mcpResolveURL(ctx context.Context, database *sql.DB, workspaceID string) (string, error) {
+	if platformInDocker {
+		if url, err := db.GetCachedInternalURL(ctx, workspaceID); err == nil && url != "" {
+			if err := validateAgentURL(url); err != nil {
+				return "", fmt.Errorf("workspace %s: forbidden URL from internal cache: %w", workspaceID, err)
+			}
+			return url, nil
+		}
+	}
+	if url, err := db.GetCachedURL(ctx, workspaceID); err == nil && url != "" {
+		if platformInDocker && strings.HasPrefix(url, "http://127.0.0.1:") {
+			return provisioner.InternalURL(workspaceID), nil
+		}
+		if err := validateAgentURL(url); err != nil {
+			return "", fmt.Errorf("workspace %s: forbidden URL from Redis cache: %w", workspaceID, err)
+		}
+		return url, nil
+	}
+
+	var urlStr sql.NullString
+	var status string
+	if err := database.QueryRowContext(ctx,
+		`SELECT url, status FROM workspaces WHERE id = $1`, workspaceID,
+	).Scan(&urlStr, &status); err != nil {
+		if err == sql.ErrNoRows {
+			return "", fmt.Errorf("workspace %s not found", workspaceID)
+		}
+		return "", fmt.Errorf("workspace lookup failed: %w", err)
+	}
+	if !urlStr.Valid || urlStr.String == "" {
+		return "", fmt.Errorf("workspace %s has no URL (status: %s)", workspaceID, status)
+	}
+	if platformInDocker && strings.HasPrefix(urlStr.String, "http://127.0.0.1:") {
+		return provisioner.InternalURL(workspaceID), nil
+	}
+	if err := validateAgentURL(urlStr.String); err != nil {
+		return "", fmt.Errorf("workspace %s: forbidden URL from DB: %w", workspaceID, err)
+	}
+	return urlStr.String, nil
+}
+
+// extractA2AText extracts human-readable text from an A2A JSON-RPC response body.
+// Falls back to the raw JSON when no text part can be found.
+func extractA2AText(body []byte) string {
+	var resp map[string]interface{}
+	if err := json.Unmarshal(body, &resp); err != nil {
+		return string(body)
+	}
+
+	// Propagate A2A errors.
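+	// (Shape summary: errors arrive as {"error":{"message":...}}; successes
+	// carry text at result.artifacts[0].parts[0].text or
+	// result.message.parts[0].text; anything else falls through to the
+	// raw-JSON fallback at the bottom.)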
+ if errObj, ok := resp["error"].(map[string]interface{}); ok { + if msg, ok := errObj["message"].(string); ok { + return "[error] " + msg + } + } + + result, ok := resp["result"].(map[string]interface{}) + if !ok { + return string(body) + } + + // Format 1: result.artifacts[0].parts[0].text + if artifacts, ok := result["artifacts"].([]interface{}); ok && len(artifacts) > 0 { + if art, ok := artifacts[0].(map[string]interface{}); ok { + if parts, ok := art["parts"].([]interface{}); ok && len(parts) > 0 { + if part, ok := parts[0].(map[string]interface{}); ok { + if text, ok := part["text"].(string); ok && text != "" { + return text + } + } + } + } + } + + // Format 2: result.message.parts[0].text + if msg, ok := result["message"].(map[string]interface{}); ok { + if parts, ok := msg["parts"].([]interface{}); ok && len(parts) > 0 { + if part, ok := parts[0].(map[string]interface{}); ok { + if text, ok := part["text"].(string); ok && text != "" { + return text + } + } + } + } + + // Fallback: marshal result as JSON. + b, _ := json.Marshal(result) + return string(b) +} + diff --git a/workspace-server/internal/handlers/org.go b/workspace-server/internal/handlers/org.go index cd59a142..af5ee09a 100644 --- a/workspace-server/internal/handlers/org.go +++ b/workspace-server/internal/handlers/org.go @@ -1,27 +1,21 @@ package handlers +// org.go — core org handler: types, struct, ListTemplates, Import. +// Tree creation logic is in org_import.go; utility helpers in org_helpers.go. + import ( "context" - "encoding/json" "fmt" "log" "net/http" "os" "path/filepath" - "regexp" - "sort" - "strings" - "time" "github.com/Molecule-AI/molecule-monorepo/platform/internal/channels" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/crypto" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" "github.com/Molecule-AI/molecule-monorepo/platform/internal/events" "github.com/Molecule-AI/molecule-monorepo/platform/internal/models" "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/scheduler" "github.com/gin-gonic/gin" - "github.com/google/uuid" "gopkg.in/yaml.v3" ) @@ -353,747 +347,3 @@ func (h *OrgHandler) Import(c *gin.Context) { c.JSON(status, resp) } -// createWorkspaceTree recursively creates a workspace and its children. -// provisionSem limits concurrent Docker container creation (#1084). -func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, defaults OrgDefaults, orgBaseDir string, results *[]map[string]interface{}, provisionSem chan struct{}) error { - // Apply defaults - runtime := ws.Runtime - if runtime == "" { - runtime = defaults.Runtime - } - if runtime == "" { - runtime = "langgraph" - } - model := ws.Model - if model == "" { - model = defaults.Model - } - if model == "" { - if runtime == "claude-code" { - model = "sonnet" - } else { - model = "anthropic:claude-opus-4-7" - } - } - tier := ws.Tier - if tier == 0 { - tier = defaults.Tier - } - if tier == 0 { - tier = 2 - } - - id := uuid.New().String() - awarenessNS := workspaceAwarenessNamespace(id) - - var role interface{} - if ws.Role != "" { - role = ws.Role - } - - // Expand ${VAR} references in workspace_dir against the org's .env files - // before validation. Without this, a template that ships - // `workspace_dir: ${WORKSPACE_DIR}` (so each operator can pick the host - // path to bind-mount) reaches validateWorkspaceDir as the literal - // "${WORKSPACE_DIR}" string and fails with "must be an absolute path". 
- // Other fields (channel config, prompts) already go through expandWithEnv; - // workspace_dir was the last hold-out. - if ws.WorkspaceDir != "" { - ws.WorkspaceDir = expandWithEnv(ws.WorkspaceDir, loadWorkspaceEnv(orgBaseDir, ws.FilesDir)) - } - - // Validate and convert workspace_dir to NULL if empty - var workspaceDir interface{} - if ws.WorkspaceDir != "" { - if err := validateWorkspaceDir(ws.WorkspaceDir); err != nil { - return fmt.Errorf("workspace %s: %w", ws.Name, err) - } - workspaceDir = ws.WorkspaceDir - } - - // #65: validate workspace_access (defaults to "none" when empty). - workspaceAccess := ws.WorkspaceAccess - if workspaceAccess == "" { - workspaceAccess = provisioner.WorkspaceAccessNone - } - if err := provisioner.ValidateWorkspaceAccess(workspaceAccess, ws.WorkspaceDir); err != nil { - return fmt.Errorf("workspace %s: %w", ws.Name, err) - } - - ctx := context.Background() - - // Insert workspace - _, err := db.DB.ExecContext(ctx, ` - INSERT INTO workspaces (id, name, role, tier, runtime, awareness_namespace, status, parent_id, workspace_dir, workspace_access) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) - `, id, ws.Name, role, tier, runtime, awarenessNS, "provisioning", parentID, workspaceDir, workspaceAccess) - if err != nil { - log.Printf("Org import: failed to create %s: %v", ws.Name, err) - return fmt.Errorf("failed to create %s: %w", ws.Name, err) - } - - // Canvas layout with coordinates from YAML - if _, err := db.DB.ExecContext(ctx, `INSERT INTO canvas_layouts (workspace_id, x, y) VALUES ($1, $2, $3)`, id, ws.Canvas.X, ws.Canvas.Y); err != nil { - log.Printf("Org import: canvas layout insert failed for %s: %v", ws.Name, err) - } - - // Broadcast - h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISIONING", id, map[string]interface{}{ - "name": ws.Name, "tier": tier, - }) - - // Seed initial memories from workspace config or defaults (issue #1050). - // Per-workspace initial_memories override defaults; if workspace has none, - // fall back to defaults.initial_memories. - wsMemories := ws.InitialMemories - if len(wsMemories) == 0 { - wsMemories = defaults.InitialMemories - } - seedInitialMemories(ctx, id, wsMemories, awarenessNS) - - // Handle external workspaces - if ws.External { - if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = 'online', url = $1 WHERE id = $2`, ws.URL, id); err != nil { - log.Printf("Org import: external workspace status update failed for %s: %v", ws.Name, err) - } - h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_ONLINE", id, map[string]interface{}{ - "name": ws.Name, "external": true, - }) - } else if h.provisioner != nil { - // Provision container - payload := models.CreateWorkspacePayload{ - Name: ws.Name, Tier: tier, Runtime: runtime, Model: model, - WorkspaceDir: ws.WorkspaceDir, - WorkspaceAccess: workspaceAccess, - } - templatePath := "" - if ws.Template != "" { - // `template` comes from the uploaded YAML — treat as untrusted. - // Only accept paths that stay inside h.configsDir. - if tp, err := resolveInsideRoot(h.configsDir, ws.Template); err == nil { - if _, statErr := os.Stat(tp); statErr == nil { - templatePath = tp - } - } - } - if templatePath == "" { - // #241: sanitizeRuntime() allowlists the runtime string so a - // crafted org.yaml cannot use it as a path-traversal oracle. 
- safeRuntime := sanitizeRuntime(runtime) - runtimeDefault := filepath.Join(h.configsDir, safeRuntime+"-default") - if _, err := os.Stat(runtimeDefault); err == nil { - templatePath = runtimeDefault - } - } - - // Always generate default config.yaml (runtime, model, tier, etc.) - configFiles := h.workspace.ensureDefaultConfig(id, payload) - - // Copy files_dir contents on top (system-prompt.md, CLAUDE.md, skills/, etc.) - // Uses templatePath for CopyTemplateToContainer — runs AFTER configFiles are written - if ws.FilesDir != "" && orgBaseDir != "" { - // `files_dir` also comes from untrusted YAML. Join inside orgBaseDir - // (already validated above) and reject anything that escapes. - if filesPath, err := resolveInsideRoot(orgBaseDir, ws.FilesDir); err == nil { - if info, statErr := os.Stat(filesPath); statErr == nil && info.IsDir() { - templatePath = filesPath - } - } - } - - // Pre-install plugins: copy from registry into configFiles as plugins//*. - // Per-workspace plugins UNION with defaults.plugins (issue #68). - // A leading "!" or "-" on a per-workspace entry opts that plugin out. - plugins := mergePlugins(defaults.Plugins, ws.Plugins) - if len(plugins) > 0 { - if configFiles == nil { - configFiles = map[string][]byte{} - } - pluginsBase, _ := filepath.Abs(filepath.Join(h.configsDir, "..", "plugins")) - for _, pluginName := range plugins { - pluginSrc := filepath.Join(pluginsBase, pluginName) - if info, err := os.Stat(pluginSrc); err != nil || !info.IsDir() { - log.Printf("Org import: plugin %s not found at %s, skipping", pluginName, pluginSrc) - continue - } - filepath.Walk(pluginSrc, func(path string, info os.FileInfo, err error) error { - if err != nil || info.IsDir() { - return nil - } - rel, _ := filepath.Rel(pluginSrc, path) - data, readErr := os.ReadFile(path) - if readErr == nil { - configFiles["plugins/"+pluginName+"/"+rel] = data - } - return nil - }) - } - } - - // Render category_routing into config.yaml so the agent can read its routing - // table at runtime without hardcoded role names in prompts (issue #51). - // Per-workspace keys replace defaults per-key (empty list drops the key); - // see mergeCategoryRouting for exact semantics. - routing := mergeCategoryRouting(defaults.CategoryRouting, ws.CategoryRouting) - if len(routing) > 0 { - if configFiles == nil { - configFiles = map[string][]byte{} - } - block, err := renderCategoryRoutingYAML(routing) - if err != nil { - log.Printf("Org import: failed to render category_routing for %s: %v", ws.Name, err) - } else { - configFiles["config.yaml"] = appendYAMLBlock(configFiles["config.yaml"], block) - } - } - - // Resolve initial_prompt — inline wins, then file-ref, then defaults - // (inline → file → defaults.inline → defaults.file). File refs are - // rooted at // per resolvePromptRef semantics. - initialPrompt, err := resolvePromptRef(ws.InitialPrompt, ws.InitialPromptFile, orgBaseDir, ws.FilesDir) - if err != nil { - log.Printf("Org import: failed to resolve initial_prompt for %s: %v", ws.Name, err) - } - if initialPrompt == "" { - // Fall back to defaults. Defaults live at the org root, so they - // resolve with empty filesDir (relative to orgBaseDir). 
- var defaultErr error - initialPrompt, defaultErr = resolvePromptRef(defaults.InitialPrompt, defaults.InitialPromptFile, orgBaseDir, "") - if defaultErr != nil { - log.Printf("Org import: failed to resolve defaults.initial_prompt for %s: %v", ws.Name, defaultErr) - } - } - if initialPrompt != "" { - if configFiles == nil { - configFiles = map[string][]byte{} - } - // Append initial_prompt to config.yaml using YAML block scalar. - // Trim each line to avoid trailing whitespace issues. - trimmed := strings.TrimSpace(initialPrompt) - lines := strings.Split(trimmed, "\n") - for i, line := range lines { - lines[i] = strings.TrimRight(line, " \t") - } - indented := strings.Join(lines, "\n ") - configFiles["config.yaml"] = appendYAMLBlock(configFiles["config.yaml"], fmt.Sprintf("initial_prompt: |\n %s\n", indented)) - log.Printf("Org import: injected initial_prompt (%d chars) into config.yaml for %s", len(trimmed), ws.Name) - } - - // Resolve idle_prompt — same precedence (ws inline → ws file → defaults). - // Inject into config.yaml alongside idle_interval_seconds so the - // workspace's heartbeat loop picks up the idle-reflection cadence on - // boot (see workspace/heartbeat.py + config.py). - idlePrompt, err := resolvePromptRef(ws.IdlePrompt, ws.IdlePromptFile, orgBaseDir, ws.FilesDir) - if err != nil { - log.Printf("Org import: failed to resolve idle_prompt for %s: %v", ws.Name, err) - } - if idlePrompt == "" { - var defaultErr error - idlePrompt, defaultErr = resolvePromptRef(defaults.IdlePrompt, defaults.IdlePromptFile, orgBaseDir, "") - if defaultErr != nil { - log.Printf("Org import: failed to resolve defaults.idle_prompt for %s: %v", ws.Name, defaultErr) - } - } - idleInterval := ws.IdleIntervalSeconds - if idleInterval == 0 { - idleInterval = defaults.IdleIntervalSeconds - } - if idlePrompt != "" { - if configFiles == nil { - configFiles = map[string][]byte{} - } - trimmed := strings.TrimSpace(idlePrompt) - lines := strings.Split(trimmed, "\n") - for i, line := range lines { - lines[i] = strings.TrimRight(line, " \t") - } - indented := strings.Join(lines, "\n ") - // idle_interval_seconds belongs with idle_prompt — empty idle_prompt - // means the idle loop never fires regardless of interval, so we - // only emit interval when there's a body to go with it. - if idleInterval <= 0 { - idleInterval = 600 // same default as workspace/config.py - } - block := fmt.Sprintf("idle_interval_seconds: %d\nidle_prompt: |\n %s\n", idleInterval, indented) - configFiles["config.yaml"] = appendYAMLBlock(configFiles["config.yaml"], block) - log.Printf("Org import: injected idle_prompt (%d chars, interval=%ds) into config.yaml for %s", len(trimmed), idleInterval, ws.Name) - } - - // Inline system_prompt (only if no files_dir provides one) - if ws.SystemPrompt != "" { - if configFiles == nil { - configFiles = map[string][]byte{} - } - configFiles["system-prompt.md"] = []byte(ws.SystemPrompt) - } - - // Inject secrets from .env files as workspace secrets. - // Resolution: workspace .env → org root .env (workspace overrides org root). - // Each line: KEY=VALUE → stored as encrypted workspace secret. - envVars := map[string]string{} - if orgBaseDir != "" { - // 1. Org root .env (shared defaults) - parseEnvFile(filepath.Join(orgBaseDir, ".env"), envVars) - // 2. 
Workspace-specific .env (overrides) - if ws.FilesDir != "" { - parseEnvFile(filepath.Join(orgBaseDir, ws.FilesDir, ".env"), envVars) - } - } - // Store as workspace secrets via DB (encrypted if key is set, raw otherwise) - for key, value := range envVars { - var encrypted []byte - if crypto.IsEnabled() { - var err error - encrypted, err = crypto.Encrypt([]byte(value)) - if err != nil { - log.Printf("Org import: failed to encrypt secret %s for %s: %v", key, ws.Name, err) - continue - } - } else { - encrypted = []byte(value) // store raw when encryption disabled - } - if _, err := db.DB.ExecContext(ctx, ` - INSERT INTO workspace_secrets (workspace_id, key, encrypted_value) - VALUES ($1, $2, $3) - ON CONFLICT (workspace_id, key) DO UPDATE SET encrypted_value = $3, updated_at = now() - `, id, key, encrypted); err != nil { - log.Printf("Org import: failed to insert secret %s for %s: %v", key, ws.Name, err) - } - } - - // #1084: limit concurrent Docker provisioning via semaphore. - provisionSem <- struct{}{} // acquire - go func(wID, tPath string, cFiles map[string][]byte, p models.CreateWorkspacePayload) { - defer func() { <-provisionSem }() // release - h.workspace.provisionWorkspace(wID, tPath, cFiles, p) - }(id, templatePath, configFiles, payload) - } - - // Insert schedules if defined. Resolve each schedule's prompt body from - // either inline `prompt:` or `prompt_file:` (file ref relative to the - // workspace's files_dir). Inline wins; empty prompt after resolution is - // a configuration error (cron with no body would never do anything). - for _, sched := range ws.Schedules { - tz := sched.Timezone - if tz == "" { - tz = "UTC" - } - enabled := true - if sched.Enabled != nil { - enabled = *sched.Enabled - } - prompt, promptErr := resolvePromptRef(sched.Prompt, sched.PromptFile, orgBaseDir, ws.FilesDir) - if promptErr != nil { - log.Printf("Org import: failed to resolve prompt for schedule '%s' on %s: %v — skipping insert", sched.Name, ws.Name, promptErr) - continue - } - if prompt == "" { - log.Printf("Org import: schedule '%s' on %s has empty prompt (neither prompt nor prompt_file set) — skipping insert", sched.Name, ws.Name) - continue - } - // #722: surface the error rather than silently using time.Time{} (zero) - // which lib/pq stores as 0001-01-01 and may confuse the fire query. - nextRun, nextRunErr := scheduler.ComputeNextRun(sched.CronExpr, tz, time.Now()) - if nextRunErr != nil { - log.Printf("Org import: invalid cron expression for schedule '%s' on %s: %v — skipping insert", - sched.Name, ws.Name, nextRunErr) - continue - } - if _, err := db.DB.ExecContext(context.Background(), orgImportScheduleSQL, - id, sched.Name, sched.CronExpr, tz, prompt, enabled, nextRun); err != nil { - log.Printf("Org import: failed to upsert schedule '%s' for %s: %v", sched.Name, ws.Name, err) - } else { - log.Printf("Org import: schedule '%s' (%s, %d chars) upserted for %s (source=template)", sched.Name, sched.CronExpr, len(prompt), ws.Name) - } - } - - // Insert channels if defined (Telegram, Slack, etc.). Config values - // support ${VAR} expansion from .env files. The manager is reloaded - // once at the end of org import (in Import), not per-workspace. - channelEnv := loadWorkspaceEnv(orgBaseDir, ws.FilesDir) - wsChannelsCreated := []string{} - wsChannelsSkipped := []map[string]string{} - // skipChannel records a skipped channel with consistent shape across all reasons. 
- skipChannel := func(channelType, reason string) { - wsChannelsSkipped = append(wsChannelsSkipped, map[string]string{ - "workspace": ws.Name, - "type": channelType, // empty string when type field was missing - "reason": reason, - }) - } - - for _, ch := range ws.Channels { - if ch.Type == "" { - skipChannel("", "empty type") - log.Printf("Org import: skipping channel with empty type for %s", ws.Name) - continue - } - // Validate adapter exists upfront — fail fast instead of inserting orphan rows - adapter, ok := channels.GetAdapter(ch.Type) - if !ok { - skipChannel(ch.Type, "unknown adapter") - log.Printf("Org import: skipping %s channel for %s — no adapter registered", ch.Type, ws.Name) - continue - } - - expandedConfig := make(map[string]interface{}, len(ch.Config)) - missing := []string{} - for k, v := range ch.Config { - expanded := expandWithEnv(v, channelEnv) - if hasUnresolvedVarRef(v, expanded) { - missing = append(missing, v) - } - expandedConfig[k] = expanded - } - if len(missing) > 0 { - skipChannel(ch.Type, fmt.Sprintf("missing env: %v", missing)) - log.Printf("Org import: skipping %s channel for %s — env vars not set: %v", ch.Type, ws.Name, missing) - continue - } - - // Adapter-level config validation - if err := adapter.ValidateConfig(expandedConfig); err != nil { - skipChannel(ch.Type, err.Error()) - log.Printf("Org import: skipping %s channel for %s — invalid config: %v", ch.Type, ws.Name, err) - continue - } - - configJSON, err := json.Marshal(expandedConfig) - if err != nil { - log.Printf("Org import: failed to marshal config for %s channel: %v", ch.Type, err) - continue - } - allowedJSON, err := json.Marshal(ch.AllowedUsers) - if err != nil { - log.Printf("Org import: failed to marshal allowed_users for %s channel: %v", ch.Type, err) - continue - } - enabled := true - if ch.Enabled != nil { - enabled = *ch.Enabled - } - // Idempotent insert — if same workspace+type already exists, update config - if _, err := db.DB.ExecContext(context.Background(), ` - INSERT INTO workspace_channels (workspace_id, channel_type, channel_config, enabled, allowed_users) - VALUES ($1, $2, $3::jsonb, $4, $5::jsonb) - ON CONFLICT (workspace_id, channel_type) DO UPDATE - SET channel_config = EXCLUDED.channel_config, - enabled = EXCLUDED.enabled, - allowed_users = EXCLUDED.allowed_users, - updated_at = now() - `, id, ch.Type, string(configJSON), enabled, string(allowedJSON)); err != nil { - log.Printf("Org import: failed to create %s channel for %s: %v", ch.Type, ws.Name, err) - } else { - wsChannelsCreated = append(wsChannelsCreated, ch.Type) - log.Printf("Org import: %s channel created for %s", ch.Type, ws.Name) - } - } - - resultEntry := map[string]interface{}{ - "id": id, - "name": ws.Name, - "tier": tier, - } - if len(wsChannelsCreated) > 0 { - resultEntry["channels"] = wsChannelsCreated - } - if len(wsChannelsSkipped) > 0 { - resultEntry["channels_skipped"] = wsChannelsSkipped - } - *results = append(*results, resultEntry) - - // Recurse into children. Brief pacing avoids overwhelming Docker when - // creating many containers in sequence; container provisioning runs in - // goroutines so the main createWorkspaceTree returns quickly. 
- for _, child := range ws.Children { - if err := h.createWorkspaceTree(child, &id, defaults, orgBaseDir, results, provisionSem); err != nil { - return err - } - time.Sleep(workspaceCreatePacingMs * time.Millisecond) - } - - return nil -} - -func countWorkspaces(workspaces []OrgWorkspace) int { - count := len(workspaces) - for _, ws := range workspaces { - count += countWorkspaces(ws.Children) - } - return count -} - -// resolvePromptRef reads a prompt body from either an inline string or a -// file ref relative to the workspace's files_dir. Inline always wins when -// both are non-empty (caller-provided inline is more authoritative than a -// file path that may not exist yet during dev loops). -// -// File resolution: -// - `//` when filesDir is non-empty -// - `/` when filesDir is empty (defaults-level refs) -// -// Both paths go through resolveInsideRoot so a crafted fileRef can't escape -// the org template directory via traversal (same defense the files_dir -// copy-step uses). -// -// Returns (resolved body, error). If both inline and fileRef are empty, -// returns ("", nil) — caller decides whether that's a problem. -func resolvePromptRef(inline, fileRef, orgBaseDir, filesDir string) (string, error) { - if inline != "" { - return inline, nil - } - if fileRef == "" { - return "", nil - } - if orgBaseDir == "" { - // Inline-only template (POST /org/import with a raw Template in the - // JSON body, not a dir). File refs can't be resolved — surface the - // problem rather than silently returning empty. - return "", fmt.Errorf("prompt_file %q requires a dir-based org template (no orgBaseDir in inline-template mode)", fileRef) - } - searchRoot := orgBaseDir - if filesDir != "" { - p, err := resolveInsideRoot(orgBaseDir, filesDir) - if err != nil { - return "", fmt.Errorf("invalid files_dir %q: %w", filesDir, err) - } - searchRoot = p - } - abs, err := resolveInsideRoot(searchRoot, fileRef) - if err != nil { - return "", fmt.Errorf("invalid prompt_file %q: %w", fileRef, err) - } - data, err := os.ReadFile(abs) - if err != nil { - return "", fmt.Errorf("read prompt_file %q: %w", fileRef, err) - } - return string(data), nil -} - -// envVarRefPattern matches actual ${VAR} or $VAR references (not literal $). -// Used to detect unresolved placeholders without false positives like "$5". -var envVarRefPattern = regexp.MustCompile(`\$\{?[A-Za-z_][A-Za-z0-9_]*\}?`) - -// hasUnresolvedVarRef returns true if the original string had a ${VAR} or $VAR -// reference that the expanded string didn't fully replace (i.e. the var was unset). -func hasUnresolvedVarRef(original, expanded string) bool { - if !envVarRefPattern.MatchString(original) { - return false // no var refs to resolve - } - // If expansion produced the same string and that string still has refs, unresolved. - // If expansion stripped them to "", also unresolved. - return expanded == "" || envVarRefPattern.MatchString(expanded) -} - -// expandWithEnv expands ${VAR} and $VAR references in s using the env map. -// Falls back to the platform process env if a var isn't in the map. -func expandWithEnv(s string, env map[string]string) string { - return os.Expand(s, func(key string) string { - if v, ok := env[key]; ok { - return v - } - return os.Getenv(key) - }) -} - -// loadWorkspaceEnv reads the org root .env and the workspace-specific .env -// (workspace overrides org root). Used by both secret injection and channel -// config expansion. 
-func loadWorkspaceEnv(orgBaseDir, filesDir string) map[string]string { - envVars := map[string]string{} - if orgBaseDir == "" { - return envVars - } - parseEnvFile(filepath.Join(orgBaseDir, ".env"), envVars) - if filesDir != "" { - parseEnvFile(filepath.Join(orgBaseDir, filesDir, ".env"), envVars) - } - return envVars -} - -// parseEnvFile reads a .env file and adds KEY=VALUE pairs to the map. -// Skips comments (#) and empty lines. Values can be quoted. -func parseEnvFile(path string, out map[string]string) { - data, err := os.ReadFile(path) - if err != nil { - return - } - for _, line := range strings.Split(string(data), "\n") { - line = strings.TrimSpace(line) - if line == "" || strings.HasPrefix(line, "#") { - continue - } - parts := strings.SplitN(line, "=", 2) - if len(parts) != 2 { - continue - } - key := strings.TrimSpace(parts[0]) - value := strings.TrimSpace(parts[1]) - // Strip surrounding quotes - if len(value) >= 2 && ((value[0] == '"' && value[len(value)-1] == '"') || (value[0] == '\'' && value[len(value)-1] == '\'')) { - value = value[1 : len(value)-1] - } - if key != "" && value != "" { - out[key] = value - } - } -} - -// mergeCategoryRouting unions defaults.category_routing with per-workspace -// category_routing. Workspace-level keys override the default's value for that -// key (the role list is replaced wholesale, not unioned per-key, so a workspace -// can narrow a category — e.g. "infra: [DevOps Only]"). Empty role lists drop -// the category entirely. See issue #51. -func mergeCategoryRouting(defaultRouting, wsRouting map[string][]string) map[string][]string { - out := map[string][]string{} - for k, v := range defaultRouting { - if k == "" || len(v) == 0 { - continue - } - cp := make([]string, len(v)) - copy(cp, v) - out[k] = cp - } - for k, v := range wsRouting { - if k == "" { - continue - } - if len(v) == 0 { - // Empty list = explicit "drop this category for this workspace" - delete(out, k) - continue - } - cp := make([]string, len(v)) - copy(cp, v) - out[k] = cp - } - return out -} - -// renderCategoryRoutingYAML emits a deterministic YAML block of the form: -// -// category_routing: -// security: [Backend Engineer, DevOps] -// ui: [Frontend Engineer] -// -// Keys are sorted for stable, test-friendly output. Uses yaml.Node + yaml.Marshal -// so role names containing YAML-reserved characters (colons, quotes, unicode line -// separators, etc.) are escaped by the YAML library — no ad-hoc quoting. -func renderCategoryRoutingYAML(routing map[string][]string) (string, error) { - if len(routing) == 0 { - return "", nil - } - keys := make([]string, 0, len(routing)) - for k := range routing { - if k == "" { - continue - } - keys = append(keys, k) - } - sort.Strings(keys) - - inner := &yaml.Node{Kind: yaml.MappingNode} - for _, k := range keys { - keyNode := &yaml.Node{Kind: yaml.ScalarNode, Value: k} - valNode := &yaml.Node{Kind: yaml.SequenceNode, Style: yaml.FlowStyle} - for _, role := range routing[k] { - valNode.Content = append(valNode.Content, &yaml.Node{Kind: yaml.ScalarNode, Value: role}) - } - inner.Content = append(inner.Content, keyNode, valNode) - } - doc := &yaml.Node{Kind: yaml.MappingNode} - doc.Content = []*yaml.Node{ - {Kind: yaml.ScalarNode, Value: "category_routing"}, - inner, - } - out, err := yaml.Marshal(doc) - if err != nil { - return "", err - } - return string(out), nil -} - -// appendYAMLBlock concatenates a YAML fragment to an existing buffer, guaranteeing -// a newline boundary between them. 
Upstream code writes config.yaml in fragments -// (base template → category_routing → initial_prompt) and the base isn't -// guaranteed to end in \n, which would merge the last line into the next block. -func appendYAMLBlock(existing []byte, block string) []byte { - if len(existing) > 0 && existing[len(existing)-1] != '\n' { - existing = append(existing, '\n') - } - return append(existing, []byte(block)...) -} - -// mergePlugins returns the union of defaults and per-workspace plugin lists -// (deduplicated, defaults first). A per-workspace entry starting with "!" or -// "-" opts that plugin OUT of the union. See issue #68. -func mergePlugins(defaultPlugins, wsPlugins []string) []string { - seen := map[string]bool{} - out := make([]string, 0, len(defaultPlugins)+len(wsPlugins)) - for _, p := range defaultPlugins { - if p == "" || seen[p] { - continue - } - seen[p] = true - out = append(out, p) - } - for _, p := range wsPlugins { - if p == "" { - continue - } - if strings.HasPrefix(p, "!") || strings.HasPrefix(p, "-") { - target := strings.TrimLeft(p, "!-") - if target == "" { - continue - } - if seen[target] { - delete(seen, target) - filtered := out[:0] - for _, existing := range out { - if existing != target { - filtered = append(filtered, existing) - } - } - out = filtered - } - continue - } - if !seen[p] { - seen[p] = true - out = append(out, p) - } - } - return out -} - -// resolveInsideRoot joins `userPath` onto `root` and ensures the lexically -// cleaned result stays inside root. Rejects absolute paths outright and -// anything containing ".." that would escape the root. -// -// Both arguments are resolved to absolute paths via filepath.Abs before the -// prefix check so a root passed as a relative path still works correctly. -// Follows Go's standard pattern for SSRF-class path sanitization; using -// strings.HasPrefix on an absolute-path pair plus the separator guard rejects -// sibling directories that share a prefix (e.g. "/foo" vs "/foobar"). -func resolveInsideRoot(root, userPath string) (string, error) { - if userPath == "" { - return "", fmt.Errorf("path is empty") - } - if filepath.IsAbs(userPath) { - return "", fmt.Errorf("absolute paths are not allowed") - } - absRoot, err := filepath.Abs(root) - if err != nil { - return "", fmt.Errorf("root abs: %w", err) - } - joined := filepath.Join(absRoot, userPath) - absJoined, err := filepath.Abs(joined) - if err != nil { - return "", fmt.Errorf("joined abs: %w", err) - } - // Allow exact-root match (rare but valid) and any descendant. - if absJoined != absRoot && !strings.HasPrefix(absJoined, absRoot+string(filepath.Separator)) { - return "", fmt.Errorf("path escapes root") - } - return absJoined, nil -} diff --git a/workspace-server/internal/handlers/org_helpers.go b/workspace-server/internal/handlers/org_helpers.go new file mode 100644 index 00000000..f84baf3d --- /dev/null +++ b/workspace-server/internal/handlers/org_helpers.go @@ -0,0 +1,290 @@ +package handlers + +// org_helpers.go — utility functions for org template processing. +// Prompt resolution, env file parsing, category routing, plugin merging, +// path sanitization. + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + + "gopkg.in/yaml.v3" +) +// resolvePromptRef reads a prompt body from either an inline string or a +// file ref relative to the workspace's files_dir. Inline always wins when +// both are non-empty (caller-provided inline is more authoritative than a +// file path that may not exist yet during dev loops). 
+//
+// File resolution:
+// - `<orgBaseDir>/<filesDir>/<fileRef>` when filesDir is non-empty
+// - `<orgBaseDir>/<fileRef>` when filesDir is empty (defaults-level refs)
+//
+// Both paths go through resolveInsideRoot so a crafted fileRef can't escape
+// the org template directory via traversal (same defense the files_dir
+// copy-step uses).
+//
+// Returns (resolved body, error). If both inline and fileRef are empty,
+// returns ("", nil) — caller decides whether that's a problem.
+func resolvePromptRef(inline, fileRef, orgBaseDir, filesDir string) (string, error) {
+	if inline != "" {
+		return inline, nil
+	}
+	if fileRef == "" {
+		return "", nil
+	}
+	if orgBaseDir == "" {
+		// Inline-only template (POST /org/import with a raw Template in the
+		// JSON body, not a dir). File refs can't be resolved — surface the
+		// problem rather than silently returning empty.
+		return "", fmt.Errorf("prompt_file %q requires a dir-based org template (no orgBaseDir in inline-template mode)", fileRef)
+	}
+	searchRoot := orgBaseDir
+	if filesDir != "" {
+		p, err := resolveInsideRoot(orgBaseDir, filesDir)
+		if err != nil {
+			return "", fmt.Errorf("invalid files_dir %q: %w", filesDir, err)
+		}
+		searchRoot = p
+	}
+	abs, err := resolveInsideRoot(searchRoot, fileRef)
+	if err != nil {
+		return "", fmt.Errorf("invalid prompt_file %q: %w", fileRef, err)
+	}
+	data, err := os.ReadFile(abs)
+	if err != nil {
+		return "", fmt.Errorf("read prompt_file %q: %w", fileRef, err)
+	}
+	return string(data), nil
+}
+
+// envVarRefPattern matches actual ${VAR} or $VAR references (not literal $).
+// Used to detect unresolved placeholders without false positives like "$5".
+var envVarRefPattern = regexp.MustCompile(`\$\{?[A-Za-z_][A-Za-z0-9_]*\}?`)
+
+// hasUnresolvedVarRef returns true if the original string had a ${VAR} or $VAR
+// reference that the expanded string didn't fully replace (i.e. the var was unset).
+func hasUnresolvedVarRef(original, expanded string) bool {
+	if !envVarRefPattern.MatchString(original) {
+		return false // no var refs to resolve
+	}
+	// If expansion produced the same string and that string still has refs, unresolved.
+	// If expansion stripped them to "", also unresolved.
+	// Known limitation: a ref stripped mid-string (e.g. "$FOO bar" expanding
+	// to " bar" when FOO is unset) is not caught; the heuristic only covers
+	// the all-refs and empty-result cases.
+	return expanded == "" || envVarRefPattern.MatchString(expanded)
+}
+
+// expandWithEnv expands ${VAR} and $VAR references in s using the env map.
+// Falls back to the platform process env if a var isn't in the map.
+func expandWithEnv(s string, env map[string]string) string {
+	return os.Expand(s, func(key string) string {
+		if v, ok := env[key]; ok {
+			return v
+		}
+		return os.Getenv(key)
+	})
+}
+
+// loadWorkspaceEnv reads the org root .env and the workspace-specific .env
+// (workspace overrides org root). Used by both secret injection and channel
+// config expansion.
+func loadWorkspaceEnv(orgBaseDir, filesDir string) map[string]string {
+	envVars := map[string]string{}
+	if orgBaseDir == "" {
+		return envVars
+	}
+	parseEnvFile(filepath.Join(orgBaseDir, ".env"), envVars)
+	if filesDir != "" {
+		parseEnvFile(filepath.Join(orgBaseDir, filesDir, ".env"), envVars)
+	}
+	return envVars
+}
+
+// parseEnvFile reads a .env file and adds KEY=VALUE pairs to the map.
+// Skips comments (#) and empty lines. Values can be quoted.
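+//
+// For example (hypothetical file contents), these lines:
+//
+//	# comment, skipped
+//	API_KEY="s3cr3t"
+//	EMPTY=
+//
+// yield map[string]string{"API_KEY": "s3cr3t"}; the surrounding quotes are
+// stripped and EMPTY is dropped because blank values are skipped.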
+func parseEnvFile(path string, out map[string]string) { + data, err := os.ReadFile(path) + if err != nil { + return + } + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + parts := strings.SplitN(line, "=", 2) + if len(parts) != 2 { + continue + } + key := strings.TrimSpace(parts[0]) + value := strings.TrimSpace(parts[1]) + // Strip surrounding quotes + if len(value) >= 2 && ((value[0] == '"' && value[len(value)-1] == '"') || (value[0] == '\'' && value[len(value)-1] == '\'')) { + value = value[1 : len(value)-1] + } + if key != "" && value != "" { + out[key] = value + } + } +} + +// mergeCategoryRouting unions defaults.category_routing with per-workspace +// category_routing. Workspace-level keys override the default's value for that +// key (the role list is replaced wholesale, not unioned per-key, so a workspace +// can narrow a category — e.g. "infra: [DevOps Only]"). Empty role lists drop +// the category entirely. See issue #51. +func mergeCategoryRouting(defaultRouting, wsRouting map[string][]string) map[string][]string { + out := map[string][]string{} + for k, v := range defaultRouting { + if k == "" || len(v) == 0 { + continue + } + cp := make([]string, len(v)) + copy(cp, v) + out[k] = cp + } + for k, v := range wsRouting { + if k == "" { + continue + } + if len(v) == 0 { + // Empty list = explicit "drop this category for this workspace" + delete(out, k) + continue + } + cp := make([]string, len(v)) + copy(cp, v) + out[k] = cp + } + return out +} + +// renderCategoryRoutingYAML emits a deterministic YAML block of the form: +// +// category_routing: +// security: [Backend Engineer, DevOps] +// ui: [Frontend Engineer] +// +// Keys are sorted for stable, test-friendly output. Uses yaml.Node + yaml.Marshal +// so role names containing YAML-reserved characters (colons, quotes, unicode line +// separators, etc.) are escaped by the YAML library — no ad-hoc quoting. +func renderCategoryRoutingYAML(routing map[string][]string) (string, error) { + if len(routing) == 0 { + return "", nil + } + keys := make([]string, 0, len(routing)) + for k := range routing { + if k == "" { + continue + } + keys = append(keys, k) + } + sort.Strings(keys) + + inner := &yaml.Node{Kind: yaml.MappingNode} + for _, k := range keys { + keyNode := &yaml.Node{Kind: yaml.ScalarNode, Value: k} + valNode := &yaml.Node{Kind: yaml.SequenceNode, Style: yaml.FlowStyle} + for _, role := range routing[k] { + valNode.Content = append(valNode.Content, &yaml.Node{Kind: yaml.ScalarNode, Value: role}) + } + inner.Content = append(inner.Content, keyNode, valNode) + } + doc := &yaml.Node{Kind: yaml.MappingNode} + doc.Content = []*yaml.Node{ + {Kind: yaml.ScalarNode, Value: "category_routing"}, + inner, + } + out, err := yaml.Marshal(doc) + if err != nil { + return "", err + } + return string(out), nil +} + +// appendYAMLBlock concatenates a YAML fragment to an existing buffer, guaranteeing +// a newline boundary between them. Upstream code writes config.yaml in fragments +// (base template → category_routing → initial_prompt) and the base isn't +// guaranteed to end in \n, which would merge the last line into the next block. +func appendYAMLBlock(existing []byte, block string) []byte { + if len(existing) > 0 && existing[len(existing)-1] != '\n' { + existing = append(existing, '\n') + } + return append(existing, []byte(block)...) 
+}
+
+// mergePlugins returns the union of defaults and per-workspace plugin lists
+// (deduplicated, defaults first). A per-workspace entry starting with "!" or
+// "-" opts that plugin OUT of the union. See issue #68.
+func mergePlugins(defaultPlugins, wsPlugins []string) []string {
+	seen := map[string]bool{}
+	out := make([]string, 0, len(defaultPlugins)+len(wsPlugins))
+	for _, p := range defaultPlugins {
+		if p == "" || seen[p] {
+			continue
+		}
+		seen[p] = true
+		out = append(out, p)
+	}
+	for _, p := range wsPlugins {
+		if p == "" {
+			continue
+		}
+		if strings.HasPrefix(p, "!") || strings.HasPrefix(p, "-") {
+			target := strings.TrimLeft(p, "!-")
+			if target == "" {
+				continue
+			}
+			if seen[target] {
+				delete(seen, target)
+				filtered := out[:0]
+				for _, existing := range out {
+					if existing != target {
+						filtered = append(filtered, existing)
+					}
+				}
+				out = filtered
+			}
+			continue
+		}
+		if !seen[p] {
+			seen[p] = true
+			out = append(out, p)
+		}
+	}
+	return out
+}
+
+// resolveInsideRoot joins `userPath` onto `root` and ensures the lexically
+// cleaned result stays inside root. Rejects absolute paths outright and
+// anything containing ".." that would escape the root.
+//
+// Both arguments are resolved to absolute paths via filepath.Abs before the
+// prefix check so a root passed as a relative path still works correctly.
+// Follows Go's standard pattern for path-traversal sanitization (CWE-22);
+// using strings.HasPrefix on an absolute-path pair plus the separator guard
+// rejects sibling directories that share a prefix (e.g. "/foo" vs "/foobar").
+func resolveInsideRoot(root, userPath string) (string, error) {
+	if userPath == "" {
+		return "", fmt.Errorf("path is empty")
+	}
+	if filepath.IsAbs(userPath) {
+		return "", fmt.Errorf("absolute paths are not allowed")
+	}
+	absRoot, err := filepath.Abs(root)
+	if err != nil {
+		return "", fmt.Errorf("root abs: %w", err)
+	}
+	joined := filepath.Join(absRoot, userPath)
+	absJoined, err := filepath.Abs(joined)
+	if err != nil {
+		return "", fmt.Errorf("joined abs: %w", err)
+	}
+	// Allow exact-root match (rare but valid) and any descendant.
+	if absJoined != absRoot && !strings.HasPrefix(absJoined, absRoot+string(filepath.Separator)) {
+		return "", fmt.Errorf("path escapes root")
+	}
+	return absJoined, nil
+}
diff --git a/workspace-server/internal/handlers/org_import.go b/workspace-server/internal/handlers/org_import.go
new file mode 100644
index 00000000..442f5836
--- /dev/null
+++ b/workspace-server/internal/handlers/org_import.go
@@ -0,0 +1,490 @@
+package handlers
+
+// org_import.go — workspace tree creation during org template import.
+// Contains createWorkspaceTree (recursive provisioning) and countWorkspaces.
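+//
+// createWorkspaceTree walks the YAML tree depth-first. For each node it:
+// inserts the DB row → seeds memories → resolves prompts/plugins/routing
+// into config files → kicks off container provisioning (async,
+// semaphore-gated) → upserts schedules and channels → recurses into children.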
+ +import ( + "context" + "encoding/json" + "fmt" + "log" + "os" + "path/filepath" + "strings" + "time" + + "github.com/Molecule-AI/molecule-monorepo/platform/internal/channels" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/crypto" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/models" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/scheduler" + "github.com/google/uuid" +) +func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, defaults OrgDefaults, orgBaseDir string, results *[]map[string]interface{}, provisionSem chan struct{}) error { + // Apply defaults + runtime := ws.Runtime + if runtime == "" { + runtime = defaults.Runtime + } + if runtime == "" { + runtime = "langgraph" + } + model := ws.Model + if model == "" { + model = defaults.Model + } + if model == "" { + if runtime == "claude-code" { + model = "sonnet" + } else { + model = "anthropic:claude-opus-4-7" + } + } + tier := ws.Tier + if tier == 0 { + tier = defaults.Tier + } + if tier == 0 { + tier = 2 + } + + id := uuid.New().String() + awarenessNS := workspaceAwarenessNamespace(id) + + var role interface{} + if ws.Role != "" { + role = ws.Role + } + + // Expand ${VAR} references in workspace_dir against the org's .env files + // before validation. Without this, a template that ships + // `workspace_dir: ${WORKSPACE_DIR}` (so each operator can pick the host + // path to bind-mount) reaches validateWorkspaceDir as the literal + // "${WORKSPACE_DIR}" string and fails with "must be an absolute path". + // Other fields (channel config, prompts) already go through expandWithEnv; + // workspace_dir was the last hold-out. + if ws.WorkspaceDir != "" { + ws.WorkspaceDir = expandWithEnv(ws.WorkspaceDir, loadWorkspaceEnv(orgBaseDir, ws.FilesDir)) + } + + // Validate and convert workspace_dir to NULL if empty + var workspaceDir interface{} + if ws.WorkspaceDir != "" { + if err := validateWorkspaceDir(ws.WorkspaceDir); err != nil { + return fmt.Errorf("workspace %s: %w", ws.Name, err) + } + workspaceDir = ws.WorkspaceDir + } + + // #65: validate workspace_access (defaults to "none" when empty). 
+ workspaceAccess := ws.WorkspaceAccess + if workspaceAccess == "" { + workspaceAccess = provisioner.WorkspaceAccessNone + } + if err := provisioner.ValidateWorkspaceAccess(workspaceAccess, ws.WorkspaceDir); err != nil { + return fmt.Errorf("workspace %s: %w", ws.Name, err) + } + + ctx := context.Background() + + // Insert workspace + _, err := db.DB.ExecContext(ctx, ` + INSERT INTO workspaces (id, name, role, tier, runtime, awareness_namespace, status, parent_id, workspace_dir, workspace_access) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + `, id, ws.Name, role, tier, runtime, awarenessNS, "provisioning", parentID, workspaceDir, workspaceAccess) + if err != nil { + log.Printf("Org import: failed to create %s: %v", ws.Name, err) + return fmt.Errorf("failed to create %s: %w", ws.Name, err) + } + + // Canvas layout with coordinates from YAML + if _, err := db.DB.ExecContext(ctx, `INSERT INTO canvas_layouts (workspace_id, x, y) VALUES ($1, $2, $3)`, id, ws.Canvas.X, ws.Canvas.Y); err != nil { + log.Printf("Org import: canvas layout insert failed for %s: %v", ws.Name, err) + } + + // Broadcast + h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISIONING", id, map[string]interface{}{ + "name": ws.Name, "tier": tier, + }) + + // Seed initial memories from workspace config or defaults (issue #1050). + // Per-workspace initial_memories override defaults; if workspace has none, + // fall back to defaults.initial_memories. + wsMemories := ws.InitialMemories + if len(wsMemories) == 0 { + wsMemories = defaults.InitialMemories + } + seedInitialMemories(ctx, id, wsMemories, awarenessNS) + + // Handle external workspaces + if ws.External { + if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = 'online', url = $1 WHERE id = $2`, ws.URL, id); err != nil { + log.Printf("Org import: external workspace status update failed for %s: %v", ws.Name, err) + } + h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_ONLINE", id, map[string]interface{}{ + "name": ws.Name, "external": true, + }) + } else if h.provisioner != nil { + // Provision container + payload := models.CreateWorkspacePayload{ + Name: ws.Name, Tier: tier, Runtime: runtime, Model: model, + WorkspaceDir: ws.WorkspaceDir, + WorkspaceAccess: workspaceAccess, + } + templatePath := "" + if ws.Template != "" { + // `template` comes from the uploaded YAML — treat as untrusted. + // Only accept paths that stay inside h.configsDir. + if tp, err := resolveInsideRoot(h.configsDir, ws.Template); err == nil { + if _, statErr := os.Stat(tp); statErr == nil { + templatePath = tp + } + } + } + if templatePath == "" { + // #241: sanitizeRuntime() allowlists the runtime string so a + // crafted org.yaml cannot use it as a path-traversal oracle. + safeRuntime := sanitizeRuntime(runtime) + runtimeDefault := filepath.Join(h.configsDir, safeRuntime+"-default") + if _, err := os.Stat(runtimeDefault); err == nil { + templatePath = runtimeDefault + } + } + + // Always generate default config.yaml (runtime, model, tier, etc.) + configFiles := h.workspace.ensureDefaultConfig(id, payload) + + // Copy files_dir contents on top (system-prompt.md, CLAUDE.md, skills/, etc.) + // Uses templatePath for CopyTemplateToContainer — runs AFTER configFiles are written + if ws.FilesDir != "" && orgBaseDir != "" { + // `files_dir` also comes from untrusted YAML. Join inside orgBaseDir + // (already validated above) and reject anything that escapes. 
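+			// e.g. a crafted `files_dir: ../../../etc` (hypothetical attack value)
+			// fails the resolveInsideRoot check below, so templatePath keeps its
+			// earlier value and nothing outside the template dir is copied.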
+			if filesPath, err := resolveInsideRoot(orgBaseDir, ws.FilesDir); err == nil {
+				if info, statErr := os.Stat(filesPath); statErr == nil && info.IsDir() {
+					templatePath = filesPath
+				}
+			}
+		}
+
+		// Pre-install plugins: copy from registry into configFiles as plugins/<name>/*.
+		// Per-workspace plugins UNION with defaults.plugins (issue #68).
+		// A leading "!" or "-" on a per-workspace entry opts that plugin out.
+		plugins := mergePlugins(defaults.Plugins, ws.Plugins)
+		if len(plugins) > 0 {
+			if configFiles == nil {
+				configFiles = map[string][]byte{}
+			}
+			pluginsBase, _ := filepath.Abs(filepath.Join(h.configsDir, "..", "plugins"))
+			for _, pluginName := range plugins {
+				pluginSrc := filepath.Join(pluginsBase, pluginName)
+				if info, err := os.Stat(pluginSrc); err != nil || !info.IsDir() {
+					log.Printf("Org import: plugin %s not found at %s, skipping", pluginName, pluginSrc)
+					continue
+				}
+				filepath.Walk(pluginSrc, func(path string, info os.FileInfo, err error) error {
+					if err != nil || info.IsDir() {
+						return nil
+					}
+					rel, _ := filepath.Rel(pluginSrc, path)
+					data, readErr := os.ReadFile(path)
+					if readErr == nil {
+						configFiles["plugins/"+pluginName+"/"+rel] = data
+					}
+					return nil
+				})
+			}
+		}
+
+		// Render category_routing into config.yaml so the agent can read its routing
+		// table at runtime without hardcoded role names in prompts (issue #51).
+		// Per-workspace keys replace defaults per-key (empty list drops the key);
+		// see mergeCategoryRouting for exact semantics.
+		routing := mergeCategoryRouting(defaults.CategoryRouting, ws.CategoryRouting)
+		if len(routing) > 0 {
+			if configFiles == nil {
+				configFiles = map[string][]byte{}
+			}
+			block, err := renderCategoryRoutingYAML(routing)
+			if err != nil {
+				log.Printf("Org import: failed to render category_routing for %s: %v", ws.Name, err)
+			} else {
+				configFiles["config.yaml"] = appendYAMLBlock(configFiles["config.yaml"], block)
+			}
+		}
+
+		// Resolve initial_prompt — inline wins, then file-ref, then defaults
+		// (inline → file → defaults.inline → defaults.file). File refs are
+		// rooted at <orgBaseDir>/<filesDir>/ per resolvePromptRef semantics.
+		initialPrompt, err := resolvePromptRef(ws.InitialPrompt, ws.InitialPromptFile, orgBaseDir, ws.FilesDir)
+		if err != nil {
+			log.Printf("Org import: failed to resolve initial_prompt for %s: %v", ws.Name, err)
+		}
+		if initialPrompt == "" {
+			// Fall back to defaults. Defaults live at the org root, so they
+			// resolve with empty filesDir (relative to orgBaseDir).
+			var defaultErr error
+			initialPrompt, defaultErr = resolvePromptRef(defaults.InitialPrompt, defaults.InitialPromptFile, orgBaseDir, "")
+			if defaultErr != nil {
+				log.Printf("Org import: failed to resolve defaults.initial_prompt for %s: %v", ws.Name, defaultErr)
+			}
+		}
+		if initialPrompt != "" {
+			if configFiles == nil {
+				configFiles = map[string][]byte{}
+			}
+			// Append initial_prompt to config.yaml using YAML block scalar.
+			// Trim each line to avoid trailing whitespace issues.
+			trimmed := strings.TrimSpace(initialPrompt)
+			lines := strings.Split(trimmed, "\n")
+			for i, line := range lines {
+				lines[i] = strings.TrimRight(line, " \t")
+			}
+			indented := strings.Join(lines, "\n  ")
+			configFiles["config.yaml"] = appendYAMLBlock(configFiles["config.yaml"], fmt.Sprintf("initial_prompt: |\n  %s\n", indented))
+			log.Printf("Org import: injected initial_prompt (%d chars) into config.yaml for %s", len(trimmed), ws.Name)
+		}
+
+		// Resolve idle_prompt — same precedence (ws inline → ws file → defaults).
+ // Inject into config.yaml alongside idle_interval_seconds so the + // workspace's heartbeat loop picks up the idle-reflection cadence on + // boot (see workspace/heartbeat.py + config.py). + idlePrompt, err := resolvePromptRef(ws.IdlePrompt, ws.IdlePromptFile, orgBaseDir, ws.FilesDir) + if err != nil { + log.Printf("Org import: failed to resolve idle_prompt for %s: %v", ws.Name, err) + } + if idlePrompt == "" { + var defaultErr error + idlePrompt, defaultErr = resolvePromptRef(defaults.IdlePrompt, defaults.IdlePromptFile, orgBaseDir, "") + if defaultErr != nil { + log.Printf("Org import: failed to resolve defaults.idle_prompt for %s: %v", ws.Name, defaultErr) + } + } + idleInterval := ws.IdleIntervalSeconds + if idleInterval == 0 { + idleInterval = defaults.IdleIntervalSeconds + } + if idlePrompt != "" { + if configFiles == nil { + configFiles = map[string][]byte{} + } + trimmed := strings.TrimSpace(idlePrompt) + lines := strings.Split(trimmed, "\n") + for i, line := range lines { + lines[i] = strings.TrimRight(line, " \t") + } + indented := strings.Join(lines, "\n ") + // idle_interval_seconds belongs with idle_prompt — empty idle_prompt + // means the idle loop never fires regardless of interval, so we + // only emit interval when there's a body to go with it. + if idleInterval <= 0 { + idleInterval = 600 // same default as workspace/config.py + } + block := fmt.Sprintf("idle_interval_seconds: %d\nidle_prompt: |\n %s\n", idleInterval, indented) + configFiles["config.yaml"] = appendYAMLBlock(configFiles["config.yaml"], block) + log.Printf("Org import: injected idle_prompt (%d chars, interval=%ds) into config.yaml for %s", len(trimmed), idleInterval, ws.Name) + } + + // Inline system_prompt (only if no files_dir provides one) + if ws.SystemPrompt != "" { + if configFiles == nil { + configFiles = map[string][]byte{} + } + configFiles["system-prompt.md"] = []byte(ws.SystemPrompt) + } + + // Inject secrets from .env files as workspace secrets. + // Resolution: workspace .env → org root .env (workspace overrides org root). + // Each line: KEY=VALUE → stored as encrypted workspace secret. + envVars := map[string]string{} + if orgBaseDir != "" { + // 1. Org root .env (shared defaults) + parseEnvFile(filepath.Join(orgBaseDir, ".env"), envVars) + // 2. Workspace-specific .env (overrides) + if ws.FilesDir != "" { + parseEnvFile(filepath.Join(orgBaseDir, ws.FilesDir, ".env"), envVars) + } + } + // Store as workspace secrets via DB (encrypted if key is set, raw otherwise) + for key, value := range envVars { + var encrypted []byte + if crypto.IsEnabled() { + var err error + encrypted, err = crypto.Encrypt([]byte(value)) + if err != nil { + log.Printf("Org import: failed to encrypt secret %s for %s: %v", key, ws.Name, err) + continue + } + } else { + encrypted = []byte(value) // store raw when encryption disabled + } + if _, err := db.DB.ExecContext(ctx, ` + INSERT INTO workspace_secrets (workspace_id, key, encrypted_value) + VALUES ($1, $2, $3) + ON CONFLICT (workspace_id, key) DO UPDATE SET encrypted_value = $3, updated_at = now() + `, id, key, encrypted); err != nil { + log.Printf("Org import: failed to insert secret %s for %s: %v", key, ws.Name, err) + } + } + + // #1084: limit concurrent Docker provisioning via semaphore. 
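+	// The buffered channel acts as a counting semaphore: the send below
+	// blocks once cap(provisionSem) goroutines hold slots, so at most that
+	// many containers provision concurrently. The capacity is chosen where
+	// the channel is created, presumably the Import entry point.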
+ provisionSem <- struct{}{} // acquire + go func(wID, tPath string, cFiles map[string][]byte, p models.CreateWorkspacePayload) { + defer func() { <-provisionSem }() // release + h.workspace.provisionWorkspace(wID, tPath, cFiles, p) + }(id, templatePath, configFiles, payload) + } + + // Insert schedules if defined. Resolve each schedule's prompt body from + // either inline `prompt:` or `prompt_file:` (file ref relative to the + // workspace's files_dir). Inline wins; empty prompt after resolution is + // a configuration error (cron with no body would never do anything). + for _, sched := range ws.Schedules { + tz := sched.Timezone + if tz == "" { + tz = "UTC" + } + enabled := true + if sched.Enabled != nil { + enabled = *sched.Enabled + } + prompt, promptErr := resolvePromptRef(sched.Prompt, sched.PromptFile, orgBaseDir, ws.FilesDir) + if promptErr != nil { + log.Printf("Org import: failed to resolve prompt for schedule '%s' on %s: %v — skipping insert", sched.Name, ws.Name, promptErr) + continue + } + if prompt == "" { + log.Printf("Org import: schedule '%s' on %s has empty prompt (neither prompt nor prompt_file set) — skipping insert", sched.Name, ws.Name) + continue + } + // #722: surface the error rather than silently using time.Time{} (zero) + // which lib/pq stores as 0001-01-01 and may confuse the fire query. + nextRun, nextRunErr := scheduler.ComputeNextRun(sched.CronExpr, tz, time.Now()) + if nextRunErr != nil { + log.Printf("Org import: invalid cron expression for schedule '%s' on %s: %v — skipping insert", + sched.Name, ws.Name, nextRunErr) + continue + } + if _, err := db.DB.ExecContext(context.Background(), orgImportScheduleSQL, + id, sched.Name, sched.CronExpr, tz, prompt, enabled, nextRun); err != nil { + log.Printf("Org import: failed to upsert schedule '%s' for %s: %v", sched.Name, ws.Name, err) + } else { + log.Printf("Org import: schedule '%s' (%s, %d chars) upserted for %s (source=template)", sched.Name, sched.CronExpr, len(prompt), ws.Name) + } + } + + // Insert channels if defined (Telegram, Slack, etc.). Config values + // support ${VAR} expansion from .env files. The manager is reloaded + // once at the end of org import (in Import), not per-workspace. + channelEnv := loadWorkspaceEnv(orgBaseDir, ws.FilesDir) + wsChannelsCreated := []string{} + wsChannelsSkipped := []map[string]string{} + // skipChannel records a skipped channel with consistent shape across all reasons. 
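+	// A skipped entry surfaces in the import result as, e.g. (hypothetical
+	// values; shape matches the map literal below):
+	//   {"workspace": "support-bot", "type": "telegram", "reason": "missing env: [${TELEGRAM_TOKEN}]"}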
+ skipChannel := func(channelType, reason string) { + wsChannelsSkipped = append(wsChannelsSkipped, map[string]string{ + "workspace": ws.Name, + "type": channelType, // empty string when type field was missing + "reason": reason, + }) + } + + for _, ch := range ws.Channels { + if ch.Type == "" { + skipChannel("", "empty type") + log.Printf("Org import: skipping channel with empty type for %s", ws.Name) + continue + } + // Validate adapter exists upfront — fail fast instead of inserting orphan rows + adapter, ok := channels.GetAdapter(ch.Type) + if !ok { + skipChannel(ch.Type, "unknown adapter") + log.Printf("Org import: skipping %s channel for %s — no adapter registered", ch.Type, ws.Name) + continue + } + + expandedConfig := make(map[string]interface{}, len(ch.Config)) + missing := []string{} + for k, v := range ch.Config { + expanded := expandWithEnv(v, channelEnv) + if hasUnresolvedVarRef(v, expanded) { + missing = append(missing, v) + } + expandedConfig[k] = expanded + } + if len(missing) > 0 { + skipChannel(ch.Type, fmt.Sprintf("missing env: %v", missing)) + log.Printf("Org import: skipping %s channel for %s — env vars not set: %v", ch.Type, ws.Name, missing) + continue + } + + // Adapter-level config validation + if err := adapter.ValidateConfig(expandedConfig); err != nil { + skipChannel(ch.Type, err.Error()) + log.Printf("Org import: skipping %s channel for %s — invalid config: %v", ch.Type, ws.Name, err) + continue + } + + configJSON, err := json.Marshal(expandedConfig) + if err != nil { + log.Printf("Org import: failed to marshal config for %s channel: %v", ch.Type, err) + continue + } + allowedJSON, err := json.Marshal(ch.AllowedUsers) + if err != nil { + log.Printf("Org import: failed to marshal allowed_users for %s channel: %v", ch.Type, err) + continue + } + enabled := true + if ch.Enabled != nil { + enabled = *ch.Enabled + } + // Idempotent insert — if same workspace+type already exists, update config + if _, err := db.DB.ExecContext(context.Background(), ` + INSERT INTO workspace_channels (workspace_id, channel_type, channel_config, enabled, allowed_users) + VALUES ($1, $2, $3::jsonb, $4, $5::jsonb) + ON CONFLICT (workspace_id, channel_type) DO UPDATE + SET channel_config = EXCLUDED.channel_config, + enabled = EXCLUDED.enabled, + allowed_users = EXCLUDED.allowed_users, + updated_at = now() + `, id, ch.Type, string(configJSON), enabled, string(allowedJSON)); err != nil { + log.Printf("Org import: failed to create %s channel for %s: %v", ch.Type, ws.Name, err) + } else { + wsChannelsCreated = append(wsChannelsCreated, ch.Type) + log.Printf("Org import: %s channel created for %s", ch.Type, ws.Name) + } + } + + resultEntry := map[string]interface{}{ + "id": id, + "name": ws.Name, + "tier": tier, + } + if len(wsChannelsCreated) > 0 { + resultEntry["channels"] = wsChannelsCreated + } + if len(wsChannelsSkipped) > 0 { + resultEntry["channels_skipped"] = wsChannelsSkipped + } + *results = append(*results, resultEntry) + + // Recurse into children. Brief pacing avoids overwhelming Docker when + // creating many containers in sequence; container provisioning runs in + // goroutines so the main createWorkspaceTree returns quickly. 
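+	// Each child receives this workspace's id as its parentID, so the DB
+	// hierarchy mirrors the YAML nesting exactly.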
+ for _, child := range ws.Children { + if err := h.createWorkspaceTree(child, &id, defaults, orgBaseDir, results, provisionSem); err != nil { + return err + } + time.Sleep(workspaceCreatePacingMs * time.Millisecond) + } + + return nil +} + +func countWorkspaces(workspaces []OrgWorkspace) int { + count := len(workspaces) + for _, ws := range workspaces { + count += countWorkspaces(ws.Children) + } + return count +} diff --git a/workspace-server/internal/handlers/ssrf.go b/workspace-server/internal/handlers/ssrf.go new file mode 100644 index 00000000..09bb2774 --- /dev/null +++ b/workspace-server/internal/handlers/ssrf.go @@ -0,0 +1,90 @@ +package handlers + +import ( + "fmt" + "net" + "net/url" + "path/filepath" + "strings" +) + +// isSafeURL validates that a URL resolves to a publicly-routable address, +// preventing A2A requests from being redirected to internal/cloud-metadata +// infrastructure (SSRF, CWE-918). Workspace URLs come from DB/Redis caches +// so we validate before making any outbound HTTP call. +func isSafeURL(rawURL string) error { + u, err := url.Parse(rawURL) + if err != nil { + return fmt.Errorf("invalid URL: %w", err) + } + // Reject non-HTTP(S) schemes. + if u.Scheme != "http" && u.Scheme != "https" { + return fmt.Errorf("forbidden scheme: %s (only http/https allowed)", u.Scheme) + } + host := u.Hostname() + if host == "" { + return fmt.Errorf("empty hostname") + } + // Block direct IP addresses. + if ip := net.ParseIP(host); ip != nil { + if ip.IsLoopback() || ip.IsUnspecified() || ip.IsLinkLocalUnicast() { + return fmt.Errorf("forbidden loopback/unspecified IP: %s", ip) + } + if isPrivateOrMetadataIP(ip) { + return fmt.Errorf("forbidden private/metadata IP: %s", ip) + } + return nil + } + // For hostnames, resolve and validate each returned IP. + addrs, err := net.LookupHost(host) + if err != nil { + // DNS resolution failure — block it. Could be an internal hostname. + return fmt.Errorf("DNS resolution blocked for hostname: %s (%v)", host, err) + } + if len(addrs) == 0 { + return fmt.Errorf("DNS returned no addresses for: %s", host) + } + for _, addr := range addrs { + ip := net.ParseIP(addr) + if ip != nil && (ip.IsLoopback() || ip.IsUnspecified() || ip.IsLinkLocalUnicast() || isPrivateOrMetadataIP(ip)) { + return fmt.Errorf("hostname %s resolves to forbidden IP: %s", host, ip) + } + } + return nil +} + +// isPrivateOrMetadataIP returns true for RFC-1918 private, carrier-grade NAT, +// link-local, and cloud metadata ranges. +func isPrivateOrMetadataIP(ip net.IP) bool { + var privateRanges = []net.IPNet{ + {IP: net.ParseIP("10.0.0.0"), Mask: net.CIDRMask(8, 32)}, + {IP: net.ParseIP("172.16.0.0"), Mask: net.CIDRMask(12, 32)}, + {IP: net.ParseIP("192.168.0.0"), Mask: net.CIDRMask(16, 32)}, + {IP: net.ParseIP("169.254.0.0"), Mask: net.CIDRMask(16, 32)}, + {IP: net.ParseIP("100.64.0.0"), Mask: net.CIDRMask(10, 32)}, + {IP: net.ParseIP("192.0.2.0"), Mask: net.CIDRMask(24, 32)}, + {IP: net.ParseIP("198.51.100.0"), Mask: net.CIDRMask(24, 32)}, + {IP: net.ParseIP("203.0.113.0"), Mask: net.CIDRMask(24, 32)}, + } + ip = ip.To4() + if ip == nil { + return false + } + for _, r := range privateRanges { + if r.Contains(ip) { + return true + } + } + return false +} + +// validateRelPath checks that a file path is relative and does not escape +// the destination via absolute paths or ".." traversal. Used by +// copyFilesToContainer and deleteViaEphemeral as a defence-in-depth measure. 
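+//
+// e.g. "notes/report.md" passes; "/etc/passwd" (absolute) and
+// "notes/../../secrets" (Clean → "../secrets", still contains "..")
+// are both rejected.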
+func validateRelPath(filePath string) error { + clean := filepath.Clean(filePath) + if filepath.IsAbs(clean) || strings.Contains(clean, "..") { + return fmt.Errorf("path traversal or absolute path not allowed: %s", filePath) + } + return nil +} \ No newline at end of file diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index 8b534f70..6af680f1 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -1,5 +1,9 @@ package handlers +// workspace.go — WorkspaceHandler struct, constructor, Create, List, Get, +// and the shared scanWorkspaceRow helper. State/Update/Delete and validators +// live in workspace_crud.go. + import ( "context" "database/sql" @@ -16,10 +20,8 @@ import ( "github.com/Molecule-AI/molecule-monorepo/platform/internal/events" "github.com/Molecule-AI/molecule-monorepo/platform/internal/models" "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth" "github.com/Molecule-AI/molecule-monorepo/platform/pkg/provisionhook" "github.com/gin-gonic/gin" - "github.com/lib/pq" "github.com/google/uuid" ) @@ -303,7 +305,7 @@ func scanWorkspaceRow(rows interface { Scan(dest ...interface{}) error }) (map[string]interface{}, error) { var id, name, role, status, url, sampleError, currentTask, runtime, workspaceDir string - var tier, activeTasks, uptimeSeconds int + var tier, activeTasks, maxConcurrentTasks, uptimeSeconds int var errorRate, x, y float64 var collapsed bool var parentID *string @@ -312,7 +314,7 @@ func scanWorkspaceRow(rows interface { var monthlySpend int64 err := rows.Scan(&id, &name, &role, &tier, &status, &agentCard, &url, - &parentID, &activeTasks, &errorRate, &sampleError, &uptimeSeconds, + &parentID, &activeTasks, &maxConcurrentTasks, &errorRate, &sampleError, &uptimeSeconds, ¤tTask, &runtime, &workspaceDir, &x, &y, &collapsed, &budgetLimit, &monthlySpend) if err != nil { @@ -326,8 +328,9 @@ func scanWorkspaceRow(rows interface { "status": status, "url": url, "parent_id": parentID, - "active_tasks": activeTasks, - "last_error_rate": errorRate, + "active_tasks": activeTasks, + "max_concurrent_tasks": maxConcurrentTasks, + "last_error_rate": errorRate, "last_sample_error": sampleError, "uptime_seconds": uptimeSeconds, "current_task": currentTask, @@ -366,7 +369,8 @@ func scanWorkspaceRow(rows interface { const workspaceListQuery = ` SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status, COALESCE(w.agent_card, 'null'::jsonb), COALESCE(w.url, ''), - w.parent_id, w.active_tasks, w.last_error_rate, + w.parent_id, w.active_tasks, COALESCE(w.max_concurrent_tasks, 1), + w.last_error_rate, COALESCE(w.last_sample_error, ''), w.uptime_seconds, COALESCE(w.current_task, ''), COALESCE(w.runtime, 'langgraph'), COALESCE(w.workspace_dir, ''), @@ -418,7 +422,8 @@ func (h *WorkspaceHandler) Get(c *gin.Context) { row := db.DB.QueryRowContext(c.Request.Context(), ` SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status, COALESCE(w.agent_card, 'null'::jsonb), COALESCE(w.url, ''), - w.parent_id, w.active_tasks, w.last_error_rate, + w.parent_id, w.active_tasks, COALESCE(w.max_concurrent_tasks, 1), + w.last_error_rate, COALESCE(w.last_sample_error, ''), w.uptime_seconds, COALESCE(w.current_task, ''), COALESCE(w.runtime, 'langgraph'), COALESCE(w.workspace_dir, ''), @@ -462,473 +467,3 @@ func (h *WorkspaceHandler) Get(c *gin.Context) { c.JSON(http.StatusOK, ws) } - -// State 
handles GET /workspaces/:id/state — minimal status payload for -// remote-agent polling (Phase 30.4). Returns `{status, paused, deleted, -// workspace_id}` so a remote agent can detect pause/resume/delete -// without needing WebSocket reachability from the platform. -// -// Auth: Phase 30.1 bearer token required when the workspace has any -// live token on file; legacy workspaces grandfathered. Uses the same -// fail-closed posture as secrets.Values — polling this cadence with -// unauth'd callers would be a trivial DoS / workspace-status-scanner -// otherwise. -// -// The endpoint is deliberately NOT merged with GET /workspaces/:id: -// that handler is optimized for canvas (returns config, agent_card, -// position, …) and is unauthenticated by design. State is the -// agent-machinery polling path — tight, token-gated, cache-friendly. -func (h *WorkspaceHandler) State(c *gin.Context) { - workspaceID := c.Param("id") - ctx := c.Request.Context() - - // Auth gate — same shape as secrets.Values (Phase 30.2). Fail-closed - // on DB errors because the caller is about to poll this at ~60s - // cadence; letting unauth'd callers through on a hiccup turns this - // into a workspace-status scanner. - hasLive, hlErr := wsauth.HasAnyLiveToken(ctx, db.DB, workspaceID) - if hlErr != nil { - log.Printf("wsauth: HasAnyLiveToken(%s) failed for workspace.State: %v", workspaceID, hlErr) - c.JSON(http.StatusInternalServerError, gin.H{"error": "auth check failed"}) - return - } - if hasLive { - tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")) - if tok == "" { - c.JSON(http.StatusUnauthorized, gin.H{"error": "missing workspace auth token"}) - return - } - if err := wsauth.ValidateToken(ctx, db.DB, workspaceID, tok); err != nil { - c.JSON(http.StatusUnauthorized, gin.H{"error": "invalid workspace auth token"}) - return - } - } - - var status string - err := db.DB.QueryRowContext(ctx, ` - SELECT status - FROM workspaces - WHERE id = $1 - `, workspaceID).Scan(&status) - if err == sql.ErrNoRows { - // A deleted workspace row no longer exists — remote agent should - // interpret 404 as "shut yourself down" (our pause path uses - // status='removed' but keeps the row; a 404 here means the - // workspace was hard-deleted out from under the agent). - c.JSON(http.StatusNotFound, gin.H{ - "workspace_id": workspaceID, - "deleted": true, - }) - return - } - if err != nil { - log.Printf("workspace.State query error for %s: %v", workspaceID, err) - c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"}) - return - } - - // Two delete paths: hard-delete (sql.ErrNoRows above → 404) AND - // soft-delete (status='removed' → also return 404 here so the SDK - // doesn't have to remember "is it 200 with deleted=true OR 404 with - // deleted=true?"). Same shape, same status code, same flag set. - if status == "removed" { - c.JSON(http.StatusNotFound, gin.H{ - "workspace_id": workspaceID, - "status": "removed", - "deleted": true, - }) - return - } - - c.JSON(http.StatusOK, gin.H{ - "workspace_id": workspaceID, - "status": status, - "paused": status == "paused", - "deleted": false, - }) -} - -// sensitiveUpdateFields documents fields that carry elevated risk — kept as -// an explicit list for code readability and future audits. Auth is now fully -// enforced at the router layer (WorkspaceAuth middleware, #680 IDOR fix); -// this map is no longer used for in-handler gate logic but is preserved to -// surface the risk classification clearly. 
-// -// budget_limit is intentionally NOT here — the dedicated PATCH -// /workspaces/:id/budget (AdminAuth) is the only write path (#611). -var sensitiveUpdateFields = map[string]struct{}{ - "tier": {}, - "parent_id": {}, - "runtime": {}, - "workspace_dir": {}, -} - -// Update handles PATCH /workspaces/:id -func (h *WorkspaceHandler) Update(c *gin.Context) { - id := c.Param("id") - - // #687: reject non-UUID IDs before hitting the DB. - if err := validateWorkspaceID(id); err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"}) - return - } - - var body map[string]interface{} - if err := c.ShouldBindJSON(&body); err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"}) - return - } - - // #685/#688: validate string fields for length and injection safety. - strField := func(key string) string { - if v, ok := body[key]; ok { - if s, ok := v.(string); ok { - return s - } - } - return "" - } - if err := validateWorkspaceFields( - strField("name"), strField("role"), "" /*model not patchable*/, strField("runtime"), - ); err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace fields"}) - return - } - - ctx := c.Request.Context() - - // Auth is fully enforced at the router layer (WorkspaceAuth middleware, #680). - // WorkspaceAuth validates that the caller holds a valid bearer token for this - // specific workspace — no additional auth gate is needed here. The - // sensitiveUpdateFields map above documents the risk classification for - // auditors but is no longer used as a runtime gate. - - // #120: guard — return 404 for nonexistent workspace IDs instead of - // silently applying zero-row UPDATEs and returning 200. - var exists bool - if err := db.DB.QueryRowContext(ctx, - `SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`, id, - ).Scan(&exists); err != nil || !exists { - c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"}) - return - } - - if name, ok := body["name"]; ok { - if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET name = $2, updated_at = now() WHERE id = $1`, id, name); err != nil { - log.Printf("Update name error for %s: %v", id, err) - } - } - if role, ok := body["role"]; ok { - if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET role = $2, updated_at = now() WHERE id = $1`, id, role); err != nil { - log.Printf("Update role error for %s: %v", id, err) - } - } - if tier, ok := body["tier"]; ok { - if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET tier = $2, updated_at = now() WHERE id = $1`, id, tier); err != nil { - log.Printf("Update tier error for %s: %v", id, err) - } - } - if parentID, ok := body["parent_id"]; ok { - if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET parent_id = $2, updated_at = now() WHERE id = $1`, id, parentID); err != nil { - log.Printf("Update parent_id error for %s: %v", id, err) - } - } - if runtime, ok := body["runtime"]; ok { - if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET runtime = $2, updated_at = now() WHERE id = $1`, id, runtime); err != nil { - log.Printf("Update runtime error for %s: %v", id, err) - } - } - needsRestart := false - if wsDir, ok := body["workspace_dir"]; ok { - // Allow null to clear workspace_dir - if wsDir != nil { - if dirStr, isStr := wsDir.(string); isStr && dirStr != "" { - if err := validateWorkspaceDir(dirStr); err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace directory"}) - return - } - } - } - if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET 
workspace_dir = $2, updated_at = now() WHERE id = $1`, id, wsDir); err != nil { - log.Printf("Update workspace_dir error for %s: %v", id, err) - } - needsRestart = true - } - // NOTE: budget_limit is intentionally NOT handled here. The dedicated - // PATCH /workspaces/:id/budget (AdminAuth) is the only write path. - // This endpoint uses ValidateAnyToken — any enrolled workspace bearer - // could otherwise self-clear its own spending ceiling. (#611 Security Auditor) - - // Update canvas position if both x and y provided - if x, xOk := body["x"]; xOk { - if y, yOk := body["y"]; yOk { - if _, err := db.DB.ExecContext(ctx, ` - INSERT INTO canvas_layouts (workspace_id, x, y) - VALUES ($1, $2, $3) - ON CONFLICT (workspace_id) DO UPDATE SET x = EXCLUDED.x, y = EXCLUDED.y - `, id, x, y); err != nil { - log.Printf("Update position error for %s: %v", id, err) - } - } - } - - resp := gin.H{"status": "updated"} - if needsRestart { - resp["needs_restart"] = true - } - c.JSON(http.StatusOK, resp) -} - -// validateWorkspaceDir checks that a workspace_dir path is safe to bind-mount. -func validateWorkspaceDir(dir string) error { - if !filepath.IsAbs(dir) { - return fmt.Errorf("workspace_dir must be an absolute path") - } - if strings.Contains(dir, "..") { - return fmt.Errorf("workspace_dir must not contain '..'") - } - // Reject system-critical paths - clean := filepath.Clean(dir) - for _, blocked := range []string{"/etc", "/var", "/proc", "/sys", "/dev", "/boot", "/sbin", "/bin", "/lib", "/usr"} { - if clean == blocked || strings.HasPrefix(clean, blocked+"/") { - return fmt.Errorf("workspace_dir must not be a system path (%s)", blocked) - } - } - return nil -} - -// Delete handles DELETE /workspaces/:id -// If the workspace has children (is a team), cascade deletes all sub-workspaces. -// Use ?confirm=true to actually delete (otherwise returns children list for confirmation). -func (h *WorkspaceHandler) Delete(c *gin.Context) { - id := c.Param("id") - ctx := c.Request.Context() - confirm := c.Query("confirm") == "true" - - // #687: reject non-UUID IDs before hitting the DB. - if err := validateWorkspaceID(id); err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"}) - return - } - - // Check for children - rows, err := db.DB.QueryContext(ctx, - `SELECT id, name FROM workspaces WHERE parent_id = $1 AND status != 'removed'`, id) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to check children"}) - return - } - defer rows.Close() - - var children []map[string]string - for rows.Next() { - var childID, childName string - if rows.Scan(&childID, &childName) == nil { - children = append(children, map[string]string{"id": childID, "name": childName}) - } - } - if err := rows.Err(); err != nil { - log.Printf("Delete: child rows error: %v", err) - c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to check children"}) - return - } - - // If has children and not confirmed, return children list for confirmation. - // Uses HTTP 409 Conflict (not 200) so `curl --fail`, `fetch().ok`, and any - // client that treats HTTP 4xx as an error surfaces the confirmation - // requirement. Body shape unchanged so the canvas UI's parser keeps - // working. Fixes #88. - if len(children) > 0 && !confirm { - c.JSON(http.StatusConflict, gin.H{ - "status": "confirmation_required", - "message": "This workspace has sub-workspaces. 
Delete with ?confirm=true to cascade delete.", - "children": children, - "children_count": len(children), - }) - return - } - - // Cascade delete: collect ALL descendants (not just direct children) via - // recursive CTE, then stop each container and remove each volume. - // Previous bug: only direct children's containers were stopped, leaving - // grandchildren as orphan running containers after a cascade delete. - descendantIDs := []string{} - if len(children) > 0 { - descRows, err := db.DB.QueryContext(ctx, ` - WITH RECURSIVE descendants AS ( - SELECT id FROM workspaces WHERE parent_id = $1 AND status != 'removed' - UNION ALL - SELECT w.id FROM workspaces w JOIN descendants d ON w.parent_id = d.id WHERE w.status != 'removed' - ) - SELECT id FROM descendants - `, id) - if err != nil { - log.Printf("Delete: descendant query error for %s: %v", id, err) - } else { - for descRows.Next() { - var descID string - if descRows.Scan(&descID) == nil { - descendantIDs = append(descendantIDs, descID) - } - } - descRows.Close() - } - } - - // #73 fix: mark rows 'removed' in the DB FIRST, BEFORE stopping containers - // or removing volumes. Previously the sequence was stop → update-status, - // which left a gap where: - // - the container's last pre-teardown heartbeat could resurrect the row - // via the register-handler UPSERT (now also guarded in #73) - // - the liveness monitor could observe 'online' status + expired Redis - // TTL and trigger RestartByID, recreating a container we're trying - // to destroy - // Marking 'removed' first makes both of those paths no-op via their - // existing `status NOT IN ('removed', ...)` guards. - allIDs := append([]string{id}, descendantIDs...) - if _, err := db.DB.ExecContext(ctx, - `UPDATE workspaces SET status = 'removed', updated_at = now() WHERE id = ANY($1::uuid[])`, - pq.Array(allIDs)); err != nil { - log.Printf("Delete status update error for %s: %v", id, err) - } - if _, err := db.DB.ExecContext(ctx, - `DELETE FROM canvas_layouts WHERE workspace_id = ANY($1::uuid[])`, - pq.Array(allIDs)); err != nil { - log.Printf("Delete canvas_layouts error for %s: %v", id, err) - } - // Revoke all auth tokens for the deleted workspaces. Once the workspace is - // gone its tokens are meaningless; leaving them alive would keep - // HasAnyLiveTokenGlobal = true even after the platform is otherwise empty, - // which prevents AdminAuth from returning to fail-open and breaks the E2E - // test's count-zero assertion (and local re-run cleanup). - if _, err := db.DB.ExecContext(ctx, - `UPDATE workspace_auth_tokens SET revoked_at = now() - WHERE workspace_id = ANY($1::uuid[]) AND revoked_at IS NULL`, - pq.Array(allIDs)); err != nil { - log.Printf("Delete token revocation error for %s: %v", id, err) - } -// #1027: cascade-disable all schedules for the deleted workspaces so - // the scheduler never fires a cron into a removed container. - if _, err := db.DB.ExecContext(ctx, - `UPDATE workspace_schedules SET enabled = false, updated_at = now() - WHERE workspace_id = ANY($1::uuid[]) AND enabled = true`, - pq.Array(allIDs)); err != nil { - log.Printf("Delete schedule disable error for %s: %v", id, err) - } - - // Now stop containers + remove volumes for all descendants (any depth). - // Any concurrent heartbeat / registration / liveness-triggered restart - // will see status='removed' and bail out early. 
- for _, descID := range descendantIDs { - if h.provisioner != nil { - h.provisioner.Stop(ctx, descID) - if err := h.provisioner.RemoveVolume(ctx, descID); err != nil { - log.Printf("Delete descendant %s volume removal warning: %v", descID, err) - } - } - db.ClearWorkspaceKeys(ctx, descID) - h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_REMOVED", descID, map[string]interface{}{}) - } - - // Stop + remove volume for the workspace itself - if h.provisioner != nil { - h.provisioner.Stop(ctx, id) - if err := h.provisioner.RemoveVolume(ctx, id); err != nil { - log.Printf("Delete %s volume removal warning: %v", id, err) - } - } - db.ClearWorkspaceKeys(ctx, id) - - h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_REMOVED", id, map[string]interface{}{ - "cascade_deleted": len(descendantIDs), - }) - - // Hard purge: cascade delete all FK data and remove the DB row entirely (#1087) - if c.Query("purge") == "true" { - purgeIDs := pq.Array(allIDs) - // Order matters: delete from leaf tables first, then workspace row - for _, table := range []string{ - "agent_memories", "activity_logs", "workspace_secrets", - "workspace_channels", "workspace_config", "workspace_memory", - "workspace_token_usage", "approval_requests", "audit_events", - "workflow_checkpoints", "workspace_artifacts", "agents", - "workspace_auth_tokens", "workspace_schedules", "canvas_layouts", - } { - if _, err := db.DB.ExecContext(ctx, - fmt.Sprintf("DELETE FROM %s WHERE workspace_id = ANY($1::uuid[])", table), - purgeIDs); err != nil { - log.Printf("Purge %s error for %v: %v", table, allIDs, err) - } - } - // Null out parent_id / forwarded_to references - db.DB.ExecContext(ctx, "UPDATE workspaces SET parent_id = NULL WHERE parent_id = ANY($1::uuid[])", purgeIDs) - db.DB.ExecContext(ctx, "UPDATE workspaces SET forwarded_to = NULL WHERE forwarded_to = ANY($1::uuid[])", purgeIDs) - // Hard delete the workspace row - if _, err := db.DB.ExecContext(ctx, "DELETE FROM workspaces WHERE id = ANY($1::uuid[])", purgeIDs); err != nil { - log.Printf("Purge workspace row error for %v: %v", allIDs, err) - c.JSON(http.StatusInternalServerError, gin.H{"error": "purge failed"}) - return - } - c.JSON(http.StatusOK, gin.H{"status": "purged", "cascade_deleted": len(descendantIDs)}) - return - } - - c.JSON(http.StatusOK, gin.H{"status": "removed", "cascade_deleted": len(descendantIDs)}) -} - -// validateWorkspaceID returns an error when id is not a valid UUID. -// #687: prevents 500s from Postgres when a garbage string (e.g. ../../etc/passwd) -// is passed as the :id path parameter. -func validateWorkspaceID(id string) error { - if _, err := uuid.Parse(id); err != nil { - return fmt.Errorf("invalid workspace id") - } - return nil -} - -// yamlSpecialChars is the set of YAML-special characters banned from workspace -// name and role. Newlines are handled separately below (same error message for -// all four fields); these additional characters target YAML block indicators, -// flow-sequence/mapping delimiters, and shell-expansion metacharacters that -// yamlQuote does NOT escape inside a double-quoted scalar (#685). -const yamlSpecialChars = "{}[]|>*&!" - -// validateWorkspaceFields enforces maximum field lengths and rejects characters -// that could enable YAML-injection in downstream provisioning paths. -// #685 (defence-in-depth over yamlQuote — newline + YAML-special chars in name/role), -// #688 (max field lengths). -func validateWorkspaceFields(name, role, model, runtime string) error { - // All four fields: reject newline / carriage-return. 
- for _, f := range []struct{ label, val string }{ - {"name", name}, - {"role", role}, - {"model", model}, - {"runtime", runtime}, - } { - if strings.ContainsAny(f.val, "\n\r") { - return fmt.Errorf("%s must not contain newline characters", f.label) - } - } - // name and role only: reject YAML-special characters (#685). - for _, f := range []struct{ label, val string }{ - {"name", name}, - {"role", role}, - } { - if strings.ContainsAny(f.val, yamlSpecialChars) { - return fmt.Errorf("%s contains invalid characters", f.label) - } - } - if len(name) > 255 { - return fmt.Errorf("name must be at most 255 characters") - } - if len(role) > 1000 { - return fmt.Errorf("role must be at most 1000 characters") - } - if len(model) > 100 { - return fmt.Errorf("model must be at most 100 characters") - } - if len(runtime) > 100 { - return fmt.Errorf("runtime must be at most 100 characters") - } - return nil -} diff --git a/workspace-server/internal/handlers/workspace_crud.go b/workspace-server/internal/handlers/workspace_crud.go new file mode 100644 index 00000000..741ac5c2 --- /dev/null +++ b/workspace-server/internal/handlers/workspace_crud.go @@ -0,0 +1,489 @@ +package handlers + +// workspace_crud.go — workspace state queries, updates, deletion, and +// field validation. Covers State (polling endpoint), Update (PATCH), +// Delete (cascade + purge), and input validation helpers. + +import ( + "database/sql" + "fmt" + "log" + "net/http" + "path/filepath" + "strings" + + "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth" + "github.com/gin-gonic/gin" + "github.com/google/uuid" + "github.com/lib/pq" +) +// State handles GET /workspaces/:id/state — minimal status payload for +// remote-agent polling (Phase 30.4). Returns `{status, paused, deleted, +// workspace_id}` so a remote agent can detect pause/resume/delete +// without needing WebSocket reachability from the platform. +// +// Auth: Phase 30.1 bearer token required when the workspace has any +// live token on file; legacy workspaces grandfathered. Uses the same +// fail-closed posture as secrets.Values — polling this cadence with +// unauth'd callers would be a trivial DoS / workspace-status-scanner +// otherwise. +// +// The endpoint is deliberately NOT merged with GET /workspaces/:id: +// that handler is optimized for canvas (returns config, agent_card, +// position, …) and is unauthenticated by design. State is the +// agent-machinery polling path — tight, token-gated, cache-friendly. +func (h *WorkspaceHandler) State(c *gin.Context) { + workspaceID := c.Param("id") + ctx := c.Request.Context() + + // Auth gate — same shape as secrets.Values (Phase 30.2). Fail-closed + // on DB errors because the caller is about to poll this at ~60s + // cadence; letting unauth'd callers through on a hiccup turns this + // into a workspace-status scanner. 
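+	//
+	// Sketch of an authorized poll (hypothetical host, id, and token):
+	//
+	//	curl http://localhost:8080/workspaces/<id>/state \
+	//	  -H "Authorization: Bearer <token>"
+	//	→ 200 {"workspace_id":"<id>","status":"online","paused":false,"deleted":false}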
+	hasLive, hlErr := wsauth.HasAnyLiveToken(ctx, db.DB, workspaceID)
+	if hlErr != nil {
+		log.Printf("wsauth: HasAnyLiveToken(%s) failed for workspace.State: %v", workspaceID, hlErr)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "auth check failed"})
+		return
+	}
+	if hasLive {
+		tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization"))
+		if tok == "" {
+			c.JSON(http.StatusUnauthorized, gin.H{"error": "missing workspace auth token"})
+			return
+		}
+		if err := wsauth.ValidateToken(ctx, db.DB, workspaceID, tok); err != nil {
+			c.JSON(http.StatusUnauthorized, gin.H{"error": "invalid workspace auth token"})
+			return
+		}
+	}
+
+	var status string
+	err := db.DB.QueryRowContext(ctx, `
+		SELECT status
+		FROM workspaces
+		WHERE id = $1
+	`, workspaceID).Scan(&status)
+	if err == sql.ErrNoRows {
+		// A deleted workspace row no longer exists — remote agent should
+		// interpret 404 as "shut yourself down" (our soft-delete path uses
+		// status='removed' but keeps the row; a 404 here means the
+		// workspace was hard-deleted out from under the agent).
+		c.JSON(http.StatusNotFound, gin.H{
+			"workspace_id": workspaceID,
+			"deleted":      true,
+		})
+		return
+	}
+	if err != nil {
+		log.Printf("workspace.State query error for %s: %v", workspaceID, err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
+		return
+	}
+
+	// Two delete paths: hard-delete (sql.ErrNoRows above → 404) AND
+	// soft-delete (status='removed' → also return 404 here so the SDK
+	// doesn't have to remember "is it 200 with deleted=true OR 404 with
+	// deleted=true?"). Same shape, same status code, same flag set.
+	if status == "removed" {
+		c.JSON(http.StatusNotFound, gin.H{
+			"workspace_id": workspaceID,
+			"status":       "removed",
+			"deleted":      true,
+		})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{
+		"workspace_id": workspaceID,
+		"status":       status,
+		"paused":       status == "paused",
+		"deleted":      false,
+	})
+}
+
+// sensitiveUpdateFields documents fields that carry elevated risk — kept as
+// an explicit list for code readability and future audits. Auth is now fully
+// enforced at the router layer (WorkspaceAuth middleware, #680 IDOR fix);
+// this map is no longer used for in-handler gate logic but is preserved to
+// surface the risk classification clearly.
+//
+// budget_limit is intentionally NOT here — the dedicated PATCH
+// /workspaces/:id/budget (AdminAuth) is the only write path (#611).
+var sensitiveUpdateFields = map[string]struct{}{
+	"tier":          {},
+	"parent_id":     {},
+	"runtime":       {},
+	"workspace_dir": {},
+}
+
+// Update handles PATCH /workspaces/:id
+func (h *WorkspaceHandler) Update(c *gin.Context) {
+	id := c.Param("id")
+
+	// #687: reject non-UUID IDs before hitting the DB.
+	if err := validateWorkspaceID(id); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
+		return
+	}
+
+	var body map[string]interface{}
+	if err := c.ShouldBindJSON(&body); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
+		return
+	}
+
+	// #685/#688: validate string fields for length and injection safety.
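+	// e.g. a PATCH body of {"name": "ops{prod}"} (hypothetical value) is
+	// rejected: "{" is in the yamlSpecialChars set that
+	// validateWorkspaceFields bans for name and role.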
+	strField := func(key string) string {
+		if v, ok := body[key]; ok {
+			if s, ok := v.(string); ok {
+				return s
+			}
+		}
+		return ""
+	}
+	if err := validateWorkspaceFields(
+		strField("name"), strField("role"), "" /*model not patchable*/, strField("runtime"),
+	); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace fields"})
+		return
+	}
+
+	ctx := c.Request.Context()
+
+	// Auth is fully enforced at the router layer (WorkspaceAuth middleware, #680).
+	// WorkspaceAuth validates that the caller holds a valid bearer token for this
+	// specific workspace — no additional auth gate is needed here. The
+	// sensitiveUpdateFields map above documents the risk classification for
+	// auditors but is no longer used as a runtime gate.
+
+	// #120: guard — return 404 for nonexistent workspace IDs instead of
+	// silently applying zero-row UPDATEs and returning 200.
+	var exists bool
+	if err := db.DB.QueryRowContext(ctx,
+		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1)`, id,
+	).Scan(&exists); err != nil || !exists {
+		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
+		return
+	}
+
+	if name, ok := body["name"]; ok {
+		if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET name = $2, updated_at = now() WHERE id = $1`, id, name); err != nil {
+			log.Printf("Update name error for %s: %v", id, err)
+		}
+	}
+	if role, ok := body["role"]; ok {
+		if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET role = $2, updated_at = now() WHERE id = $1`, id, role); err != nil {
+			log.Printf("Update role error for %s: %v", id, err)
+		}
+	}
+	if tier, ok := body["tier"]; ok {
+		if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET tier = $2, updated_at = now() WHERE id = $1`, id, tier); err != nil {
+			log.Printf("Update tier error for %s: %v", id, err)
+		}
+	}
+	if parentID, ok := body["parent_id"]; ok {
+		if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET parent_id = $2, updated_at = now() WHERE id = $1`, id, parentID); err != nil {
+			log.Printf("Update parent_id error for %s: %v", id, err)
+		}
+	}
+	if runtime, ok := body["runtime"]; ok {
+		if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET runtime = $2, updated_at = now() WHERE id = $1`, id, runtime); err != nil {
+			log.Printf("Update runtime error for %s: %v", id, err)
+		}
+	}
+	needsRestart := false
+	if wsDir, ok := body["workspace_dir"]; ok {
+		// Allow null to clear workspace_dir
+		if wsDir != nil {
+			if dirStr, isStr := wsDir.(string); isStr && dirStr != "" {
+				if err := validateWorkspaceDir(dirStr); err != nil {
+					c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace directory"})
+					return
+				}
+			}
+		}
+		if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET workspace_dir = $2, updated_at = now() WHERE id = $1`, id, wsDir); err != nil {
+			log.Printf("Update workspace_dir error for %s: %v", id, err)
+		}
+		needsRestart = true
+	}
+	// NOTE: budget_limit is intentionally NOT handled here. The dedicated
+	// PATCH /workspaces/:id/budget (AdminAuth) is the only write path.
+	// This endpoint is reachable with a workspace's own bearer token —
+	// the workspace could otherwise self-clear its own spending ceiling.
(#611 Security Auditor) + + // Update canvas position if both x and y provided + if x, xOk := body["x"]; xOk { + if y, yOk := body["y"]; yOk { + if _, err := db.DB.ExecContext(ctx, ` + INSERT INTO canvas_layouts (workspace_id, x, y) + VALUES ($1, $2, $3) + ON CONFLICT (workspace_id) DO UPDATE SET x = EXCLUDED.x, y = EXCLUDED.y + `, id, x, y); err != nil { + log.Printf("Update position error for %s: %v", id, err) + } + } + } + + resp := gin.H{"status": "updated"} + if needsRestart { + resp["needs_restart"] = true + } + c.JSON(http.StatusOK, resp) +} + +// validateWorkspaceDir checks that a workspace_dir path is safe to bind-mount. +func validateWorkspaceDir(dir string) error { + if !filepath.IsAbs(dir) { + return fmt.Errorf("workspace_dir must be an absolute path") + } + if strings.Contains(dir, "..") { + return fmt.Errorf("workspace_dir must not contain '..'") + } + // Reject system-critical paths + clean := filepath.Clean(dir) + for _, blocked := range []string{"/etc", "/var", "/proc", "/sys", "/dev", "/boot", "/sbin", "/bin", "/lib", "/usr"} { + if clean == blocked || strings.HasPrefix(clean, blocked+"/") { + return fmt.Errorf("workspace_dir must not be a system path (%s)", blocked) + } + } + return nil +} + +// Delete handles DELETE /workspaces/:id +// If the workspace has children (is a team), cascade deletes all sub-workspaces. +// Use ?confirm=true to actually delete (otherwise returns children list for confirmation). +func (h *WorkspaceHandler) Delete(c *gin.Context) { + id := c.Param("id") + ctx := c.Request.Context() + confirm := c.Query("confirm") == "true" + + // #687: reject non-UUID IDs before hitting the DB. + if err := validateWorkspaceID(id); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"}) + return + } + + // Check for children + rows, err := db.DB.QueryContext(ctx, + `SELECT id, name FROM workspaces WHERE parent_id = $1 AND status != 'removed'`, id) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to check children"}) + return + } + defer rows.Close() + + var children []map[string]string + for rows.Next() { + var childID, childName string + if rows.Scan(&childID, &childName) == nil { + children = append(children, map[string]string{"id": childID, "name": childName}) + } + } + if err := rows.Err(); err != nil { + log.Printf("Delete: child rows error: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to check children"}) + return + } + + // If has children and not confirmed, return children list for confirmation. + // Uses HTTP 409 Conflict (not 200) so `curl --fail`, `fetch().ok`, and any + // client that treats HTTP 4xx as an error surfaces the confirmation + // requirement. Body shape unchanged so the canvas UI's parser keeps + // working. Fixes #88. + if len(children) > 0 && !confirm { + c.JSON(http.StatusConflict, gin.H{ + "status": "confirmation_required", + "message": "This workspace has sub-workspaces. Delete with ?confirm=true to cascade delete.", + "children": children, + "children_count": len(children), + }) + return + } + + // Cascade delete: collect ALL descendants (not just direct children) via + // recursive CTE, then stop each container and remove each volume. + // Previous bug: only direct children's containers were stopped, leaving + // grandchildren as orphan running containers after a cascade delete. 
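+	// e.g. deleting team T with child C and grandchild G: the recursive CTE
+	// below yields {C, G}, so G's container is stopped and its volume removed
+	// too; the old direct-children query would have left G running.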
+	descendantIDs := []string{}
+	if len(children) > 0 {
+		descRows, err := db.DB.QueryContext(ctx, `
+			WITH RECURSIVE descendants AS (
+				SELECT id FROM workspaces WHERE parent_id = $1 AND status != 'removed'
+				UNION ALL
+				SELECT w.id FROM workspaces w JOIN descendants d ON w.parent_id = d.id WHERE w.status != 'removed'
+			)
+			SELECT id FROM descendants
+		`, id)
+		if err != nil {
+			log.Printf("Delete: descendant query error for %s: %v", id, err)
+		} else {
+			for descRows.Next() {
+				var descID string
+				if descRows.Scan(&descID) == nil {
+					descendantIDs = append(descendantIDs, descID)
+				}
+			}
+			descRows.Close()
+		}
+	}
+
+	// #73 fix: mark rows 'removed' in the DB FIRST, BEFORE stopping containers
+	// or removing volumes. Previously the sequence was stop → update-status,
+	// which left a gap where:
+	//   - the container's last pre-teardown heartbeat could resurrect the row
+	//     via the register-handler UPSERT (now also guarded in #73)
+	//   - the liveness monitor could observe 'online' status + expired Redis
+	//     TTL and trigger RestartByID, recreating a container we're trying
+	//     to destroy
+	// Marking 'removed' first makes both of those paths no-op via their
+	// existing `status NOT IN ('removed', ...)` guards.
+	allIDs := append([]string{id}, descendantIDs...)
+	if _, err := db.DB.ExecContext(ctx,
+		`UPDATE workspaces SET status = 'removed', updated_at = now() WHERE id = ANY($1::uuid[])`,
+		pq.Array(allIDs)); err != nil {
+		log.Printf("Delete status update error for %s: %v", id, err)
+	}
+	if _, err := db.DB.ExecContext(ctx,
+		`DELETE FROM canvas_layouts WHERE workspace_id = ANY($1::uuid[])`,
+		pq.Array(allIDs)); err != nil {
+		log.Printf("Delete canvas_layouts error for %s: %v", id, err)
+	}
+	// Revoke all auth tokens for the deleted workspaces. Once the workspace is
+	// gone its tokens are meaningless; leaving them alive would keep
+	// HasAnyLiveTokenGlobal = true even after the platform is otherwise empty,
+	// which prevents AdminAuth from returning to fail-open and breaks the E2E
+	// test's count-zero assertion (and local re-run cleanup).
+	if _, err := db.DB.ExecContext(ctx,
+		`UPDATE workspace_auth_tokens SET revoked_at = now()
+		 WHERE workspace_id = ANY($1::uuid[]) AND revoked_at IS NULL`,
+		pq.Array(allIDs)); err != nil {
+		log.Printf("Delete token revocation error for %s: %v", id, err)
+	}
+	// #1027: cascade-disable all schedules for the deleted workspaces so
+	// the scheduler never fires a cron into a removed container.
+	if _, err := db.DB.ExecContext(ctx,
+		`UPDATE workspace_schedules SET enabled = false, updated_at = now()
+		 WHERE workspace_id = ANY($1::uuid[]) AND enabled = true`,
+		pq.Array(allIDs)); err != nil {
+		log.Printf("Delete schedule disable error for %s: %v", id, err)
+	}
+
+	// Now stop containers + remove volumes for all descendants (any depth).
+	// Any concurrent heartbeat / registration / liveness-triggered restart
+	// will see status='removed' and bail out early.
+ for _, descID := range descendantIDs { + if h.provisioner != nil { + h.provisioner.Stop(ctx, descID) + if err := h.provisioner.RemoveVolume(ctx, descID); err != nil { + log.Printf("Delete descendant %s volume removal warning: %v", descID, err) + } + } + db.ClearWorkspaceKeys(ctx, descID) + h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_REMOVED", descID, map[string]interface{}{}) + } + + // Stop + remove volume for the workspace itself + if h.provisioner != nil { + h.provisioner.Stop(ctx, id) + if err := h.provisioner.RemoveVolume(ctx, id); err != nil { + log.Printf("Delete %s volume removal warning: %v", id, err) + } + } + db.ClearWorkspaceKeys(ctx, id) + + h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_REMOVED", id, map[string]interface{}{ + "cascade_deleted": len(descendantIDs), + }) + + // Hard purge: cascade delete all FK data and remove the DB row entirely (#1087) + if c.Query("purge") == "true" { + purgeIDs := pq.Array(allIDs) + // Order matters: delete from leaf tables first, then workspace row + for _, table := range []string{ + "agent_memories", "activity_logs", "workspace_secrets", + "workspace_channels", "workspace_config", "workspace_memory", + "workspace_token_usage", "approval_requests", "audit_events", + "workflow_checkpoints", "workspace_artifacts", "agents", + "workspace_auth_tokens", "workspace_schedules", "canvas_layouts", + } { + if _, err := db.DB.ExecContext(ctx, + fmt.Sprintf("DELETE FROM %s WHERE workspace_id = ANY($1::uuid[])", table), + purgeIDs); err != nil { + log.Printf("Purge %s error for %v: %v", table, allIDs, err) + } + } + // Null out parent_id / forwarded_to references + db.DB.ExecContext(ctx, "UPDATE workspaces SET parent_id = NULL WHERE parent_id = ANY($1::uuid[])", purgeIDs) + db.DB.ExecContext(ctx, "UPDATE workspaces SET forwarded_to = NULL WHERE forwarded_to = ANY($1::uuid[])", purgeIDs) + // Hard delete the workspace row + if _, err := db.DB.ExecContext(ctx, "DELETE FROM workspaces WHERE id = ANY($1::uuid[])", purgeIDs); err != nil { + log.Printf("Purge workspace row error for %v: %v", allIDs, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "purge failed"}) + return + } + c.JSON(http.StatusOK, gin.H{"status": "purged", "cascade_deleted": len(descendantIDs)}) + return + } + + c.JSON(http.StatusOK, gin.H{"status": "removed", "cascade_deleted": len(descendantIDs)}) +} + +// validateWorkspaceID returns an error when id is not a valid UUID. +// #687: prevents 500s from Postgres when a garbage string (e.g. ../../etc/passwd) +// is passed as the :id path parameter. +func validateWorkspaceID(id string) error { + if _, err := uuid.Parse(id); err != nil { + return fmt.Errorf("invalid workspace id") + } + return nil +} + +// yamlSpecialChars is the set of YAML-special characters banned from workspace +// name and role. Newlines are handled separately below (same error message for +// all four fields); these additional characters target YAML block indicators, +// flow-sequence/mapping delimiters, and shell-expansion metacharacters that +// yamlQuote does NOT escape inside a double-quoted scalar (#685). +const yamlSpecialChars = "{}[]|>*&!" + +// validateWorkspaceFields enforces maximum field lengths and rejects characters +// that could enable YAML-injection in downstream provisioning paths. +// #685 (defence-in-depth over yamlQuote — newline + YAML-special chars in name/role), +// #688 (max field lengths). +func validateWorkspaceFields(name, role, model, runtime string) error { + // All four fields: reject newline / carriage-return. 
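+	// (Illustrative: a name like "agent\nrole: admin" would otherwise open
+	// a new YAML key when interpolated into provisioning config, the #685
+	// defence-in-depth case this guards.)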
+ for _, f := range []struct{ label, val string }{ + {"name", name}, + {"role", role}, + {"model", model}, + {"runtime", runtime}, + } { + if strings.ContainsAny(f.val, "\n\r") { + return fmt.Errorf("%s must not contain newline characters", f.label) + } + } + // name and role only: reject YAML-special characters (#685). + for _, f := range []struct{ label, val string }{ + {"name", name}, + {"role", role}, + } { + if strings.ContainsAny(f.val, yamlSpecialChars) { + return fmt.Errorf("%s contains invalid characters", f.label) + } + } + if len(name) > 255 { + return fmt.Errorf("name must be at most 255 characters") + } + if len(role) > 1000 { + return fmt.Errorf("role must be at most 1000 characters") + } + if len(model) > 100 { + return fmt.Errorf("model must be at most 100 characters") + } + if len(runtime) > 100 { + return fmt.Errorf("runtime must be at most 100 characters") + } + return nil +} diff --git a/workspace-server/internal/handlers/workspace_provision_test.go b/workspace-server/internal/handlers/workspace_provision_test.go index b1f5f12e..8feb4403 100644 --- a/workspace-server/internal/handlers/workspace_provision_test.go +++ b/workspace-server/internal/handlers/workspace_provision_test.go @@ -906,6 +906,8 @@ func containsStr(s, substr string) bool { // truncates content at maxMemoryContentLength before INSERT. Regression // test for the error-sanitization / memory-seed contract. func TestSeedInitialMemories_Truncation(t *testing.T) { + mock := setupTestDB(t) + largeContent := string(make([]byte, 100_001)) copy([]byte(largeContent), "X") // fill with "X" so test is deterministic @@ -1095,7 +1097,7 @@ func TestProvisionWorkspace_NoInternalErrorsInBroadcast(t *testing.T) { mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`). WillReturnError(errInternalDB) - broadcaster := &captureBroadcaster{} + broadcaster := &captureBroadcaster{broadcaster: events.NewBroadcaster(nil)} handler := &WorkspaceHandler{ broadcaster: broadcaster, provisioner: &provisioner.Provisioner{}, @@ -1143,7 +1145,7 @@ func TestProvisionWorkspaceCP_NoInternalErrorsInBroadcast(t *testing.T) { mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`). 
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"})) - broadcaster := &captureBroadcaster{} + broadcaster := &captureBroadcaster{broadcaster: events.NewBroadcaster(nil)} registry := &mockEnvMutator{returnErr: errInternalDB} handler := &WorkspaceHandler{ broadcaster: broadcaster, diff --git a/workspace-server/internal/models/workspace.go b/workspace-server/internal/models/workspace.go index ff8ad0be..26061a1f 100644 --- a/workspace-server/internal/models/workspace.go +++ b/workspace-server/internal/models/workspace.go @@ -22,6 +22,7 @@ type Workspace struct { LastErrorRate float64 `json:"last_error_rate" db:"last_error_rate"` LastSampleError sql.NullString `json:"last_sample_error" db:"last_sample_error"` ActiveTasks int `json:"active_tasks" db:"active_tasks"` + MaxConcurrentTasks int `json:"max_concurrent_tasks" db:"max_concurrent_tasks"` UptimeSeconds int `json:"uptime_seconds" db:"uptime_seconds"` CreatedAt time.Time `json:"created_at" db:"created_at"` UpdatedAt time.Time `json:"updated_at" db:"updated_at"` diff --git a/workspace-server/internal/orgtoken/tokens_test.go b/workspace-server/internal/orgtoken/tokens_test.go index 9f51f46a..e3bee7e7 100644 --- a/workspace-server/internal/orgtoken/tokens_test.go +++ b/workspace-server/internal/orgtoken/tokens_test.go @@ -79,7 +79,7 @@ func TestValidate_HappyPath(t *testing.T) { WithArgs("tok-live"). WillReturnResult(sqlmock.NewResult(0, 1)) - id, prefix, orgID, err := Validate(context.Background(), db, plaintext) + id, prefix, _, err := Validate(context.Background(), db, plaintext) if err != nil { t.Fatalf("Validate: %v", err) } diff --git a/workspace-server/internal/scheduler/scheduler.go b/workspace-server/internal/scheduler/scheduler.go index 4fa12880..4ae82247 100644 --- a/workspace-server/internal/scheduler/scheduler.go +++ b/workspace-server/internal/scheduler/scheduler.go @@ -267,32 +267,36 @@ func (s *Scheduler) fireSchedule(ctx context.Context, sched scheduleRow) { // This replaces the #115 "skip when busy" pattern which caused crons // to permanently miss when workspaces were perpetually busy from the // Orchestrator pulse delegation chain (~30% message drop rate on Dev Lead). + // Check workspace capacity — fire when active_tasks < max_concurrent_tasks. + // Default max is 1 (backward compatible). Workspaces can override via config + // to allow concurrent task processing (e.g. leaders handling A2A while cron runs). 
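+	// e.g. a leader with max_concurrent_tasks=2 and one A2A delegation in
+	// flight (active_tasks=1) still fires its cron on time; a second
+	// in-flight task defers it, and after the 2-minute wait below it skips.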
var activeTasks int + var maxConcurrent int if err := db.DB.QueryRowContext(ctx, - `SELECT COALESCE(active_tasks, 0) FROM workspaces WHERE id = $1`, + `SELECT COALESCE(active_tasks, 0), COALESCE(max_concurrent_tasks, 1) FROM workspaces WHERE id = $1`, sched.WorkspaceID, - ).Scan(&activeTasks); err == nil && activeTasks > 0 { - log.Printf("Scheduler: '%s' workspace %s busy (active_tasks=%d), deferring up to 2 min", - sched.Name, short(sched.WorkspaceID, 12), activeTasks) + ).Scan(&activeTasks, &maxConcurrent); err == nil && activeTasks >= maxConcurrent { + log.Printf("Scheduler: '%s' workspace %s at capacity (active_tasks=%d, max=%d), deferring up to 2 min", + sched.Name, short(sched.WorkspaceID, 12), activeTasks, maxConcurrent) // Poll every 10s for up to 2 minutes waited := false for i := 0; i < 12; i++ { time.Sleep(10 * time.Second) if err := db.DB.QueryRowContext(ctx, - `SELECT COALESCE(active_tasks, 0) FROM workspaces WHERE id = $1`, + `SELECT COALESCE(active_tasks, 0), COALESCE(max_concurrent_tasks, 1) FROM workspaces WHERE id = $1`, sched.WorkspaceID, - ).Scan(&activeTasks); err != nil || activeTasks == 0 { + ).Scan(&activeTasks, &maxConcurrent); err != nil || activeTasks < maxConcurrent { waited = true break } } - if !waited && activeTasks > 0 { - log.Printf("Scheduler: skipping '%s' on busy workspace %s after 2 min wait (active_tasks=%d)", - sched.Name, short(sched.WorkspaceID, 12), activeTasks) + if !waited && activeTasks >= maxConcurrent { + log.Printf("Scheduler: skipping '%s' on busy workspace %s after 2 min wait (active_tasks=%d, max=%d)", + sched.Name, short(sched.WorkspaceID, 12), activeTasks, maxConcurrent) s.recordSkipped(ctx, sched, activeTasks) return } - log.Printf("Scheduler: '%s' workspace %s now idle after deferral, firing", + log.Printf("Scheduler: '%s' workspace %s has capacity after deferral, firing", sched.Name, short(sched.WorkspaceID, 12)) } diff --git a/workspace-server/migrations/037_max_concurrent_tasks.down.sql b/workspace-server/migrations/037_max_concurrent_tasks.down.sql new file mode 100644 index 00000000..d5274526 --- /dev/null +++ b/workspace-server/migrations/037_max_concurrent_tasks.down.sql @@ -0,0 +1 @@ +ALTER TABLE workspaces DROP COLUMN IF EXISTS max_concurrent_tasks; diff --git a/workspace-server/migrations/037_max_concurrent_tasks.up.sql b/workspace-server/migrations/037_max_concurrent_tasks.up.sql new file mode 100644 index 00000000..644ea18c --- /dev/null +++ b/workspace-server/migrations/037_max_concurrent_tasks.up.sql @@ -0,0 +1,5 @@ +-- Per-workspace concurrency limit (#1408). +-- Default 1 preserves current behavior (single-task). Leaders can be +-- configured with higher values to accept A2A delegations while a cron runs. +ALTER TABLE workspaces + ADD COLUMN IF NOT EXISTS max_concurrent_tasks INTEGER NOT NULL DEFAULT 1; diff --git a/workspace/a2a_tools.py b/workspace/a2a_tools.py index b3ffbbfd..04633209 100644 --- a/workspace/a2a_tools.py +++ b/workspace/a2a_tools.py @@ -3,6 +3,7 @@ Imports shared client functions and constants from a2a_client. """ +import hashlib import json import uuid @@ -124,11 +125,16 @@ async def tool_delegate_task_async(workspace_id: str, task: str) -> str: if not workspace_id or not task: return "Error: workspace_id and task are required" + # Idempotency key: SHA-256 of (workspace_id, task) so that a restarted agent + # firing the same delegation gets the same key and the platform returns the + # existing delegation_id instead of creating a duplicate. Fixes #1456. 
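+    # Illustrative: every retry of ("ws-research", "summarize inbox") hashes
+    # to the same 32-hex-char key, so the platform can dedupe a resend after
+    # an agent crash-restart (workspace/task values here are hypothetical).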
+ idem_key = hashlib.sha256(f"{workspace_id}:{task}".encode()).hexdigest()[:32] + try: async with httpx.AsyncClient(timeout=10.0) as client: resp = await client.post( f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/delegate", - json={"target_id": workspace_id, "task": task}, + json={"target_id": workspace_id, "task": task, "idempotency_key": idem_key}, headers=_auth_headers_for_heartbeat(), ) if resp.status_code == 202: diff --git a/workspace/executor_helpers.py b/workspace/executor_helpers.py index 0d6e2d85..f40fa6b7 100644 --- a/workspace/executor_helpers.py +++ b/workspace/executor_helpers.py @@ -199,14 +199,26 @@ def read_delegation_results() -> str: # ======================================================================== async def set_current_task(heartbeat: "HeartbeatLoop | None", task: str) -> None: - """Update current task on heartbeat and push immediately via platform API.""" + """Update current task on heartbeat and push immediately via platform API. + + Uses increment/decrement instead of binary 0/1 so agents can track + multiple concurrent tasks (#1408). Pushes immediately on both + increment and decrement to avoid phantom-busy (#1372). + """ if heartbeat is not None: - heartbeat.current_task = task - heartbeat.active_tasks = 1 if task else 0 + if task: + heartbeat.active_tasks = getattr(heartbeat, "active_tasks", 0) + 1 + heartbeat.current_task = task + else: + heartbeat.active_tasks = max(0, getattr(heartbeat, "active_tasks", 0) - 1) + if heartbeat.active_tasks == 0: + heartbeat.current_task = "" workspace_id = os.environ.get("WORKSPACE_ID", "") platform_url = os.environ.get("PLATFORM_URL", "") if not (workspace_id and platform_url): return + active = getattr(heartbeat, "active_tasks", 0) if heartbeat is not None else (1 if task else 0) + cur_task = getattr(heartbeat, "current_task", task or "") if heartbeat is not None else (task or "") try: try: from platform_auth import auth_headers as _auth @@ -217,8 +229,8 @@ async def set_current_task(heartbeat: "HeartbeatLoop | None", task: str) -> None f"{platform_url}/registry/heartbeat", json={ "workspace_id": workspace_id, - "current_task": task, - "active_tasks": 1 if task else 0, + "current_task": cur_task, + "active_tasks": active, "error_rate": 0, "sample_error": "", "uptime_seconds": 0, diff --git a/workspace/shared_runtime.py b/workspace/shared_runtime.py index a3838664..dba05700 100644 --- a/workspace/shared_runtime.py +++ b/workspace/shared_runtime.py @@ -153,20 +153,21 @@ def brief_task(text: str, limit: int = 60) -> str: async def set_current_task(heartbeat: Any, task: str) -> None: """Update current task on heartbeat and push immediately to platform. - The heartbeat loop only fires every 30s, so quick tasks would finish - before the canvas ever sees them. Setting a task pushes immediately. - Clearing a task only updates the heartbeat object — the next heartbeat - cycle will broadcast the clear, keeping the task visible longer. + Uses increment/decrement instead of binary 0/1 so agents can track + multiple concurrent tasks (e.g. a cron running while an A2A delegation + arrives). The counter never goes below 0. + + Pushes immediately on BOTH increment and decrement to avoid phantom-busy + (#1372) where active_tasks=1 persisted in the platform DB indefinitely. 
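+
+    Illustrative sequence (counter starts at 0; `hb` and the task
+    names are hypothetical):
+        await set_current_task(hb, "cron: digest")  # active_tasks 0 → 1
+        await set_current_task(hb, "a2a: review")   # active_tasks 1 → 2
+        await set_current_task(hb, "")              # active_tasks 2 → 1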
""" if heartbeat: - heartbeat.current_task = task - heartbeat.active_tasks = 1 if task else 0 - - # Only push immediately when SETTING a task (not clearing) - # Clearing is handled by the next heartbeat cycle, which keeps - # the task visible on the canvas for quick A2A responses - if not task: - return + if task: + heartbeat.active_tasks = getattr(heartbeat, "active_tasks", 0) + 1 + heartbeat.current_task = task + else: + heartbeat.active_tasks = max(0, getattr(heartbeat, "active_tasks", 0) - 1) + if heartbeat.active_tasks == 0: + heartbeat.current_task = "" import os workspace_id = os.environ.get("WORKSPACE_ID", "") @@ -174,13 +175,15 @@ async def set_current_task(heartbeat: Any, task: str) -> None: if workspace_id and platform_url: try: import httpx + active = getattr(heartbeat, "active_tasks", 0) if heartbeat else (1 if task else 0) + cur_task = getattr(heartbeat, "current_task", task or "") if heartbeat else (task or "") async with httpx.AsyncClient(timeout=3.0) as client: await client.post( f"{platform_url}/registry/heartbeat", json={ "workspace_id": workspace_id, - "current_task": task, - "active_tasks": 1, + "current_task": cur_task, + "active_tasks": active, "error_rate": 0, "sample_error": "", "uptime_seconds": 0,