forked from molecule-ai/molecule-core
fix(plugins): SaaS (EC2-per-workspace) install/uninstall via EIC SSH (#84)
Closes docker-only row in backends.md. Approved by security-auditor.
commit b11044f885
@@ -2,7 +2,7 @@

**Status:** living document — update when you ship a feature that touches one backend.
**Owner:** workspace-server + controlplane teams.
-**Last audit:** 2026-05-05 (Claude agent — `provisionWorkspaceAuto` / `StopWorkspaceAuto` / `HasProvisioner` SoT pattern landed in PRs #2811 + #2824).
+**Last audit:** 2026-05-07 (plugin install/uninstall closed for EC2 backend via EIC SSH push to the bind-mounted `/configs/plugins/<name>/`, mirroring the Files API PR #1702 pattern).

## Why this exists

@@ -54,7 +54,7 @@ For "do we have any backend?", use `HasProvisioner()`, never bare `h.provisioner
| **Files API** | | | | |
| List / Read / Write / Replace / Delete | `container_files.go`, `template_import.go` | `docker exec` + tar `CopyToContainer` | SSH via EIC tunnel (PR #1702) | ✅ parity as of 2026-04-22 (previously docker-only) |
| **Plugins** | | | | |
-| Install / uninstall / list | `plugins_install.go` | `deliverToContainer()` + volume rm | **gap — no live plugin delivery** | 🔴 **docker-only** |
+| Install / uninstall / list | `plugins_install.go` + `plugins_install_eic.go` | `deliverToContainer()` → exec+`CopyToContainer` on local container | `instance_id` set → EIC SSH push of the staged tarball into the EC2's bind-mounted `/configs/plugins/<name>/` (per `workspaceFilePathPrefix`), `chown 1000:1000`, restart | ✅ parity |
| **Terminal (WebSocket)** | | | | |
| Dispatch | `terminal.go:90-105` | `instance_id=""` → `handleLocalConnect` → `docker attach` | `instance_id` set → `handleRemoteConnect` → EIC SSH + `docker exec` | ✅ parity (different implementations, same UX) |
| **A2A proxy** | | | | |
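A minimal, runnable sketch of the dispatch convention those rows share (illustrative names only; the real handlers appear in the diffs below):

package main

import (
	"errors"
	"fmt"
)

// errNoBackend maps to HTTP 503 at the handler layer.
var errNoBackend = errors.New("no backend wired")

// dispatch picks a transport the way the table rows do: a running local
// Docker container wins, an EC2-backed workspace (instance_id set) goes
// over EIC SSH, anything else is the true "no backend" case.
func dispatch(localContainer, instanceID string) (string, error) {
	switch {
	case localContainer != "":
		return "docker-exec", nil
	case instanceID != "":
		return "eic-ssh", nil
	default:
		return "", errNoBackend
	}
}

func main() {
	fmt.Println(dispatch("", "i-0abc123"))  // eic-ssh <nil>
	fmt.Println(dispatch("ws-abc-ctr", "")) // docker-exec <nil>
}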
@@ -108,6 +108,18 @@ type eicTunnelPool struct {
	// First acquirer takes the slot; later ones wait on the channel.
	pendingSetups map[string]chan struct{}
	stopJanitor   chan struct{}
+	// janitorInterval is captured at pool construction from the
+	// package-level poolJanitorInterval var. Captured (not re-read on
+	// every tick) so a test that swaps the package var via t.Cleanup
+	// after a global pool's janitor is already running can't race
+	// with that goroutine's ticker read. The global pool is created
+	// lazily once per process via sync.Once; before this capture
+	// landed, every test that touched poolJanitorInterval after the
+	// global pool's first-touch raced the janitor (caught by -race
+	// on staging tip 249dbc6a — TestPooledWithEICTunnel_PanicPoisonsEntry).
+	// Tests still get the new value on a freshPool() because they
+	// set the package var BEFORE calling newEICTunnelPool().
+	janitorInterval time.Duration
}

var (
@@ -127,11 +139,16 @@ func getEICTunnelPool() *eicTunnelPool {

// newEICTunnelPool constructs an empty pool. Exported so tests can
// build isolated pools without sharing the singleton.
+//
+// Captures poolJanitorInterval at construction time so the janitor
+// goroutine doesn't race with t.Cleanup-driven swaps of the package
+// var. See the janitorInterval field comment for the failure mode.
func newEICTunnelPool() *eicTunnelPool {
	return &eicTunnelPool{
-		entries:       map[string]*pooledTunnel{},
-		pendingSetups: map[string]chan struct{}{},
-		stopJanitor:   make(chan struct{}),
+		entries:         map[string]*pooledTunnel{},
+		pendingSetups:   map[string]chan struct{}{},
+		stopJanitor:     make(chan struct{}),
+		janitorInterval: poolJanitorInterval,
	}
}

@@ -290,8 +307,11 @@ func (p *eicTunnelPool) evictLRUIfFullLocked(skipInstance string) {
// janitor periodically scans for entries that are idle AND expired,
// closing their tunnels. Runs forever (per pool lifetime); cancelled
// by close(p.stopJanitor) for tests that build short-lived pools.
+//
+// Reads p.janitorInterval (captured at construction) instead of the
+// package-level poolJanitorInterval — see janitorInterval field comment.
func (p *eicTunnelPool) janitor() {
-	t := time.NewTicker(poolJanitorInterval)
+	t := time.NewTicker(p.janitorInterval)
	defer t.Stop()
	for {
		select {

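The capture-at-construction fix above generalises beyond this pool. A self-contained sketch of the same pattern with a hypothetical worker type (not the pool code): the goroutine only ever reads the copy captured in the struct, so a test that swaps the package-level knob after construction cannot race the ticker read.

package main

import (
	"fmt"
	"time"
)

// pollInterval is a package-level knob that tests may swap.
var pollInterval = 50 * time.Millisecond

type worker struct {
	interval time.Duration // captured once, at construction
	stop     chan struct{}
}

func newWorker() *worker {
	w := &worker{interval: pollInterval, stop: make(chan struct{})}
	go w.loop()
	return w
}

func (w *worker) loop() {
	t := time.NewTicker(w.interval) // reads the captured copy, never the global
	defer t.Stop()
	for {
		select {
		case <-t.C:
			fmt.Println("tick")
		case <-w.stop:
			return
		}
	}
}

func main() {
	w := newWorker()
	pollInterval = time.Hour // safe: the running worker never re-reads the global
	time.Sleep(120 * time.Millisecond)
	close(w.stop)
}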
@@ -23,6 +23,16 @@ import (
// workspace-scoped filtering (handler falls back to unfiltered list).
type RuntimeLookup func(workspaceID string) (string, error)

+// InstanceIDLookup resolves a workspace's EC2 instance_id by ID. Empty
+// string means the workspace is not on the SaaS (EC2-per-workspace)
+// backend — i.e. either local-Docker or pre-provision. The handler uses
+// this to dispatch plugin install/uninstall to the EIC SSH path
+// (template_files_eic.go primitive) when a workspace runs on its own EC2
+// and there's no local Docker container to exec into. A nil lookup keeps
+// the handler on the local-Docker code path only — same shape as the
+// pre-fix behaviour.
+type InstanceIDLookup func(workspaceID string) (string, error)

// pluginSources is the contract PluginsHandler uses to talk to the
// plugin source registry. Extracted as an interface (#1814) so tests can
// substitute a stub without standing up the real *plugins.Registry +

@@ -46,10 +56,11 @@ var _ pluginSources = (*plugins.Registry)(nil)

// PluginsHandler manages the plugin registry and per-workspace plugin installation.
type PluginsHandler struct {
-	pluginsDir    string         // host path to plugins/ registry
-	docker        *client.Client // Docker client for container operations
-	restartFunc   func(string)   // auto-restart workspace after install/uninstall
-	runtimeLookup RuntimeLookup  // workspace_id → runtime (optional)
+	pluginsDir       string           // host path to plugins/ registry
+	docker           *client.Client   // Docker client for container operations
+	restartFunc      func(string)     // auto-restart workspace after install/uninstall
+	runtimeLookup    RuntimeLookup    // workspace_id → runtime (optional)
+	instanceIDLookup InstanceIDLookup // workspace_id → EC2 instance_id (optional)
	// sources narrowed from `*plugins.Registry` to the pluginSources
	// interface (#1814) so tests can substitute a stub. Production
	// callers still pass *plugins.Registry, which satisfies the

@@ -90,6 +101,15 @@ func (h *PluginsHandler) WithRuntimeLookup(lookup RuntimeLookup) *PluginsHandler
	return h
}

+// WithInstanceIDLookup installs a workspace → EC2 instance_id resolver.
+// Wired by the router so production hits a real DB; tests stub it. The
+// install/uninstall pipeline uses this to dispatch to the EIC SSH path
+// for SaaS workspaces (no local Docker container to exec into).
+func (h *PluginsHandler) WithInstanceIDLookup(lookup InstanceIDLookup) *PluginsHandler {
+	h.instanceIDLookup = lookup
+	return h
+}

// pluginInfo is the API response for a plugin.
type pluginInfo struct {
	Name string `json:"name"`

@@ -100,6 +100,13 @@ func (h *PluginsHandler) Install(c *gin.Context) {
}

// Uninstall handles DELETE /workspaces/:id/plugins/:name — removes a plugin.
+//
+// Dispatch order mirrors Install's deliverToContainer:
+//
+//  1. Local Docker container up → exec rm -rf via existing helpers.
+//  2. SaaS workspace (instance_id set) → ssh sudo rm -rf via EIC.
+//  3. external runtime → 422 (caller manages its own plugin dir).
+//  4. Neither → 503.
func (h *PluginsHandler) Uninstall(c *gin.Context) {
	workspaceID := c.Param("id")
	pluginName := c.Param("name")

@@ -120,12 +127,24 @@
		return
	}

-	containerName := h.findRunningContainer(ctx, workspaceID)
-	if containerName == "" {
-		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"})
+	if containerName := h.findRunningContainer(ctx, workspaceID); containerName != "" {
+		h.uninstallViaDocker(ctx, c, workspaceID, pluginName, containerName)
		return
	}
+
+	if instanceID, runtime := h.lookupSaaSDispatch(workspaceID); instanceID != "" {
+		h.uninstallViaEIC(ctx, c, workspaceID, pluginName, instanceID, runtime)
+		return
+	}
+
+	c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"})
}

+// uninstallViaDocker holds the historical Docker-exec uninstall flow.
+// Extracted out of Uninstall so the new SaaS dispatch reads cleanly and
+// the two backend bodies are visibly symmetric (same steps, different
+// transport).
+func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context, workspaceID, pluginName, containerName string) {
	// Read the plugin's manifest BEFORE deletion to learn which skill dirs
	// it owns, so we can clean them out of /configs/skills/ and avoid the
	// auto-restart re-mounting them. Issue #106.

@@ -177,6 +196,61 @@ func (h *PluginsHandler) Uninstall(c *gin.Context) {
	})
}

+// uninstallViaEIC removes a plugin from a SaaS workspace EC2 over SSH.
+// Symmetric with uninstallViaDocker:
+//
+// - Read manifest (best-effort, missing plugin.yaml = no skills to clean).
+// - Skip CLAUDE.md awk-strip for now: that file lives at
+//   <runtime-config-prefix>/CLAUDE.md on the host and the same awk script
+//   would work over ssh, but the file is rewritten on workspace restart
+//   by the runtime adapter anyway, so the marker either stays harmless
+//   or gets dropped on the next install/restart cycle. Tracked as
+//   follow-up; not a regression vs the docker path's semantics here.
+// - rm -rf the plugin dir.
+// - Trigger restart.
+//
+// We intentionally don't try to remove /configs/skills/<skill> entries
+// over ssh because the same /configs is bind-mounted into the runtime
+// container; the agent's own start-up adapter rewrites that tree from
+// the live plugin set, so a stale skill dir for an uninstalled plugin
+// is cleaned up at restart. The docker path removes them eagerly only
+// because docker-exec is cheap. We can mirror that later if a real bug
+// surfaces, but adding two extra ssh round-trips per uninstall today
+// would be churn for no behavioural win.
+func (h *PluginsHandler) uninstallViaEIC(ctx context.Context, c *gin.Context, workspaceID, pluginName, instanceID, runtime string) {
+	// Read manifest first (best-effort) — we don't currently use the
+	// skills list on the SaaS path (see comment above), but reading it
+	// keeps the parsing path warm and lets log lines distinguish "we
+	// deleted a real plugin" from "user asked us to delete something
+	// that wasn't there." Errors here are swallowed: missing manifest
+	// must not block uninstall.
+	if data, err := readPluginManifestViaEIC(ctx, instanceID, runtime, pluginName); err == nil && len(data) > 0 {
+		info := parseManifestYAML(pluginName, data)
+		if len(info.Skills) > 0 {
+			log.Printf("Plugin uninstall: %s declared skills=%v (left to runtime restart to clean)", pluginName, info.Skills)
+		}
+	}
+
+	if err := uninstallPluginViaEIC(ctx, instanceID, runtime, pluginName); err != nil {
+		log.Printf("Plugin uninstall: EIC rm failed for %s on %s: %v", pluginName, workspaceID, err)
+		c.JSON(http.StatusBadGateway, gin.H{"error": "failed to remove plugin from workspace EC2"})
+		return
+	}
+
+	if h.restartFunc != nil {
+		go func() {
+			time.Sleep(2 * time.Second)
+			h.restartFunc(workspaceID)
+		}()
+	}
+
+	log.Printf("Plugin uninstall: %s from workspace %s (restarting via SaaS path)", pluginName, workspaceID)
+	c.JSON(http.StatusOK, gin.H{
+		"status": "uninstalled",
+		"plugin": pluginName,
+	})
+}

// Download handles GET /workspaces/:id/plugins/:name/download?source=<scheme://spec>
//
// Phase 30.3 — stream the named plugin as a gzipped tarball so remote

workspace-server/internal/handlers/plugins_install_eic.go — 249 lines (new file)
@@ -0,0 +1,249 @@
package handlers

// plugins_install_eic.go — SaaS (EC2-per-workspace) plugin install + uninstall
// over the EIC SSH primitive that template_files_eic.go already plumbs. Pairs
// with the local-Docker path in plugins_install.go / plugins_install_pipeline.go,
// closing the 🔴 docker-only row in docs/architecture/backends.md.
//
// Architecture note: every operation goes through `withEICTunnel` (ephemeral
// keypair → AWS push → tunnel → ssh). This file owns the plugin-shaped
// remote commands; the tunnel mechanics live in template_files_eic.go so a
// fix to the dance lands in one place.
//
// Why direct host write (not docker cp via SSH): on the workspace EC2, the
// runtime's managed-config dir (/configs for claude-code, /home/ubuntu/.hermes
// for hermes — see workspaceFilePathPrefix) is bind-mounted into the
// runtime's container by cloud-init. Writing into <prefix>/plugins/<name>/
// on the host is exactly what the runtime sees on the next start. No
// docker-cp needed, and we avoid coupling to any specific container layout
// inside the workspace EC2.

import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"context"
	"fmt"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"
)

// eicPluginOpTimeout bounds the whole EIC-tunnel + ssh + tar-pipe dance
// for a plugin install or uninstall. Larger than eicFileOpTimeout (30s)
// because plugin trees can carry skill markdown, MCP server binaries,
// and config files — easily a few MB through ssh + sudo on a fresh
// tunnel. 2 min gives headroom on a cold tunnel; the install pipeline's
// PLUGIN_INSTALL_FETCH_TIMEOUT (5 min default) still bounds the outer
// request.
const eicPluginOpTimeout = 2 * time.Minute

// hostPluginPath returns the absolute directory on the workspace EC2
// where /configs/plugins/<name>/ lives for a given runtime. Keeps the
// per-runtime indirection in one place (mirrors resolveWorkspaceRootPath
// in template_files_eic.go) so future runtimes only edit
// workspaceFilePathPrefix.
//
// The plugin name is shellQuote-wrapped at the call site, not here,
// because a couple of callers want the unquoted form for log lines.
func hostPluginPath(runtime, pluginName string) string {
	base := resolveWorkspaceRootPath(runtime, "/configs")
	return filepath.Join(base, "plugins", pluginName)
}

// buildPluginInstallShell returns the remote command for receiving a tar.gz
// stream on stdin and unpacking it into <hostPluginDir>/, owned by the agent
// user (uid 1000 — matches the local-Docker path's chown 1000:1000).
//
// The script is a single `sudo sh -c '...'` so the tar-receive + chown run
// under one privileged invocation; ssh-as-ubuntu has passwordless sudo on
// the standard tenant AMI.
//
// - rm -rf clears any prior install of the same plugin (idempotent
//   reinstall — the user re-clicked Install or version-bumped the source).
// - mkdir -p makes the parent dir (host /configs is root-owned + always
//   present; the per-plugin dir is what we're creating).
// - tar -xzf - reads stdin (the gzipped tar). --no-same-owner keeps the
//   archive's tar-recorded uid/gid out of the picture; the chown -R
//   after is the canonical owner.
// - chown -R 1000:1000 matches the local-Docker handler's exec at
//   plugins_install_pipeline.go:273 — agent user inside the runtime
//   container is uid 1000 on every workspace-template image we ship.
//
// shellQuote on the path is defence-in-depth: the path is composed from
// a runtime allowlist (workspaceFilePathPrefix) + validated plugin name,
// so traversal is already blocked.
func buildPluginInstallShell(hostPluginDir string) string {
	q := shellQuote(hostPluginDir)
	return fmt.Sprintf(
		"sudo -n sh -c 'rm -rf %s && mkdir -p %s && tar -xzf - --no-same-owner -C %s && chown -R 1000:1000 %s'",
		q, q, q, q,
	)
}

// buildPluginUninstallShell returns the remote command for `sudo -n rm -rf
// <hostPluginDir>`. -rf (vs -f) is intentional here, unlike buildRmShell:
// uninstall really does need to remove the plugin's whole subtree.
func buildPluginUninstallShell(hostPluginDir string) string {
	return fmt.Sprintf("sudo -n rm -rf %s", shellQuote(hostPluginDir))
}

// buildPluginManifestReadShell returns the remote command for reading the
// plugin's manifest (plugin.yaml). Mirrors buildCatShell — swallows the
// missing-file stderr so the missing-manifest case lands as empty stdout
// + non-zero exit, which uninstall translates to "no skills to clean".
func buildPluginManifestReadShell(hostPluginDir string) string {
	return fmt.Sprintf("sudo -n cat %s/plugin.yaml 2>/dev/null", shellQuote(hostPluginDir))
}
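
// Note: shellQuote itself is defined elsewhere in the package (not part
// of this diff). From the expectations in plugins_install_eic_test.go
// below (e.g. want "sudo -n rm -rf '/configs/plugins/my-plugin'"), a
// compatible sketch would be a POSIX single-quote wrapper roughly like:
//
//	func shellQuote(s string) string {
//		// '…' preserves everything except a literal single quote,
//		// which is spelled '\'' (close, escaped quote, reopen).
//		return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
//	}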

// installPluginViaEIC pushes a staged plugin directory to a SaaS workspace
// EC2 via the EIC SSH tunnel. On success the plugin lives at
// <runtime-config-prefix>/plugins/<name>/ on the host, owned by 1000:1000,
// ready for the next workspace restart to pick up.
//
// The caller (deliverToContainer SaaS branch) owns:
// - the staged dir (created + cleaned up by resolveAndStage)
// - the workspace restart trigger after install
//
// Errors here are wrapped with the instance + runtime so triage can tell
// "tunnel failed" from "tar payload corrupt" without grep-ing the EC2's
// auth.log.
var installPluginViaEIC = realInstallPluginViaEIC

func realInstallPluginViaEIC(ctx context.Context, instanceID, runtime, pluginName, stagedDir string) error {
	if instanceID == "" {
		return fmt.Errorf("installPluginViaEIC: empty instance_id")
	}
	if err := validatePluginName(pluginName); err != nil {
		return fmt.Errorf("installPluginViaEIC: %w", err)
	}

	// Build the tar.gz payload up-front so a tar-walk failure is surfaced
	// before we open the EIC tunnel — saves a 1-2s tunnel setup on every
	// "broken plugin tree" case.
	var payload bytes.Buffer
	gz := gzip.NewWriter(&payload)
	tw := tar.NewWriter(gz)
	if err := streamDirAsTar(stagedDir, tw); err != nil {
		return fmt.Errorf("installPluginViaEIC: tar pack: %w", err)
	}
	if err := tw.Close(); err != nil {
		return fmt.Errorf("installPluginViaEIC: tar close: %w", err)
	}
	if err := gz.Close(); err != nil {
		return fmt.Errorf("installPluginViaEIC: gzip close: %w", err)
	}

	hostDir := hostPluginPath(runtime, pluginName)
	cmd := buildPluginInstallShell(hostDir)

	ctx, cancel := context.WithTimeout(ctx, eicPluginOpTimeout)
	defer cancel()

	return withEICTunnel(ctx, instanceID, func(s eicSSHSession) error {
		sshCmd := exec.CommandContext(ctx, "ssh", s.sshArgs(cmd)...)
		sshCmd.Env = os.Environ()
		sshCmd.Stdin = bytes.NewReader(payload.Bytes())
		var stderr bytes.Buffer
		sshCmd.Stderr = &stderr
		if err := sshCmd.Run(); err != nil {
			return fmt.Errorf(
				"ssh install: %w (instance=%s runtime=%s plugin=%s payload=%dB stderr=%s)",
				err, instanceID, runtime, pluginName, payload.Len(),
				strings.TrimSpace(stderr.String()),
			)
		}
		log.Printf(
			"installPluginViaEIC: ws instance=%s runtime=%s plugin=%s payload=%dB → %s",
			instanceID, runtime, pluginName, payload.Len(), hostDir,
		)
		return nil
	})
}

// uninstallPluginViaEIC removes the plugin's directory from the workspace
// EC2 via SSH. Symmetric with installPluginViaEIC but no payload — the
// remote command is a single `rm -rf`.
//
// Best-effort by design: the local-Docker path also doesn't fail
// uninstall on a missing directory (the pre-existing exec returns 0 when
// the dir is absent), so we mirror that here. Real ssh-layer failures
// (tunnel down, sudo denied) still propagate.
var uninstallPluginViaEIC = realUninstallPluginViaEIC

func realUninstallPluginViaEIC(ctx context.Context, instanceID, runtime, pluginName string) error {
	if instanceID == "" {
		return fmt.Errorf("uninstallPluginViaEIC: empty instance_id")
	}
	if err := validatePluginName(pluginName); err != nil {
		return fmt.Errorf("uninstallPluginViaEIC: %w", err)
	}

	hostDir := hostPluginPath(runtime, pluginName)
	cmd := buildPluginUninstallShell(hostDir)

	ctx, cancel := context.WithTimeout(ctx, eicPluginOpTimeout)
	defer cancel()

	return withEICTunnel(ctx, instanceID, func(s eicSSHSession) error {
		sshCmd := exec.CommandContext(ctx, "ssh", s.sshArgs(cmd)...)
		sshCmd.Env = os.Environ()
		var stderr bytes.Buffer
		sshCmd.Stderr = &stderr
		if err := sshCmd.Run(); err != nil {
			return fmt.Errorf(
				"ssh rm: %w (instance=%s runtime=%s plugin=%s stderr=%s)",
				err, instanceID, runtime, pluginName,
				strings.TrimSpace(stderr.String()),
			)
		}
		log.Printf(
			"uninstallPluginViaEIC: ws instance=%s runtime=%s plugin=%s → removed %s",
			instanceID, runtime, pluginName, hostDir,
		)
		return nil
	})
}

// readPluginManifestViaEIC reads the plugin's plugin.yaml from the
// workspace EC2 so uninstall can learn the skills list to clean up.
// Returns (empty, nil) when the manifest doesn't exist (best-effort: the
// local-Docker path treats a missing manifest as "no skills to remove",
// not a failure).
var readPluginManifestViaEIC = realReadPluginManifestViaEIC

func realReadPluginManifestViaEIC(ctx context.Context, instanceID, runtime, pluginName string) ([]byte, error) {
	if instanceID == "" {
		return nil, fmt.Errorf("readPluginManifestViaEIC: empty instance_id")
	}
	if err := validatePluginName(pluginName); err != nil {
		return nil, fmt.Errorf("readPluginManifestViaEIC: %w", err)
	}

	hostDir := hostPluginPath(runtime, pluginName)
	cmd := buildPluginManifestReadShell(hostDir)

	ctx, cancel := context.WithTimeout(ctx, eicPluginOpTimeout)
	defer cancel()

	var out []byte
	runErr := withEICTunnel(ctx, instanceID, func(s eicSSHSession) error {
		sshCmd := exec.CommandContext(ctx, "ssh", s.sshArgs(cmd)...)
		sshCmd.Env = os.Environ()
		var stdout, stderr bytes.Buffer
		sshCmd.Stdout = &stdout
		sshCmd.Stderr = &stderr
		// Don't fail on non-zero exit: missing-manifest case returns 1
		// from cat with empty stdout, which is the "no skills" signal.
		_ = sshCmd.Run()
		out = stdout.Bytes()
		return nil
	})
	if runErr != nil {
		return nil, runErr
	}
	return out, nil
}
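streamDirAsTar is called by realInstallPluginViaEIC above but is not part of this diff; it ships elsewhere in the handlers package. A minimal sketch of the contract the install path relies on, assuming root-relative, slash-separated entry names and regular files plus directories only (hypothetical stand-in, not the real implementation):

package handlers

import (
	"archive/tar"
	"io"
	"os"
	"path/filepath"
)

// streamDirAsTarSketch walks root, emits one tar header per entry with a
// root-relative, slash-separated name, and copies regular-file bytes.
func streamDirAsTarSketch(root string, tw *tar.Writer) error {
	return filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		rel, err := filepath.Rel(root, path)
		if err != nil || rel == "." {
			return err // skip the root dir itself
		}
		hdr, err := tar.FileInfoHeader(info, "")
		if err != nil {
			return err
		}
		hdr.Name = filepath.ToSlash(rel) // tar entry names use forward slashes
		if err := tw.WriteHeader(hdr); err != nil {
			return err
		}
		if !info.Mode().IsRegular() {
			return nil // directories carry no body
		}
		f, err := os.Open(path)
		if err != nil {
			return err
		}
		defer f.Close()
		_, err = io.Copy(tw, f)
		return err
	})
}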
workspace-server/internal/handlers/plugins_install_eic_test.go — 505 lines (new file)
@@ -0,0 +1,505 @@
package handlers

import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"context"
	"errors"
	"io"
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/DATA-DOG/go-sqlmock"
	"github.com/gin-gonic/gin"
)

// expectAllowlistAllowAll programs the package-shared withMockDB sqlmock
// so the org-allowlist gate (org_plugin_allowlist.go) returns "allow-all"
// for the duration of one Install call. The gate fires three queries —
// resolveOrgID, allowlist EXISTS, allowlist COUNT — and we satisfy each
// with the empty/zero shape that means "no allowlist configured."
//
// Without this, tests that exercise the full Install flow panic on a
// nil DB. The handlers package already ships withMockDB in
// tokens_sqlmock_test.go; we just layer the allowlist-specific
// expectations on top.
func expectAllowlistAllowAll(mock sqlmock.Sqlmock) {
	mock.MatchExpectationsInOrder(false)
	mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
	mock.ExpectQuery(`SELECT EXISTS`).
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM org_plugin_allowlist`).
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
}

// stagePluginRegistry creates a single-plugin registry under dir so the
// install handler's local resolver can find it. Returns the path to the
// plugin dir for any caller that wants to assert tar contents.
//
// Centralised so a future tweak to the registry shape (e.g. plugin.yaml
// schema bump) only updates one place. Tests use the source spec
// `local://<name>` which the local resolver maps to <dir>/<name>/.
func stagePluginRegistry(t *testing.T, dir, name string) string {
	t.Helper()
	pluginDir := filepath.Join(dir, name)
	if err := os.Mkdir(pluginDir, 0755); err != nil {
		t.Fatalf("mkdir plugin dir: %v", err)
	}
	manifest := "name: " + name + "\nversion: \"1.0.0\"\ndescription: SaaS dispatch test plugin\n"
	if err := os.WriteFile(filepath.Join(pluginDir, "plugin.yaml"), []byte(manifest), 0644); err != nil {
		t.Fatalf("write plugin.yaml: %v", err)
	}
	if err := os.WriteFile(filepath.Join(pluginDir, "rule.md"), []byte("# rule\n"), 0644); err != nil {
		t.Fatalf("write rule.md: %v", err)
	}
	return pluginDir
}

// stubInstallPluginViaEIC swaps the package-level installPluginViaEIC for
// the duration of the test; restored by t.Cleanup. Mirrors the existing
// withEICTunnel stub pattern (template_files_eic_dispatch_test.go).
func stubInstallPluginViaEIC(t *testing.T, fn func(ctx context.Context, instanceID, runtime, pluginName, stagedDir string) error) {
	t.Helper()
	prev := installPluginViaEIC
	installPluginViaEIC = fn
	t.Cleanup(func() { installPluginViaEIC = prev })
}

func stubUninstallPluginViaEIC(t *testing.T, fn func(ctx context.Context, instanceID, runtime, pluginName string) error) {
	t.Helper()
	prev := uninstallPluginViaEIC
	uninstallPluginViaEIC = fn
	t.Cleanup(func() { uninstallPluginViaEIC = prev })
}

func stubReadPluginManifestViaEIC(t *testing.T, fn func(ctx context.Context, instanceID, runtime, pluginName string) ([]byte, error)) {
	t.Helper()
	prev := readPluginManifestViaEIC
	readPluginManifestViaEIC = fn
	t.Cleanup(func() { readPluginManifestViaEIC = prev })
}

// ---------- pure-function shell shape ----------

func TestBuildPluginInstallShell_QuotesPath(t *testing.T) {
	got := buildPluginInstallShell("/configs/plugins/my-plugin")
	want := "sudo -n sh -c 'rm -rf '/configs/plugins/my-plugin' && mkdir -p '/configs/plugins/my-plugin' && tar -xzf - --no-same-owner -C '/configs/plugins/my-plugin' && chown -R 1000:1000 '/configs/plugins/my-plugin''"
	if got != want {
		t.Errorf("buildPluginInstallShell mismatch:\n got %q\nwant %q", got, want)
	}
}

func TestBuildPluginUninstallShell_QuotesPath(t *testing.T) {
	got := buildPluginUninstallShell("/configs/plugins/my-plugin")
	want := "sudo -n rm -rf '/configs/plugins/my-plugin'"
	if got != want {
		t.Errorf("buildPluginUninstallShell mismatch:\n got %q\nwant %q", got, want)
	}
}

func TestBuildPluginManifestReadShell_QuotesPath(t *testing.T) {
	got := buildPluginManifestReadShell("/configs/plugins/my-plugin")
	want := "sudo -n cat '/configs/plugins/my-plugin'/plugin.yaml 2>/dev/null"
	if got != want {
		t.Errorf("buildPluginManifestReadShell mismatch:\n got %q\nwant %q", got, want)
	}
}

func TestHostPluginPath_PerRuntime(t *testing.T) {
	cases := []struct {
		runtime string
		plugin  string
		want    string
	}{
		{"claude-code", "browser-automation", "/configs/plugins/browser-automation"},
		{"hermes", "browser-automation", "/home/ubuntu/.hermes/plugins/browser-automation"},
		{"langgraph", "browser-automation", "/opt/configs/plugins/browser-automation"},
		// Unknown / empty runtime falls back to /configs (containerized
		// user-data layout) so a future runtime added to workspaces table
		// without a workspaceFilePathPrefix entry doesn't blow up the
		// install path silently.
		{"", "browser-automation", "/configs/plugins/browser-automation"},
		{"some-future-runtime", "x", "/configs/plugins/x"},
	}
	for _, c := range cases {
		t.Run(c.runtime+"/"+c.plugin, func(t *testing.T) {
			got := hostPluginPath(c.runtime, c.plugin)
			if got != c.want {
				t.Errorf("hostPluginPath(%q, %q) = %q, want %q", c.runtime, c.plugin, got, c.want)
			}
		})
	}
}

// ---------- dispatch: install ----------

// TestPluginInstall_SaaS_DispatchesToEIC — the most-load-bearing test in
// this file. With h.docker == nil and instanceIDLookup returning a real
// instance_id, Install MUST push the staged plugin to the EC2 over EIC
// (not 503). Asserts the EIC stub is called with the right (instance,
// runtime, plugin) tuple AND that the staged dir has the manifest +
// rule files we put there — proves the staging side wasn't bypassed.
func TestPluginInstall_SaaS_DispatchesToEIC(t *testing.T) {
	registry := t.TempDir()
	stagePluginRegistry(t, registry, "browser-automation")

	type capture struct {
		called      bool
		instanceID  string
		runtime     string
		pluginName  string
		stagedFiles []string
	}
	var got capture

	stubInstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName, stagedDir string) error {
		got.called = true
		got.instanceID = instanceID
		got.runtime = runtime
		got.pluginName = pluginName
		entries, err := os.ReadDir(stagedDir)
		if err != nil {
			t.Fatalf("read staged dir: %v", err)
		}
		for _, e := range entries {
			got.stagedFiles = append(got.stagedFiles, e.Name())
		}
		return nil
	})

	mock, cleanup := withMockDB(t)
	defer cleanup()
	expectAllowlistAllowAll(mock)

	h := NewPluginsHandler(registry, nil, nil).
		WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }).
		WithInstanceIDLookup(func(string) (string, error) { return "i-0e0951a3cfd9bbf75", nil })

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "c7244ed9-f623-4cba-8873-020e5c9fe104"}}
	c.Request = httptest.NewRequest(
		"POST",
		"/workspaces/c7244ed9-f623-4cba-8873-020e5c9fe104/plugins",
		bytes.NewBufferString(`{"source":"local://browser-automation"}`),
	)
	c.Request.Header.Set("Content-Type", "application/json")

	h.Install(c)

	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if !got.called {
		t.Fatalf("installPluginViaEIC was not called")
	}
	if got.instanceID != "i-0e0951a3cfd9bbf75" {
		t.Errorf("instanceID = %q, want i-0e0951a3cfd9bbf75", got.instanceID)
	}
	if got.runtime != "claude-code" {
		t.Errorf("runtime = %q, want claude-code", got.runtime)
	}
	if got.pluginName != "browser-automation" {
		t.Errorf("pluginName = %q, want browser-automation", got.pluginName)
	}
	// Staged dir must carry the resolver's actual fetch — manifest + rule.
	// Anything missing here means the stage step was bypassed.
	hasManifest, hasRule := false, false
	for _, f := range got.stagedFiles {
		if f == "plugin.yaml" {
			hasManifest = true
		}
		if f == "rule.md" {
			hasRule = true
		}
	}
	if !hasManifest || !hasRule {
		t.Errorf("staged dir missing files: %v (want plugin.yaml + rule.md)", got.stagedFiles)
	}
}

// TestPluginInstall_SaaS_PropagatesEICError — when the EIC push fails
// (tunnel down, sudo denied), Install MUST surface 502 rather than swallow
// the error and report 200. 502 is the right status for "we tried, the
// remote side wasn't there" — distinct from 503 ("nothing wired") and
// 500 ("our bug"). The body deliberately doesn't echo the underlying
// error string (would leak ssh stderr / instance metadata).
func TestPluginInstall_SaaS_PropagatesEICError(t *testing.T) {
	registry := t.TempDir()
	stagePluginRegistry(t, registry, "browser-automation")

	mock, cleanup := withMockDB(t)
	defer cleanup()
	expectAllowlistAllowAll(mock)

	stubInstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName, stagedDir string) error {
		return errors.New("ssh: tunnel exited 255")
	})

	h := NewPluginsHandler(registry, nil, nil).
		WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }).
		WithInstanceIDLookup(func(string) (string, error) { return "i-aaaa", nil })

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
	c.Request = httptest.NewRequest(
		"POST",
		"/workspaces/ws-1/plugins",
		bytes.NewBufferString(`{"source":"local://browser-automation"}`),
	)
	c.Request.Header.Set("Content-Type", "application/json")

	h.Install(c)

	if w.Code != http.StatusBadGateway {
		t.Errorf("expected 502 for EIC failure, got %d: %s", w.Code, w.Body.String())
	}
	if strings.Contains(w.Body.String(), "tunnel exited") {
		t.Errorf("response body must not echo raw EIC error: %s", w.Body.String())
	}
}

// TestPluginInstall_NoBackends_Returns503 — lookup is wired but returns
// empty instance_id (e.g. workspace pre-provision, or local-Docker
// deploy without a running container). The handler MUST 503, not silently
// dispatch to EIC with an empty instance_id.
func TestPluginInstall_NoBackends_Returns503(t *testing.T) {
	registry := t.TempDir()
	stagePluginRegistry(t, registry, "browser-automation")

	mock, cleanup := withMockDB(t)
	defer cleanup()
	expectAllowlistAllowAll(mock)

	stubInstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName, stagedDir string) error {
		t.Errorf("EIC must not be called when instance_id is empty")
		return nil
	})

	h := NewPluginsHandler(registry, nil, nil).
		WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }).
		WithInstanceIDLookup(func(string) (string, error) { return "", nil }) // empty

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
	c.Request = httptest.NewRequest(
		"POST",
		"/workspaces/ws-1/plugins",
		bytes.NewBufferString(`{"source":"local://browser-automation"}`),
	)
	c.Request.Header.Set("Content-Type", "application/json")

	h.Install(c)

	if w.Code != http.StatusServiceUnavailable {
		t.Errorf("expected 503, got %d: %s", w.Code, w.Body.String())
	}
}

// TestPluginInstall_InstanceLookupError_Returns503 — a DB hiccup on the
// instance_id lookup must NOT crash or 502; the handler logs and falls
// through to 503. Same fail-open shape h.runtimeLookup uses (see
// TestPluginInstall_NoRuntimeLookup_FailsOpen). Pinning this prevents a
// future "tighten error handling" refactor from quietly converting a DB
// blip into a five-minute outage on the install endpoint.
func TestPluginInstall_InstanceLookupError_Returns503(t *testing.T) {
	registry := t.TempDir()
	stagePluginRegistry(t, registry, "browser-automation")

	mock, cleanup := withMockDB(t)
	defer cleanup()
	expectAllowlistAllowAll(mock)

	h := NewPluginsHandler(registry, nil, nil).
		WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }).
		WithInstanceIDLookup(func(string) (string, error) { return "", errors.New("db: connection refused") })

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
	c.Request = httptest.NewRequest(
		"POST",
		"/workspaces/ws-1/plugins",
		bytes.NewBufferString(`{"source":"local://browser-automation"}`),
	)
	c.Request.Header.Set("Content-Type", "application/json")

	h.Install(c)

	if w.Code != http.StatusServiceUnavailable {
		t.Errorf("expected 503 on instance-id lookup error, got %d: %s", w.Code, w.Body.String())
	}
}

// ---------- dispatch: uninstall ----------

func TestPluginUninstall_SaaS_DispatchesToEIC(t *testing.T) {
	stubReadPluginManifestViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName string) ([]byte, error) {
		return []byte("name: browser-automation\nskills:\n - browse\n"), nil
	})

	type capture struct {
		called     bool
		instanceID string
		runtime    string
		pluginName string
	}
	var got capture
	stubUninstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName string) error {
		got.called = true
		got.instanceID = instanceID
		got.runtime = runtime
		got.pluginName = pluginName
		return nil
	})

	h := NewPluginsHandler(t.TempDir(), nil, nil).
		WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }).
		WithInstanceIDLookup(func(string) (string, error) { return "i-bbbb", nil })

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{
		{Key: "id", Value: "ws-1"},
		{Key: "name", Value: "browser-automation"},
	}
	c.Request = httptest.NewRequest("DELETE", "/workspaces/ws-1/plugins/browser-automation", nil)

	h.Uninstall(c)

	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if !got.called {
		t.Fatalf("uninstallPluginViaEIC was not called")
	}
	if got.instanceID != "i-bbbb" || got.runtime != "claude-code" || got.pluginName != "browser-automation" {
		t.Errorf("dispatch args wrong: %+v", got)
	}
}

func TestPluginUninstall_SaaS_PropagatesEICError(t *testing.T) {
	stubReadPluginManifestViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName string) ([]byte, error) {
		return nil, nil
	})
	stubUninstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName string) error {
		return errors.New("ssh: connection refused")
	})

	h := NewPluginsHandler(t.TempDir(), nil, nil).
		WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }).
		WithInstanceIDLookup(func(string) (string, error) { return "i-cccc", nil })

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{
		{Key: "id", Value: "ws-1"},
		{Key: "name", Value: "browser-automation"},
	}
	c.Request = httptest.NewRequest("DELETE", "/workspaces/ws-1/plugins/browser-automation", nil)

	h.Uninstall(c)

	if w.Code != http.StatusBadGateway {
		t.Errorf("expected 502, got %d: %s", w.Code, w.Body.String())
	}
}

func TestPluginUninstall_NoBackends_Returns503(t *testing.T) {
	stubUninstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName string) error {
		t.Errorf("EIC uninstall must not be called with empty instance_id")
		return nil
	})

	h := NewPluginsHandler(t.TempDir(), nil, nil).
		WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }).
		WithInstanceIDLookup(func(string) (string, error) { return "", nil })

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{
		{Key: "id", Value: "ws-1"},
		{Key: "name", Value: "browser-automation"},
	}
	c.Request = httptest.NewRequest("DELETE", "/workspaces/ws-1/plugins/browser-automation", nil)

	h.Uninstall(c)

	if w.Code != http.StatusServiceUnavailable {
		t.Errorf("expected 503, got %d: %s", w.Code, w.Body.String())
	}
}

// ---------- tarball shape ----------

// TestRealInstallPluginViaEIC_TarPayloadShape — the production
// installPluginViaEIC packs the staged dir as gzipped tar. Stub
// withEICTunnel + run the real installPluginViaEIC body, capturing the
// ssh stdin via a fake exec.Command — except go's exec is hard to fake
// without hijacking $PATH. Instead we exercise the tar packer directly:
// streamDirAsTar's behaviour is what we actually depend on, and a
// regression in either streamDirAsTar OR the gzip wrapping will be
// visible here.
func TestRealInstallPluginViaEIC_TarPayloadShape(t *testing.T) {
	staged := t.TempDir()
	if err := os.WriteFile(filepath.Join(staged, "plugin.yaml"), []byte("name: x\n"), 0644); err != nil {
		t.Fatal(err)
	}
	if err := os.MkdirAll(filepath.Join(staged, "skills", "browse"), 0755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(staged, "skills", "browse", "instructions.md"), []byte("step 1\n"), 0644); err != nil {
		t.Fatal(err)
	}

	var buf bytes.Buffer
	gz := gzip.NewWriter(&buf)
	tw := tar.NewWriter(gz)
	if err := streamDirAsTar(staged, tw); err != nil {
		t.Fatalf("streamDirAsTar: %v", err)
	}
	if err := tw.Close(); err != nil {
		t.Fatalf("tw close: %v", err)
	}
	if err := gz.Close(); err != nil {
		t.Fatalf("gz close: %v", err)
	}

	// Round-trip: the same payload the production flow would pipe into
	// `tar -xzf -` on the remote should unpack to plugin.yaml +
	// skills/browse/instructions.md.
	gr, err := gzip.NewReader(&buf)
	if err != nil {
		t.Fatalf("gzip reader: %v", err)
	}
	tr := tar.NewReader(gr)
	seen := map[string]bool{}
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatalf("tar next: %v", err)
		}
		seen[hdr.Name] = true
	}
	for _, want := range []string{"plugin.yaml", "skills/browse/instructions.md"} {
		// Tar entries on Linux normally use forward slashes regardless
		// of host separator; double-check both forms so a Windows test
		// runner doesn't go red on a path-sep difference. Production
		// always runs on Linux (CI + tenant EC2).
		alt := filepath.FromSlash(want)
		if !seen[want] && !seen[alt] {
			t.Errorf("tar payload missing %q (saw %v)", want, seen)
		}
	}
}
@@ -261,22 +261,74 @@ func (h *PluginsHandler) resolveAndStage(ctx context.Context, req installRequest
// deliverToContainer copies the staged plugin dir into the workspace
// container, chowns it for the agent user, and triggers a restart.
// Returns a typed *httpErr on failure; nil on success.
+//
+// Dispatch order:
+//
+//  1. Local Docker container is up → tar+CopyToContainer (historical path).
+//  2. SaaS workspace (instance_id set) → push via EIC SSH to the EC2's
+//     bind-mounted /configs/plugins/<name>/. Closes the 🔴 docker-only
+//     row in docs/architecture/backends.md by routing through the same
+//     primitive Files API uses (template_files_eic.go).
+//  3. Neither wired → 503. True "no backend" case (dev box without
+//     Docker AND without an instance_id row).
+//
+// The SaaS branch is gated on h.instanceIDLookup so unit tests can keep
+// using NewPluginsHandler without a DB; production wires it in router.go.
func (h *PluginsHandler) deliverToContainer(ctx context.Context, workspaceID string, r *stageResult) error {
-	containerName := h.findRunningContainer(ctx, workspaceID)
-	if containerName == "" {
-		return newHTTPErr(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"})
-	}
-	if err := h.copyPluginToContainer(ctx, containerName, r.StagedDir, r.PluginName); err != nil {
-		log.Printf("Plugin install: failed to copy %s to %s: %v", r.PluginName, workspaceID, err)
-		return newHTTPErr(http.StatusInternalServerError, gin.H{"error": "failed to copy plugin to container"})
-	}
-	h.execAsRoot(ctx, containerName, []string{
-		"chown", "-R", "1000:1000", "/configs/plugins/" + r.PluginName,
-	})
-	if h.restartFunc != nil {
-		go h.restartFunc(workspaceID)
-	}
-	return nil
+	if containerName := h.findRunningContainer(ctx, workspaceID); containerName != "" {
+		if err := h.copyPluginToContainer(ctx, containerName, r.StagedDir, r.PluginName); err != nil {
+			log.Printf("Plugin install: failed to copy %s to %s: %v", r.PluginName, workspaceID, err)
+			return newHTTPErr(http.StatusInternalServerError, gin.H{"error": "failed to copy plugin to container"})
+		}
+		h.execAsRoot(ctx, containerName, []string{
+			"chown", "-R", "1000:1000", "/configs/plugins/" + r.PluginName,
+		})
+		if h.restartFunc != nil {
+			go h.restartFunc(workspaceID)
+		}
+		return nil
+	}
+
+	if instanceID, runtime := h.lookupSaaSDispatch(workspaceID); instanceID != "" {
+		if err := installPluginViaEIC(ctx, instanceID, runtime, r.PluginName, r.StagedDir); err != nil {
+			log.Printf("Plugin install: EIC push failed for %s → %s: %v", r.PluginName, workspaceID, err)
+			return newHTTPErr(http.StatusBadGateway, gin.H{
+				"error": "failed to deliver plugin to workspace EC2",
+			})
+		}
+		if h.restartFunc != nil {
+			go h.restartFunc(workspaceID)
+		}
+		return nil
+	}
+
+	return newHTTPErr(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"})
}

+// lookupSaaSDispatch returns (instance_id, runtime) for SaaS dispatch, or
+// ("", "") when the lookups aren't wired or the workspace isn't on the
+// EC2 backend. Errors from the lookups are logged-and-swallowed: failing
+// open here just means the caller falls through to the 503 path it would
+// have returned without us, never to a wrong action against the wrong
+// instance.
+func (h *PluginsHandler) lookupSaaSDispatch(workspaceID string) (instanceID, runtime string) {
+	if h.instanceIDLookup == nil {
+		return "", ""
+	}
+	id, err := h.instanceIDLookup(workspaceID)
+	if err != nil {
+		log.Printf("Plugin install: instance_id lookup failed for %s: %v", workspaceID, err)
+		return "", ""
+	}
+	if id == "" {
+		return "", ""
+	}
+	if h.runtimeLookup != nil {
+		if rt, rterr := h.runtimeLookup(workspaceID); rterr == nil {
+			runtime = rt
+		}
+	}
+	return id, runtime
+}

// readPluginSkillsFromContainer reads /configs/plugins/<name>/plugin.yaml

@@ -11,13 +11,13 @@ import (
	"github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo"
	"github.com/Molecule-AI/molecule-monorepo/platform/internal/channels"
	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
-	"github.com/Molecule-AI/molecule-monorepo/platform/internal/messagestore"
	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
	"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
-	"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
	memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/messagestore"
	"github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
	"github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
	"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
	"github.com/Molecule-AI/molecule-monorepo/platform/internal/supervised"
	"github.com/Molecule-AI/molecule-monorepo/platform/internal/ws"

@@ -109,8 +109,8 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
	now := time.Now()
	for name, last := range snap {
		out[name] = gin.H{
-			"last_tick_at": last,
-			"seconds_ago": int(now.Sub(last).Seconds()),
+			"last_tick_at": last,
+			"seconds_ago":  int(now.Sub(last).Seconds()),
		}
	}
	c.JSON(200, gin.H{"subsystems": out})

@@ -599,8 +599,25 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
		).Scan(&runtime)
		return runtime, err
	}
+	// Instance-id lookup powers the SaaS dispatch in install/uninstall:
+	// when a workspace is on the EC2-per-workspace backend (instance_id
+	// non-NULL) and there's no local Docker container to exec into, the
+	// pipeline pushes the staged plugin tarball to that EC2 over EIC SSH.
+	// Empty result means the workspace lives on the local-Docker backend
+	// (or hasn't been provisioned yet) and the handler falls back to its
+	// original Docker path. Same pattern templates.go and terminal.go use.
+	instanceIDLookup := func(workspaceID string) (string, error) {
+		var instanceID string
+		err := db.DB.QueryRowContext(
+			context.Background(),
+			`SELECT COALESCE(instance_id, '') FROM workspaces WHERE id = $1`,
+			workspaceID,
+		).Scan(&instanceID)
+		return instanceID, err
+	}
	plgh := handlers.NewPluginsHandler(pluginsDir, dockerCli, wh.RestartByID).
-		WithRuntimeLookup(runtimeLookup)
+		WithRuntimeLookup(runtimeLookup).
+		WithInstanceIDLookup(instanceIDLookup)
	r.GET("/plugins", plgh.ListRegistry)
	r.GET("/plugins/sources", plgh.ListSources)
	wsAuth.GET("/plugins", plgh.ListInstalled)