diff --git a/docs/architecture/backends.md b/docs/architecture/backends.md index 0d5ba827..d8042e4f 100644 --- a/docs/architecture/backends.md +++ b/docs/architecture/backends.md @@ -2,7 +2,7 @@ **Status:** living document — update when you ship a feature that touches one backend. **Owner:** workspace-server + controlplane teams. -**Last audit:** 2026-05-05 (Claude agent — `provisionWorkspaceAuto` / `StopWorkspaceAuto` / `HasProvisioner` SoT pattern landed in PRs #2811 + #2824). +**Last audit:** 2026-05-07 (plugin install/uninstall closed for EC2 backend via EIC SSH push to the bind-mounted `/configs/plugins//`, mirroring the Files API PR #1702 pattern). ## Why this exists @@ -54,7 +54,7 @@ For "do we have any backend?", use `HasProvisioner()`, never bare `h.provisioner | **Files API** | | | | | | List / Read / Write / Replace / Delete | `container_files.go`, `template_import.go` | `docker exec` + tar `CopyToContainer` | SSH via EIC tunnel (PR #1702) | ✅ parity as of 2026-04-22 (previously docker-only) | | **Plugins** | | | | | -| Install / uninstall / list | `plugins_install.go` | `deliverToContainer()` + volume rm | **gap — no live plugin delivery** | 🔴 **docker-only** | +| Install / uninstall / list | `plugins_install.go` + `plugins_install_eic.go` | `deliverToContainer()` → exec+`CopyToContainer` on local container | `instance_id` set → EIC SSH push of the staged tarball into the EC2's bind-mounted `/configs/plugins//` (per `workspaceFilePathPrefix`), `chown 1000:1000`, restart | ✅ parity | | **Terminal (WebSocket)** | | | | | | Dispatch | `terminal.go:90-105` | `instance_id=""` → `handleLocalConnect` → `docker attach` | `instance_id` set → `handleRemoteConnect` → EIC SSH + `docker exec` | ✅ parity (different implementations, same UX) | | **A2A proxy** | | | | | diff --git a/workspace-server/internal/handlers/eic_tunnel_pool.go b/workspace-server/internal/handlers/eic_tunnel_pool.go index 03bfd01f..20b2e269 100644 --- a/workspace-server/internal/handlers/eic_tunnel_pool.go +++ b/workspace-server/internal/handlers/eic_tunnel_pool.go @@ -108,6 +108,18 @@ type eicTunnelPool struct { // First acquirer takes the slot; later ones wait on the channel. pendingSetups map[string]chan struct{} stopJanitor chan struct{} + // janitorInterval is captured at pool construction from the + // package-level poolJanitorInterval var. Captured (not re-read on + // every tick) so a test that swaps the package var via t.Cleanup + // after a global pool's janitor is already running can't race + // with that goroutine's ticker read. The global pool is created + // lazily once per process via sync.Once; before this capture + // landed, every test that touched poolJanitorInterval after the + // global pool's first-touch raced the janitor (caught by -race + // on staging tip 249dbc6a — TestPooledWithEICTunnel_PanicPoisonsEntry). + // Tests still get the new value on a freshPool() because they + // set the package var BEFORE calling newEICTunnelPool(). + janitorInterval time.Duration } var ( @@ -127,11 +139,16 @@ func getEICTunnelPool() *eicTunnelPool { // newEICTunnelPool constructs an empty pool. Exported so tests can // build isolated pools without sharing the singleton. +// +// Captures poolJanitorInterval at construction time so the janitor +// goroutine doesn't race with t.Cleanup-driven swaps of the package +// var. See the janitorInterval field comment for the failure mode. 
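+//
+// Ordering sketch for tests (illustrative only — the freshPool helper
+// mentioned in the field comment is assumed to wrap roughly this):
+//
+//	prev := poolJanitorInterval
+//	poolJanitorInterval = 10 * time.Millisecond // swap BEFORE construction
+//	t.Cleanup(func() { poolJanitorInterval = prev })
+//	p := newEICTunnelPool() // p.janitorInterval is 10ms for this pool's janitor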
func newEICTunnelPool() *eicTunnelPool { return &eicTunnelPool{ - entries: map[string]*pooledTunnel{}, - pendingSetups: map[string]chan struct{}{}, - stopJanitor: make(chan struct{}), + entries: map[string]*pooledTunnel{}, + pendingSetups: map[string]chan struct{}{}, + stopJanitor: make(chan struct{}), + janitorInterval: poolJanitorInterval, } } @@ -290,8 +307,11 @@ func (p *eicTunnelPool) evictLRUIfFullLocked(skipInstance string) { // janitor periodically scans for entries that are idle AND expired, // closing their tunnels. Runs forever (per pool lifetime); cancelled // by close(p.stopJanitor) for tests that build short-lived pools. +// +// Reads p.janitorInterval (captured at construction) instead of the +// package-level poolJanitorInterval — see janitorInterval field comment. func (p *eicTunnelPool) janitor() { - t := time.NewTicker(poolJanitorInterval) + t := time.NewTicker(p.janitorInterval) defer t.Stop() for { select { diff --git a/workspace-server/internal/handlers/plugins.go b/workspace-server/internal/handlers/plugins.go index 4befcfe3..317c4ce4 100644 --- a/workspace-server/internal/handlers/plugins.go +++ b/workspace-server/internal/handlers/plugins.go @@ -23,6 +23,16 @@ import ( // workspace-scoped filtering (handler falls back to unfiltered list). type RuntimeLookup func(workspaceID string) (string, error) +// InstanceIDLookup resolves a workspace's EC2 instance_id by ID. Empty +// string means the workspace is not on the SaaS (EC2-per-workspace) +// backend — i.e. either local-Docker or pre-provision. The handler uses +// this to dispatch plugin install/uninstall to the EIC SSH path +// (template_files_eic.go primitive) when a workspace runs on its own EC2 +// and there's no local Docker container to exec into. A nil lookup keeps +// the handler on the local-Docker code path only — same shape as the +// pre-fix behaviour. +type InstanceIDLookup func(workspaceID string) (string, error) + // pluginSources is the contract PluginsHandler uses to talk to the // plugin source registry. Extracted as an interface (#1814) so tests can // substitute a stub without standing up the real *plugins.Registry + @@ -46,10 +56,11 @@ var _ pluginSources = (*plugins.Registry)(nil) // PluginsHandler manages the plugin registry and per-workspace plugin installation. type PluginsHandler struct { - pluginsDir string // host path to plugins/ registry - docker *client.Client // Docker client for container operations - restartFunc func(string) // auto-restart workspace after install/uninstall - runtimeLookup RuntimeLookup // workspace_id → runtime (optional) + pluginsDir string // host path to plugins/ registry + docker *client.Client // Docker client for container operations + restartFunc func(string) // auto-restart workspace after install/uninstall + runtimeLookup RuntimeLookup // workspace_id → runtime (optional) + instanceIDLookup InstanceIDLookup // workspace_id → EC2 instance_id (optional) // sources narrowed from `*plugins.Registry` to the pluginSources // interface (#1814) so tests can substitute a stub. Production // callers still pass *plugins.Registry, which satisfies the @@ -90,6 +101,15 @@ func (h *PluginsHandler) WithRuntimeLookup(lookup RuntimeLookup) *PluginsHandler return h } +// WithInstanceIDLookup installs a workspace → EC2 instance_id resolver. +// Wired by the router so production hits a real DB; tests stub it. The +// install/uninstall pipeline uses this to dispatch to the EIC SSH path +// for SaaS workspaces (no local Docker container to exec into). 
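+//
+// Typical stub wiring in tests (a sketch; mirrors the pattern used in
+// plugins_install_eic_test.go):
+//
+//	h := NewPluginsHandler(pluginsDir, nil, nil).
+//		WithInstanceIDLookup(func(string) (string, error) { return "i-0123456789abcdef0", nil })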
+func (h *PluginsHandler) WithInstanceIDLookup(lookup InstanceIDLookup) *PluginsHandler { + h.instanceIDLookup = lookup + return h +} + // pluginInfo is the API response for a plugin. type pluginInfo struct { Name string `json:"name"` diff --git a/workspace-server/internal/handlers/plugins_install.go b/workspace-server/internal/handlers/plugins_install.go index 1665a54e..b0031bfb 100644 --- a/workspace-server/internal/handlers/plugins_install.go +++ b/workspace-server/internal/handlers/plugins_install.go @@ -100,6 +100,13 @@ func (h *PluginsHandler) Install(c *gin.Context) { } // Uninstall handles DELETE /workspaces/:id/plugins/:name — removes a plugin. +// +// Dispatch order mirrors Install's deliverToContainer: +// +// 1. Local Docker container up → exec rm -rf via existing helpers. +// 2. SaaS workspace (instance_id set) → ssh sudo rm -rf via EIC. +// 3. external runtime → 422 (caller manages its own plugin dir). +// 4. Neither → 503. func (h *PluginsHandler) Uninstall(c *gin.Context) { workspaceID := c.Param("id") pluginName := c.Param("name") @@ -120,12 +127,24 @@ func (h *PluginsHandler) Uninstall(c *gin.Context) { return } - containerName := h.findRunningContainer(ctx, workspaceID) - if containerName == "" { - c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"}) + if containerName := h.findRunningContainer(ctx, workspaceID); containerName != "" { + h.uninstallViaDocker(ctx, c, workspaceID, pluginName, containerName) return } + if instanceID, runtime := h.lookupSaaSDispatch(workspaceID); instanceID != "" { + h.uninstallViaEIC(ctx, c, workspaceID, pluginName, instanceID, runtime) + return + } + + c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"}) +} + +// uninstallViaDocker holds the historical Docker-exec uninstall flow. +// Extracted out of Uninstall so the new SaaS dispatch reads cleanly and +// the two backend bodies are visibly symmetric (same steps, different +// transport). +func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context, workspaceID, pluginName, containerName string) { // Read the plugin's manifest BEFORE deletion to learn which skill dirs // it owns, so we can clean them out of /configs/skills/ and avoid the // auto-restart re-mounting them. Issue #106. @@ -177,6 +196,61 @@ func (h *PluginsHandler) Uninstall(c *gin.Context) { }) } +// uninstallViaEIC removes a plugin from a SaaS workspace EC2 over SSH. +// Symmetric with uninstallViaDocker: +// +// - Read manifest (best-effort, missing plugin.yaml = no skills to clean). +// - Skip CLAUDE.md awk-strip for now: that file lives at +// /CLAUDE.md on the host and the same awk script +// would work over ssh, but the file is rewritten on workspace restart +// by the runtime adapter anyway, so the marker either stays harmless +// or gets dropped on the next install/restart cycle. Tracked as +// follow-up; not a regression vs the docker path's semantics here. +// - rm -rf the plugin dir. +// - Trigger restart. +// +// We intentionally don't try to remove /configs/skills/ entries +// over ssh because the same /configs is bind-mounted into the runtime +// container; the agent's own start-up adapter rewrites that tree from +// the live plugin set, so a stale skill dir for an uninstalled plugin +// is cleaned up at restart. The docker path removes them eagerly only +// because docker-exec is cheap. 
We can mirror that later if a real bug +// surfaces, but adding two extra ssh round-trips per uninstall today +// would be churn for no behavioural win. +func (h *PluginsHandler) uninstallViaEIC(ctx context.Context, c *gin.Context, workspaceID, pluginName, instanceID, runtime string) { + // Read manifest first (best-effort) — we don't currently use the + // skills list on the SaaS path (see comment above), but reading it + // keeps the parsing path warm and lets log lines distinguish "we + // deleted a real plugin" from "user asked us to delete something + // that wasn't there." Errors here are swallowed: missing manifest + // must not block uninstall. + if data, err := readPluginManifestViaEIC(ctx, instanceID, runtime, pluginName); err == nil && len(data) > 0 { + info := parseManifestYAML(pluginName, data) + if len(info.Skills) > 0 { + log.Printf("Plugin uninstall: %s declared skills=%v (left to runtime restart to clean)", pluginName, info.Skills) + } + } + + if err := uninstallPluginViaEIC(ctx, instanceID, runtime, pluginName); err != nil { + log.Printf("Plugin uninstall: EIC rm failed for %s on %s: %v", pluginName, workspaceID, err) + c.JSON(http.StatusBadGateway, gin.H{"error": "failed to remove plugin from workspace EC2"}) + return + } + + if h.restartFunc != nil { + go func() { + time.Sleep(2 * time.Second) + h.restartFunc(workspaceID) + }() + } + + log.Printf("Plugin uninstall: %s from workspace %s (restarting via SaaS path)", pluginName, workspaceID) + c.JSON(http.StatusOK, gin.H{ + "status": "uninstalled", + "plugin": pluginName, + }) +} + // Download handles GET /workspaces/:id/plugins/:name/download?source= // // Phase 30.3 — stream the named plugin as a gzipped tarball so remote diff --git a/workspace-server/internal/handlers/plugins_install_eic.go b/workspace-server/internal/handlers/plugins_install_eic.go new file mode 100644 index 00000000..03a9ac68 --- /dev/null +++ b/workspace-server/internal/handlers/plugins_install_eic.go @@ -0,0 +1,249 @@ +package handlers + +// plugins_install_eic.go — SaaS (EC2-per-workspace) plugin install + uninstall +// over the EIC SSH primitive that template_files_eic.go already plumbs. Pairs +// with the local-Docker path in plugins_install.go / plugins_install_pipeline.go, +// closing the 🔴 docker-only row in docs/architecture/backends.md. +// +// Architecture note: every operation goes through `withEICTunnel` (ephemeral +// keypair → AWS push → tunnel → ssh). This file owns the plugin-shaped +// remote commands; the tunnel mechanics live in template_files_eic.go so a +// fix to the dance lands in one place. +// +// Why direct host write (not docker cp via SSH): on the workspace EC2, the +// runtime's managed-config dir (/configs for claude-code, /home/ubuntu/.hermes +// for hermes — see workspaceFilePathPrefix) is bind-mounted into the +// runtime's container by cloud-init. Writing into /plugins// +// on the host is exactly what the runtime sees on the next start. No +// docker-cp needed, and we avoid coupling to any specific container layout +// inside the workspace EC2. + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "context" + "fmt" + "log" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +// eicPluginOpTimeout bounds the whole EIC-tunnel + ssh + tar-pipe dance +// for a plugin install or uninstall. Larger than eicFileOpTimeout (30s) +// because plugin trees can carry skill markdown, MCP server binaries, +// and config files — easily a few MB through ssh + sudo on a fresh +// tunnel. 
2 min gives headroom on a cold tunnel; the install pipeline's +// PLUGIN_INSTALL_FETCH_TIMEOUT (5 min default) still bounds the outer +// request. +const eicPluginOpTimeout = 2 * time.Minute + +// hostPluginPath returns the absolute directory on the workspace EC2 +// where /configs/plugins// lives for a given runtime. Keeps the +// per-runtime indirection in one place (mirrors resolveWorkspaceRootPath +// in template_files_eic.go) so future runtimes only edit +// workspaceFilePathPrefix. +// +// The plugin name is shellQuote-wrapped at the call site, not here, +// because a couple of callers want the unquoted form for log lines. +func hostPluginPath(runtime, pluginName string) string { + base := resolveWorkspaceRootPath(runtime, "/configs") + return filepath.Join(base, "plugins", pluginName) +} + +// buildPluginInstallShell returns the remote command for receiving a tar.gz +// stream on stdin and unpacking it into /, owned by the agent +// user (uid 1000 — matches the local-Docker path's chown 1000:1000). +// +// The script is a single `sudo sh -c '...'` so the tar-receive + chown run +// under one privileged invocation; ssh-as-ubuntu has passwordless sudo on +// the standard tenant AMI. +// +// - rm -rf clears any prior install of the same plugin (idempotent +// reinstall — the user re-clicked Install or version-bumped the source). +// - mkdir -p makes the parent dir (host /configs is root-owned + always +// present; the per-plugin dir is what we're creating). +// - tar -xzf - reads stdin (the gzipped tar). --no-same-owner keeps the +// archive's tar-recorded uid/gid out of the picture; the chown -R +// after is the canonical owner. +// - chown -R 1000:1000 matches the local-Docker handler's exec at +// plugins_install_pipeline.go:273 — agent user inside the runtime +// container is uid 1000 on every workspace-template image we ship. +// +// shellQuote on the path is defence-in-depth: the path is composed from +// a runtime allowlist (workspaceFilePathPrefix) + validated plugin name, +// so traversal is already blocked. +func buildPluginInstallShell(hostPluginDir string) string { + q := shellQuote(hostPluginDir) + return fmt.Sprintf( + "sudo -n sh -c 'rm -rf %s && mkdir -p %s && tar -xzf - --no-same-owner -C %s && chown -R 1000:1000 %s'", + q, q, q, q, + ) +} + +// buildPluginUninstallShell returns the remote command for `sudo -n rm -rf +// `. -rf (vs -f) is intentional here, unlike buildRmShell: +// uninstall really does need to remove the plugin's whole subtree. +func buildPluginUninstallShell(hostPluginDir string) string { + return fmt.Sprintf("sudo -n rm -rf %s", shellQuote(hostPluginDir)) +} + +// buildPluginManifestReadShell returns the remote command for reading the +// plugin's manifest (plugin.yaml). Mirrors buildCatShell — swallows the +// missing-file stderr so the missing-manifest case lands as empty stdout +// + non-zero exit, which uninstall translates to "no skills to clean". +func buildPluginManifestReadShell(hostPluginDir string) string { + return fmt.Sprintf("sudo -n cat %s/plugin.yaml 2>/dev/null", shellQuote(hostPluginDir)) +} + +// installPluginViaEIC pushes a staged plugin directory to a SaaS workspace +// EC2 via the EIC SSH tunnel. On success the plugin lives at +// /plugins// on the host, owned by 1000:1000, +// ready for the next workspace restart to pick up. 
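+//
+// Conceptually the push is a stdin-fed tar pipe (a sketch — the real code
+// below packs the archive in memory and hands it to ssh via Stdin rather
+// than shelling out to tar on our side):
+//
+//	tar -czf - -C <stagedDir> . \
+//	  | ssh <tunnel> "sudo -n sh -c 'rm -rf <dir> && mkdir -p <dir> && tar -xzf - --no-same-owner -C <dir> && chown -R 1000:1000 <dir>'"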
+// +// The caller (deliverToContainer SaaS branch) owns: +// - the staged dir (created + cleaned up by resolveAndStage) +// - the workspace restart trigger after install +// +// Errors here are wrapped with the instance + runtime so triage can tell +// "tunnel failed" from "tar payload corrupt" without grep-ing the EC2's +// auth.log. +var installPluginViaEIC = realInstallPluginViaEIC + +func realInstallPluginViaEIC(ctx context.Context, instanceID, runtime, pluginName, stagedDir string) error { + if instanceID == "" { + return fmt.Errorf("installPluginViaEIC: empty instance_id") + } + if err := validatePluginName(pluginName); err != nil { + return fmt.Errorf("installPluginViaEIC: %w", err) + } + + // Build the tar.gz payload up-front so a tar-walk failure is surfaced + // before we open the EIC tunnel — saves a 1-2s tunnel setup on every + // "broken plugin tree" case. + var payload bytes.Buffer + gz := gzip.NewWriter(&payload) + tw := tar.NewWriter(gz) + if err := streamDirAsTar(stagedDir, tw); err != nil { + return fmt.Errorf("installPluginViaEIC: tar pack: %w", err) + } + if err := tw.Close(); err != nil { + return fmt.Errorf("installPluginViaEIC: tar close: %w", err) + } + if err := gz.Close(); err != nil { + return fmt.Errorf("installPluginViaEIC: gzip close: %w", err) + } + + hostDir := hostPluginPath(runtime, pluginName) + cmd := buildPluginInstallShell(hostDir) + + ctx, cancel := context.WithTimeout(ctx, eicPluginOpTimeout) + defer cancel() + + return withEICTunnel(ctx, instanceID, func(s eicSSHSession) error { + sshCmd := exec.CommandContext(ctx, "ssh", s.sshArgs(cmd)...) + sshCmd.Env = os.Environ() + sshCmd.Stdin = bytes.NewReader(payload.Bytes()) + var stderr bytes.Buffer + sshCmd.Stderr = &stderr + if err := sshCmd.Run(); err != nil { + return fmt.Errorf( + "ssh install: %w (instance=%s runtime=%s plugin=%s payload=%dB stderr=%s)", + err, instanceID, runtime, pluginName, payload.Len(), + strings.TrimSpace(stderr.String()), + ) + } + log.Printf( + "installPluginViaEIC: ws instance=%s runtime=%s plugin=%s payload=%dB → %s", + instanceID, runtime, pluginName, payload.Len(), hostDir, + ) + return nil + }) +} + +// uninstallPluginViaEIC removes the plugin's directory from the workspace +// EC2 via SSH. Symmetric with installPluginViaEIC but no payload — the +// remote command is a single `rm -rf`. +// +// Best-effort by design: the local-Docker path also doesn't fail +// uninstall on a missing directory (the pre-existing exec returns 0 when +// the dir is absent), so we mirror that here. Real ssh-layer failures +// (tunnel down, sudo denied) still propagate. +var uninstallPluginViaEIC = realUninstallPluginViaEIC + +func realUninstallPluginViaEIC(ctx context.Context, instanceID, runtime, pluginName string) error { + if instanceID == "" { + return fmt.Errorf("uninstallPluginViaEIC: empty instance_id") + } + if err := validatePluginName(pluginName); err != nil { + return fmt.Errorf("uninstallPluginViaEIC: %w", err) + } + + hostDir := hostPluginPath(runtime, pluginName) + cmd := buildPluginUninstallShell(hostDir) + + ctx, cancel := context.WithTimeout(ctx, eicPluginOpTimeout) + defer cancel() + + return withEICTunnel(ctx, instanceID, func(s eicSSHSession) error { + sshCmd := exec.CommandContext(ctx, "ssh", s.sshArgs(cmd)...) 
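+		// Feed the pre-packed tar.gz over ssh stdin (set just below); the
+		// remote `tar -xzf -` from buildPluginInstallShell consumes it, so
+		// the payload never touches disk on the workspace-server side.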
+ sshCmd.Env = os.Environ() + var stderr bytes.Buffer + sshCmd.Stderr = &stderr + if err := sshCmd.Run(); err != nil { + return fmt.Errorf( + "ssh rm: %w (instance=%s runtime=%s plugin=%s stderr=%s)", + err, instanceID, runtime, pluginName, + strings.TrimSpace(stderr.String()), + ) + } + log.Printf( + "uninstallPluginViaEIC: ws instance=%s runtime=%s plugin=%s → removed %s", + instanceID, runtime, pluginName, hostDir, + ) + return nil + }) +} + +// readPluginManifestViaEIC reads the plugin's plugin.yaml from the +// workspace EC2 so uninstall can learn the skills list to clean up. +// Returns ("", nil) when the manifest doesn't exist (best-effort: the +// local-Docker path treats a missing manifest as "no skills to remove", +// not a failure). +var readPluginManifestViaEIC = realReadPluginManifestViaEIC + +func realReadPluginManifestViaEIC(ctx context.Context, instanceID, runtime, pluginName string) ([]byte, error) { + if instanceID == "" { + return nil, fmt.Errorf("readPluginManifestViaEIC: empty instance_id") + } + if err := validatePluginName(pluginName); err != nil { + return nil, fmt.Errorf("readPluginManifestViaEIC: %w", err) + } + + hostDir := hostPluginPath(runtime, pluginName) + cmd := buildPluginManifestReadShell(hostDir) + + ctx, cancel := context.WithTimeout(ctx, eicPluginOpTimeout) + defer cancel() + + var out []byte + runErr := withEICTunnel(ctx, instanceID, func(s eicSSHSession) error { + sshCmd := exec.CommandContext(ctx, "ssh", s.sshArgs(cmd)...) + sshCmd.Env = os.Environ() + var stdout, stderr bytes.Buffer + sshCmd.Stdout = &stdout + sshCmd.Stderr = &stderr + // Don't fail on non-zero exit: missing-manifest case returns 1 + // from cat with empty stdout, which is the "no skills" signal. + _ = sshCmd.Run() + out = stdout.Bytes() + return nil + }) + if runErr != nil { + return nil, runErr + } + return out, nil +} diff --git a/workspace-server/internal/handlers/plugins_install_eic_test.go b/workspace-server/internal/handlers/plugins_install_eic_test.go new file mode 100644 index 00000000..2150728b --- /dev/null +++ b/workspace-server/internal/handlers/plugins_install_eic_test.go @@ -0,0 +1,505 @@ +package handlers + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "context" + "errors" + "io" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/gin-gonic/gin" +) + +// expectAllowlistAllowAll programs the package-shared withMockDB sqlmock +// so the org-allowlist gate (org_plugin_allowlist.go) returns "allow-all" +// for the duration of one Install call. The gate fires three queries — +// resolveOrgID, allowlist EXISTS, allowlist COUNT — and we satisfy each +// with the empty/zero shape that means "no allowlist configured." +// +// Without this, tests that exercise the full Install flow panic on a +// nil DB. The handlers package already ships withMockDB in +// tokens_sqlmock_test.go; we just layer the allowlist-specific +// expectations on top. +func expectAllowlistAllowAll(mock sqlmock.Sqlmock) { + mock.MatchExpectationsInOrder(false) + mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`). + WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil)) + mock.ExpectQuery(`SELECT EXISTS`). + WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false)) + mock.ExpectQuery(`SELECT COUNT\(\*\) FROM org_plugin_allowlist`). 
+ WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0)) +} + +// stagePluginRegistry creates a single-plugin registry under dir so the +// install handler's local resolver can find it. Returns the path to the +// plugin dir for any caller that wants to assert tar contents. +// +// Centralised so a future tweak to the registry shape (e.g. plugin.yaml +// schema bump) only updates one place. Tests use the source spec +// `local://` which the local resolver maps to //. +func stagePluginRegistry(t *testing.T, dir, name string) string { + t.Helper() + pluginDir := filepath.Join(dir, name) + if err := os.Mkdir(pluginDir, 0755); err != nil { + t.Fatalf("mkdir plugin dir: %v", err) + } + manifest := "name: " + name + "\nversion: \"1.0.0\"\ndescription: SaaS dispatch test plugin\n" + if err := os.WriteFile(filepath.Join(pluginDir, "plugin.yaml"), []byte(manifest), 0644); err != nil { + t.Fatalf("write plugin.yaml: %v", err) + } + if err := os.WriteFile(filepath.Join(pluginDir, "rule.md"), []byte("# rule\n"), 0644); err != nil { + t.Fatalf("write rule.md: %v", err) + } + return pluginDir +} + +// stubInstallPluginViaEIC swaps the package-level installPluginViaEIC for +// the duration of the test; restored by t.Cleanup. Mirrors the existing +// withEICTunnel stub pattern (template_files_eic_dispatch_test.go). +func stubInstallPluginViaEIC(t *testing.T, fn func(ctx context.Context, instanceID, runtime, pluginName, stagedDir string) error) { + t.Helper() + prev := installPluginViaEIC + installPluginViaEIC = fn + t.Cleanup(func() { installPluginViaEIC = prev }) +} + +func stubUninstallPluginViaEIC(t *testing.T, fn func(ctx context.Context, instanceID, runtime, pluginName string) error) { + t.Helper() + prev := uninstallPluginViaEIC + uninstallPluginViaEIC = fn + t.Cleanup(func() { uninstallPluginViaEIC = prev }) +} + +func stubReadPluginManifestViaEIC(t *testing.T, fn func(ctx context.Context, instanceID, runtime, pluginName string) ([]byte, error)) { + t.Helper() + prev := readPluginManifestViaEIC + readPluginManifestViaEIC = fn + t.Cleanup(func() { readPluginManifestViaEIC = prev }) +} + +// ---------- pure-function shell shape ---------- + +func TestBuildPluginInstallShell_QuotesPath(t *testing.T) { + got := buildPluginInstallShell("/configs/plugins/my-plugin") + want := "sudo -n sh -c 'rm -rf '/configs/plugins/my-plugin' && mkdir -p '/configs/plugins/my-plugin' && tar -xzf - --no-same-owner -C '/configs/plugins/my-plugin' && chown -R 1000:1000 '/configs/plugins/my-plugin''" + if got != want { + t.Errorf("buildPluginInstallShell mismatch:\n got %q\nwant %q", got, want) + } +} + +func TestBuildPluginUninstallShell_QuotesPath(t *testing.T) { + got := buildPluginUninstallShell("/configs/plugins/my-plugin") + want := "sudo -n rm -rf '/configs/plugins/my-plugin'" + if got != want { + t.Errorf("buildPluginUninstallShell mismatch:\n got %q\nwant %q", got, want) + } +} + +func TestBuildPluginManifestReadShell_QuotesPath(t *testing.T) { + got := buildPluginManifestReadShell("/configs/plugins/my-plugin") + want := "sudo -n cat '/configs/plugins/my-plugin'/plugin.yaml 2>/dev/null" + if got != want { + t.Errorf("buildPluginManifestReadShell mismatch:\n got %q\nwant %q", got, want) + } +} + +func TestHostPluginPath_PerRuntime(t *testing.T) { + cases := []struct { + runtime string + plugin string + want string + }{ + {"claude-code", "browser-automation", "/configs/plugins/browser-automation"}, + {"hermes", "browser-automation", "/home/ubuntu/.hermes/plugins/browser-automation"}, + {"langgraph", 
"browser-automation", "/opt/configs/plugins/browser-automation"}, + // Unknown / empty runtime falls back to /configs (containerized + // user-data layout) so a future runtime added to workspaces table + // without a workspaceFilePathPrefix entry doesn't blow up the + // install path silently. + {"", "browser-automation", "/configs/plugins/browser-automation"}, + {"some-future-runtime", "x", "/configs/plugins/x"}, + } + for _, c := range cases { + t.Run(c.runtime+"/"+c.plugin, func(t *testing.T) { + got := hostPluginPath(c.runtime, c.plugin) + if got != c.want { + t.Errorf("hostPluginPath(%q, %q) = %q, want %q", c.runtime, c.plugin, got, c.want) + } + }) + } +} + +// ---------- dispatch: install ---------- + +// TestPluginInstall_SaaS_DispatchesToEIC — the most-load-bearing test in +// this file. With h.docker == nil and instanceIDLookup returning a real +// instance_id, Install MUST push the staged plugin to the EC2 over EIC +// (not 503). Asserts the EIC stub is called with the right (instance, +// runtime, plugin) tuple AND that the staged dir has the manifest + +// rule files we put there — proves the staging side wasn't bypassed. +func TestPluginInstall_SaaS_DispatchesToEIC(t *testing.T) { + registry := t.TempDir() + stagePluginRegistry(t, registry, "browser-automation") + + type capture struct { + called bool + instanceID string + runtime string + pluginName string + stagedFiles []string + } + var got capture + + stubInstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName, stagedDir string) error { + got.called = true + got.instanceID = instanceID + got.runtime = runtime + got.pluginName = pluginName + entries, err := os.ReadDir(stagedDir) + if err != nil { + t.Fatalf("read staged dir: %v", err) + } + for _, e := range entries { + got.stagedFiles = append(got.stagedFiles, e.Name()) + } + return nil + }) + + mock, cleanup := withMockDB(t) + defer cleanup() + expectAllowlistAllowAll(mock) + + h := NewPluginsHandler(registry, nil, nil). + WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }). + WithInstanceIDLookup(func(string) (string, error) { return "i-0e0951a3cfd9bbf75", nil }) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "c7244ed9-f623-4cba-8873-020e5c9fe104"}} + c.Request = httptest.NewRequest( + "POST", + "/workspaces/c7244ed9-f623-4cba-8873-020e5c9fe104/plugins", + bytes.NewBufferString(`{"source":"local://browser-automation"}`), + ) + c.Request.Header.Set("Content-Type", "application/json") + + h.Install(c) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + if !got.called { + t.Fatalf("installPluginViaEIC was not called") + } + if got.instanceID != "i-0e0951a3cfd9bbf75" { + t.Errorf("instanceID = %q, want i-0e0951a3cfd9bbf75", got.instanceID) + } + if got.runtime != "claude-code" { + t.Errorf("runtime = %q, want claude-code", got.runtime) + } + if got.pluginName != "browser-automation" { + t.Errorf("pluginName = %q, want browser-automation", got.pluginName) + } + // Staged dir must carry the resolver's actual fetch — manifest + rule. + // Anything missing here means the stage step was bypassed. 
+ hasManifest, hasRule := false, false + for _, f := range got.stagedFiles { + if f == "plugin.yaml" { + hasManifest = true + } + if f == "rule.md" { + hasRule = true + } + } + if !hasManifest || !hasRule { + t.Errorf("staged dir missing files: %v (want plugin.yaml + rule.md)", got.stagedFiles) + } +} + +// TestPluginInstall_SaaS_PropagatesEICError — when the EIC push fails +// (tunnel down, sudo denied), Install MUST surface 502 rather than swallow +// the error and report 200. 502 is the right status for "we tried, the +// remote side wasn't there" — distinct from 503 ("nothing wired") and +// 500 ("our bug"). The body deliberately doesn't echo the underlying +// error string (would leak ssh stderr / instance metadata). +func TestPluginInstall_SaaS_PropagatesEICError(t *testing.T) { + registry := t.TempDir() + stagePluginRegistry(t, registry, "browser-automation") + + mock, cleanup := withMockDB(t) + defer cleanup() + expectAllowlistAllowAll(mock) + + stubInstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName, stagedDir string) error { + return errors.New("ssh: tunnel exited 255") + }) + + h := NewPluginsHandler(registry, nil, nil). + WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }). + WithInstanceIDLookup(func(string) (string, error) { return "i-aaaa", nil }) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-1"}} + c.Request = httptest.NewRequest( + "POST", + "/workspaces/ws-1/plugins", + bytes.NewBufferString(`{"source":"local://browser-automation"}`), + ) + c.Request.Header.Set("Content-Type", "application/json") + + h.Install(c) + + if w.Code != http.StatusBadGateway { + t.Errorf("expected 502 for EIC failure, got %d: %s", w.Code, w.Body.String()) + } + if strings.Contains(w.Body.String(), "tunnel exited") { + t.Errorf("response body must not echo raw EIC error: %s", w.Body.String()) + } +} + +// TestPluginInstall_NoBackends_Returns503 — lookup is wired but returns +// empty instance_id (e.g. workspace pre-provision, or local-Docker +// deploy without a running container). The handler MUST 503, not silently +// dispatch to EIC with an empty instance_id. +func TestPluginInstall_NoBackends_Returns503(t *testing.T) { + registry := t.TempDir() + stagePluginRegistry(t, registry, "browser-automation") + + mock, cleanup := withMockDB(t) + defer cleanup() + expectAllowlistAllowAll(mock) + + stubInstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName, stagedDir string) error { + t.Errorf("EIC must not be called when instance_id is empty") + return nil + }) + + h := NewPluginsHandler(registry, nil, nil). + WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }). + WithInstanceIDLookup(func(string) (string, error) { return "", nil }) // empty + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-1"}} + c.Request = httptest.NewRequest( + "POST", + "/workspaces/ws-1/plugins", + bytes.NewBufferString(`{"source":"local://browser-automation"}`), + ) + c.Request.Header.Set("Content-Type", "application/json") + + h.Install(c) + + if w.Code != http.StatusServiceUnavailable { + t.Errorf("expected 503, got %d: %s", w.Code, w.Body.String()) + } +} + +// TestPluginInstall_InstanceLookupError_Returns503 — a DB hiccup on the +// instance_id lookup must NOT crash or 502; the handler logs and falls +// through to 503. 
Same fail-open shape h.runtimeLookup uses (see +// TestPluginInstall_NoRuntimeLookup_FailsOpen). Pinning this prevents a +// future "tighten error handling" refactor from quietly converting a DB +// blip into a five-minute outage on the install endpoint. +func TestPluginInstall_InstanceLookupError_Returns503(t *testing.T) { + registry := t.TempDir() + stagePluginRegistry(t, registry, "browser-automation") + + mock, cleanup := withMockDB(t) + defer cleanup() + expectAllowlistAllowAll(mock) + + h := NewPluginsHandler(registry, nil, nil). + WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }). + WithInstanceIDLookup(func(string) (string, error) { return "", errors.New("db: connection refused") }) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-1"}} + c.Request = httptest.NewRequest( + "POST", + "/workspaces/ws-1/plugins", + bytes.NewBufferString(`{"source":"local://browser-automation"}`), + ) + c.Request.Header.Set("Content-Type", "application/json") + + h.Install(c) + + if w.Code != http.StatusServiceUnavailable { + t.Errorf("expected 503 on instance-id lookup error, got %d: %s", w.Code, w.Body.String()) + } +} + +// ---------- dispatch: uninstall ---------- + +func TestPluginUninstall_SaaS_DispatchesToEIC(t *testing.T) { + stubReadPluginManifestViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName string) ([]byte, error) { + return []byte("name: browser-automation\nskills:\n - browse\n"), nil + }) + + type capture struct { + called bool + instanceID string + runtime string + pluginName string + } + var got capture + stubUninstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName string) error { + got.called = true + got.instanceID = instanceID + got.runtime = runtime + got.pluginName = pluginName + return nil + }) + + h := NewPluginsHandler(t.TempDir(), nil, nil). + WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }). + WithInstanceIDLookup(func(string) (string, error) { return "i-bbbb", nil }) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{ + {Key: "id", Value: "ws-1"}, + {Key: "name", Value: "browser-automation"}, + } + c.Request = httptest.NewRequest("DELETE", "/workspaces/ws-1/plugins/browser-automation", nil) + + h.Uninstall(c) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + if !got.called { + t.Fatalf("uninstallPluginViaEIC was not called") + } + if got.instanceID != "i-bbbb" || got.runtime != "claude-code" || got.pluginName != "browser-automation" { + t.Errorf("dispatch args wrong: %+v", got) + } +} + +func TestPluginUninstall_SaaS_PropagatesEICError(t *testing.T) { + stubReadPluginManifestViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName string) ([]byte, error) { + return nil, nil + }) + stubUninstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName string) error { + return errors.New("ssh: connection refused") + }) + + h := NewPluginsHandler(t.TempDir(), nil, nil). + WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }). 
+ WithInstanceIDLookup(func(string) (string, error) { return "i-cccc", nil }) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{ + {Key: "id", Value: "ws-1"}, + {Key: "name", Value: "browser-automation"}, + } + c.Request = httptest.NewRequest("DELETE", "/workspaces/ws-1/plugins/browser-automation", nil) + + h.Uninstall(c) + + if w.Code != http.StatusBadGateway { + t.Errorf("expected 502, got %d: %s", w.Code, w.Body.String()) + } +} + +func TestPluginUninstall_NoBackends_Returns503(t *testing.T) { + stubUninstallPluginViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName string) error { + t.Errorf("EIC uninstall must not be called with empty instance_id") + return nil + }) + + h := NewPluginsHandler(t.TempDir(), nil, nil). + WithRuntimeLookup(func(string) (string, error) { return "claude-code", nil }). + WithInstanceIDLookup(func(string) (string, error) { return "", nil }) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{ + {Key: "id", Value: "ws-1"}, + {Key: "name", Value: "browser-automation"}, + } + c.Request = httptest.NewRequest("DELETE", "/workspaces/ws-1/plugins/browser-automation", nil) + + h.Uninstall(c) + + if w.Code != http.StatusServiceUnavailable { + t.Errorf("expected 503, got %d: %s", w.Code, w.Body.String()) + } +} + +// ---------- tarball shape ---------- + +// TestRealInstallPluginViaEIC_TarPayloadShape — the production +// installPluginViaEIC packs the staged dir as gzipped tar. Stub +// withEICTunnel + run the real installPluginViaEIC body, capturing the +// ssh stdin via a fake exec.Command — except go's exec is hard to fake +// without hijacking $PATH. Instead we exercise the tar packer directly: +// streamDirAsTar's behaviour is what we actually depend on, and a +// regression in either streamDirAsTar OR the gzip wrapping will be +// visible here. +func TestRealInstallPluginViaEIC_TarPayloadShape(t *testing.T) { + staged := t.TempDir() + if err := os.WriteFile(filepath.Join(staged, "plugin.yaml"), []byte("name: x\n"), 0644); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(filepath.Join(staged, "skills", "browse"), 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(staged, "skills", "browse", "instructions.md"), []byte("step 1\n"), 0644); err != nil { + t.Fatal(err) + } + + var buf bytes.Buffer + gz := gzip.NewWriter(&buf) + tw := tar.NewWriter(gz) + if err := streamDirAsTar(staged, tw); err != nil { + t.Fatalf("streamDirAsTar: %v", err) + } + if err := tw.Close(); err != nil { + t.Fatalf("tw close: %v", err) + } + if err := gz.Close(); err != nil { + t.Fatalf("gz close: %v", err) + } + + // Round-trip: the same payload the production flow would pipe into + // `tar -xzf -` on the remote should unpack to plugin.yaml + + // skills/browse/instructions.md. + gr, err := gzip.NewReader(&buf) + if err != nil { + t.Fatalf("gzip reader: %v", err) + } + tr := tar.NewReader(gr) + seen := map[string]bool{} + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("tar next: %v", err) + } + seen[hdr.Name] = true + } + for _, want := range []string{"plugin.yaml", "skills/browse/instructions.md"} { + // Tar entries on Linux normally use forward slashes regardless + // of host separator; double-check both forms so a Windows test + // runner doesn't go red on a path-sep difference. Production + // always runs on Linux (CI + tenant EC2). 
+ alt := filepath.FromSlash(want) + if !seen[want] && !seen[alt] { + t.Errorf("tar payload missing %q (saw %v)", want, seen) + } + } +} diff --git a/workspace-server/internal/handlers/plugins_install_pipeline.go b/workspace-server/internal/handlers/plugins_install_pipeline.go index 07fb428b..6c6fb217 100644 --- a/workspace-server/internal/handlers/plugins_install_pipeline.go +++ b/workspace-server/internal/handlers/plugins_install_pipeline.go @@ -261,22 +261,74 @@ func (h *PluginsHandler) resolveAndStage(ctx context.Context, req installRequest // deliverToContainer copies the staged plugin dir into the workspace // container, chowns it for the agent user, and triggers a restart. // Returns a typed *httpErr on failure; nil on success. +// +// Dispatch order: +// +// 1. Local Docker container is up → tar+CopyToContainer (historical path). +// 2. SaaS workspace (instance_id set) → push via EIC SSH to the EC2's +// bind-mounted /configs/plugins//. Closes the 🔴 docker-only +// row in docs/architecture/backends.md by routing through the same +// primitive Files API uses (template_files_eic.go). +// 3. Neither wired → 503. True "no backend" case (dev box without +// Docker AND without an instance_id row). +// +// The SaaS branch is gated on h.instanceIDLookup so unit tests can keep +// using NewPluginsHandler without a DB; production wires it in router.go. func (h *PluginsHandler) deliverToContainer(ctx context.Context, workspaceID string, r *stageResult) error { - containerName := h.findRunningContainer(ctx, workspaceID) - if containerName == "" { - return newHTTPErr(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"}) + if containerName := h.findRunningContainer(ctx, workspaceID); containerName != "" { + if err := h.copyPluginToContainer(ctx, containerName, r.StagedDir, r.PluginName); err != nil { + log.Printf("Plugin install: failed to copy %s to %s: %v", r.PluginName, workspaceID, err) + return newHTTPErr(http.StatusInternalServerError, gin.H{"error": "failed to copy plugin to container"}) + } + h.execAsRoot(ctx, containerName, []string{ + "chown", "-R", "1000:1000", "/configs/plugins/" + r.PluginName, + }) + if h.restartFunc != nil { + go h.restartFunc(workspaceID) + } + return nil } - if err := h.copyPluginToContainer(ctx, containerName, r.StagedDir, r.PluginName); err != nil { - log.Printf("Plugin install: failed to copy %s to %s: %v", r.PluginName, workspaceID, err) - return newHTTPErr(http.StatusInternalServerError, gin.H{"error": "failed to copy plugin to container"}) + + if instanceID, runtime := h.lookupSaaSDispatch(workspaceID); instanceID != "" { + if err := installPluginViaEIC(ctx, instanceID, runtime, r.PluginName, r.StagedDir); err != nil { + log.Printf("Plugin install: EIC push failed for %s → %s: %v", r.PluginName, workspaceID, err) + return newHTTPErr(http.StatusBadGateway, gin.H{ + "error": "failed to deliver plugin to workspace EC2", + }) + } + if h.restartFunc != nil { + go h.restartFunc(workspaceID) + } + return nil } - h.execAsRoot(ctx, containerName, []string{ - "chown", "-R", "1000:1000", "/configs/plugins/" + r.PluginName, - }) - if h.restartFunc != nil { - go h.restartFunc(workspaceID) + + return newHTTPErr(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"}) +} + +// lookupSaaSDispatch returns (instance_id, runtime) for SaaS dispatch, or +// ("", "") when the lookups aren't wired or the workspace isn't on the +// EC2 backend. 
Errors from the lookups are logged-and-swallowed: failing +// open here just means the caller falls through to the 503 path it would +// have returned without us, never to a wrong action against the wrong +// instance. +func (h *PluginsHandler) lookupSaaSDispatch(workspaceID string) (instanceID, runtime string) { + if h.instanceIDLookup == nil { + return "", "" } - return nil + id, err := h.instanceIDLookup(workspaceID) + if err != nil { + log.Printf("Plugin install: instance_id lookup failed for %s: %v", workspaceID, err) + return "", "" + } + if id == "" { + return "", "" + } + if h.runtimeLookup != nil { + if rt, rterr := h.runtimeLookup(workspaceID); rterr == nil { + runtime = rt + } + } + return id, runtime } // readPluginSkillsFromContainer reads /configs/plugins//plugin.yaml diff --git a/workspace-server/internal/router/router.go b/workspace-server/internal/router/router.go index e3b9171b..770b0a66 100644 --- a/workspace-server/internal/router/router.go +++ b/workspace-server/internal/router/router.go @@ -11,13 +11,13 @@ import ( "github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo" "github.com/Molecule-AI/molecule-monorepo/platform/internal/channels" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/messagestore" "github.com/Molecule-AI/molecule-monorepo/platform/internal/events" "github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads" memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/messagestore" "github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics" "github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads" "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner" "github.com/Molecule-AI/molecule-monorepo/platform/internal/supervised" "github.com/Molecule-AI/molecule-monorepo/platform/internal/ws" @@ -109,8 +109,8 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi now := time.Now() for name, last := range snap { out[name] = gin.H{ - "last_tick_at": last, - "seconds_ago": int(now.Sub(last).Seconds()), + "last_tick_at": last, + "seconds_ago": int(now.Sub(last).Seconds()), } } c.JSON(200, gin.H{"subsystems": out}) @@ -599,8 +599,25 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi ).Scan(&runtime) return runtime, err } + // Instance-id lookup powers the SaaS dispatch in install/uninstall: + // when a workspace is on the EC2-per-workspace backend (instance_id + // non-NULL) and there's no local Docker container to exec into, the + // pipeline pushes the staged plugin tarball to that EC2 over EIC SSH. + // Empty result means the workspace lives on the local-Docker backend + // (or hasn't been provisioned yet) and the handler falls back to its + // original Docker path. Same pattern templates.go and terminal.go use. + instanceIDLookup := func(workspaceID string) (string, error) { + var instanceID string + err := db.DB.QueryRowContext( + context.Background(), + `SELECT COALESCE(instance_id, '') FROM workspaces WHERE id = $1`, + workspaceID, + ).Scan(&instanceID) + return instanceID, err + } plgh := handlers.NewPluginsHandler(pluginsDir, dockerCli, wh.RestartByID). 
- WithRuntimeLookup(runtimeLookup) + WithRuntimeLookup(runtimeLookup). + WithInstanceIDLookup(instanceIDLookup) r.GET("/plugins", plgh.ListRegistry) r.GET("/plugins/sources", plgh.ListSources) wsAuth.GET("/plugins", plgh.ListInstalled)
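
For a reviewer tracing this wiring back through the handler, the dispatch that install and uninstall now share — condensed from deliverToContainer and Uninstall earlier in this diff, shown as an illustration rather than new behaviour — is:

	if containerName := h.findRunningContainer(ctx, workspaceID); containerName != "" {
		// 1. local Docker container running → exec + CopyToContainer (historical path)
	} else if instanceID, runtime := h.lookupSaaSDispatch(workspaceID); instanceID != "" {
		// 2. instance_id set → installPluginViaEIC / uninstallPluginViaEIC over EIC SSH
	} else {
		// 3. neither backend wired → 503 "workspace container not running"
	}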