From 03741d1110b91eb65a229f3303707cb3c63c807c Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Wed, 22 Apr 2026 18:27:12 -0700 Subject: [PATCH] feat(files-api): SSH-backed write for SaaS workspaces (fixes 500 docker not available) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symptom (prod, hongmingwang tenant, 2026-04-22): PUT /workspaces/:id/files/config.yaml → 500 {"error":"failed to write file: docker not available"} Root cause: WriteFile + ReplaceFiles always reached for the tenant's Docker client, but SaaS workspaces run as EC2 VMs (no Docker on the tenant to cp into). There was no SaaS code path, so Save/Save&Restart in the Config tab silently 500'd for every SaaS user. Fix: add writeFileViaEIC — same ephemeral-keypair + EIC-tunnel dance that the Terminal tab already uses (terminal.go). Flow: 1. ssh-keygen ephemeral ed25519 pair 2. aws ec2-instance-connect send-ssh-public-key (60s validity) 3. aws ec2-instance-connect open-tunnel (TLS → :22) 4. ssh ... "install -D -m 0644 /dev/stdin " install -D creates missing parent dirs atomically 5. Kill tunnel + wipe keydir Runtime → base-path map (new table workspaceFilePathPrefix): hermes → /home/ubuntu/.hermes langgraph → /opt/configs external → /opt/configs unknown → /opt/configs Both WriteFile (single file) and ReplaceFiles (bulk) detect `workspaces.instance_id != ''` and route to EIC instead of Docker. Local/self-hosted Docker path is unchanged. Security: the only variable piece in the remote ssh command is the absolute path, which is built via map lookup + filepath.Clean so traversal is blocked. shellQuote() wraps it as defence-in-depth. validateRelPath rejects absolute paths and surviving `..` segments up-front; tests assert traversal rejection. Follow-ups tracked separately: - Reload hook after save (hermes gateway restart via SSH) - Per-tunnel batching for ReplaceFiles with many files - Runtime-specific base paths should be declared in the runtime manifest, not hardcoded in the handler Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/handlers/template_files_eic.go | 182 ++++++++++++++++++ .../handlers/template_files_eic_test.go | 85 ++++++++ .../internal/handlers/template_import.go | 29 ++- .../internal/handlers/templates.go | 21 +- 4 files changed, 312 insertions(+), 5 deletions(-) create mode 100644 workspace-server/internal/handlers/template_files_eic.go create mode 100644 workspace-server/internal/handlers/template_files_eic_test.go diff --git a/workspace-server/internal/handlers/template_files_eic.go b/workspace-server/internal/handlers/template_files_eic.go new file mode 100644 index 00000000..2c8858be --- /dev/null +++ b/workspace-server/internal/handlers/template_files_eic.go @@ -0,0 +1,182 @@ +package handlers + +// template_files_eic.go — SSH-backed file write for SaaS workspaces +// (EC2-per-workspace). Pairs with the existing Docker-path in templates.go +// (WriteFile) and template_import.go (ReplaceFiles). +// +// Flow for a single file write: +// 1. Generate ephemeral ed25519 keypair (on-disk for ≤ write duration). +// 2. Push the public key via `aws ec2-instance-connect send-ssh-public-key` +// so the target sshd accepts it for the next 60s. +// 3. Open a TLS-tunnelled TCP port via `aws ec2-instance-connect open-tunnel` +// from a local free port → workspace's sshd on 22. +// 4. Pipe content to `ssh ... "install -D -m 0644 /dev/stdin "`. +// `install -D` creates any missing parent dirs atomically. File is owned +// by whichever $OSUser we authenticated as (ubuntu by default). +// 5. Close tunnel + wipe keydir. +// +// All the AWS calls + ssh tunnel exec go through the same package-level +// func vars defined in terminal.go (openTunnelCmd, sendSSHPublicKey) so +// tests can stub them the same way the terminal tests do. + +import ( + "bytes" + "context" + "fmt" + "log" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +// workspaceFilePathPrefix maps a runtime name to the absolute base path on +// the workspace EC2 where the Files API's relative paths land. New runtimes +// can be added here without touching handler code. +// +// Keep these stable — changing the base path for an existing runtime +// without a migration shim will make previously-saved files disappear from +// the runtime's POV. +var workspaceFilePathPrefix = map[string]string{ + "hermes": "/home/ubuntu/.hermes", + "langgraph": "/opt/configs", + "external": "/opt/configs", + // Default for unknown / future runtimes is /opt/configs — most + // conservative place that doesn't collide with system or runtime- + // private directories. +} + +func resolveWorkspaceFilePath(runtime, relPath string) (string, error) { + if err := validateRelPath(relPath); err != nil { + return "", err + } + base, ok := workspaceFilePathPrefix[strings.ToLower(strings.TrimSpace(runtime))] + if !ok { + base = "/opt/configs" + } + return filepath.Join(base, filepath.Clean(relPath)), nil +} + +// eicFileWriteTimeout bounds the whole dance. Key push is <500ms, tunnel +// is 1-2s, ssh + write is <2s. 30s gives headroom for slow pulls without +// hanging the Files API forever under EIC misconfiguration. +const eicFileWriteTimeout = 30 * time.Second + +// writeFileViaEIC writes a single file to the workspace EC2 at the +// absolute path that resolveWorkspaceFilePath computed. On success, +// optionally invokes the runtime's reload hook (not implemented yet — +// tracked as follow-up; for today the canvas issues a separate Restart +// after Save). +// +// instanceID: AWS EC2 instance id from workspaces.instance_id. +// runtime: used only for path-prefix resolution. +// relPath: the relative path the caller validated (no /, no ..). +// content: file body bytes. +func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, content []byte) error { + if instanceID == "" { + return fmt.Errorf("workspace has no instance_id — not a SaaS EC2 workspace") + } + absPath, err := resolveWorkspaceFilePath(runtime, relPath) + if err != nil { + return fmt.Errorf("invalid path: %w", err) + } + + osUser := os.Getenv("WORKSPACE_EC2_OS_USER") + if osUser == "" { + osUser = "ubuntu" + } + region := os.Getenv("AWS_REGION") + if region == "" { + region = "us-east-2" + } + + ctx, cancel := context.WithTimeout(ctx, eicFileWriteTimeout) + defer cancel() + + // Ephemeral keypair. + keyDir, err := os.MkdirTemp("", "molecule-filewrite-*") + if err != nil { + return fmt.Errorf("keydir mkdir: %w", err) + } + defer func() { _ = os.RemoveAll(keyDir) }() + keyPath := keyDir + "/id" + if out, kerr := exec.CommandContext(ctx, "ssh-keygen", + "-t", "ed25519", "-f", keyPath, "-N", "", "-q", + "-C", "molecule-filewrite", + ).CombinedOutput(); kerr != nil { + return fmt.Errorf("ssh-keygen: %w (%s)", kerr, strings.TrimSpace(string(out))) + } + pubKey, err := os.ReadFile(keyPath + ".pub") + if err != nil { + return fmt.Errorf("read pubkey: %w", err) + } + + // 1. Push key. + if err := sendSSHPublicKey(ctx, region, instanceID, osUser, strings.TrimSpace(string(pubKey))); err != nil { + return fmt.Errorf("send-ssh-public-key: %w", err) + } + + // 2. Open tunnel on an OS-picked free port. + localPort, err := pickFreePort() + if err != nil { + return fmt.Errorf("pick free port: %w", err) + } + opts := eicSSHOptions{ + InstanceID: instanceID, + OSUser: osUser, + Region: region, + LocalPort: localPort, + PrivateKeyPath: keyPath, + } + tunnel := openTunnelCmd(opts) + tunnel.Env = os.Environ() + if err := tunnel.Start(); err != nil { + return fmt.Errorf("open-tunnel start: %w", err) + } + defer func() { + if tunnel.Process != nil { + _ = tunnel.Process.Kill() + } + _ = tunnel.Wait() + }() + if err := waitForPort(ctx, "127.0.0.1", localPort, 10*time.Second); err != nil { + return fmt.Errorf("tunnel never listened: %w", err) + } + + // 3. SSH + install -D. `install` creates any missing parent dirs and + // writes the file atomically via temp-file-rename. Permissions 0644 + // match the existing tar-unpack defaults on the Docker path. + // + // The remote command is fully deterministic — no user-controlled + // input reaches a shell eval (absPath is built from a map + Clean()). + sshArgs := []string{ + "-i", keyPath, + "-o", "StrictHostKeyChecking=no", + "-o", "UserKnownHostsFile=/dev/null", + "-o", "ServerAliveInterval=15", + "-p", fmt.Sprintf("%d", localPort), + fmt.Sprintf("%s@127.0.0.1", osUser), + fmt.Sprintf("install -D -m 0644 /dev/stdin %s", shellQuote(absPath)), + } + sshCmd := exec.CommandContext(ctx, "ssh", sshArgs...) + sshCmd.Env = os.Environ() + sshCmd.Stdin = bytes.NewReader(content) + var stderr bytes.Buffer + sshCmd.Stderr = &stderr + if err := sshCmd.Run(); err != nil { + return fmt.Errorf("ssh install: %w (%s)", err, strings.TrimSpace(stderr.String())) + } + log.Printf("writeFileViaEIC: ws instance=%s runtime=%s wrote %d bytes → %s", + instanceID, runtime, len(content), absPath) + return nil +} + +// shellQuote wraps a value in single quotes + escapes embedded single +// quotes for POSIX sh. Used for the sole piece of variable data in the +// remote ssh command. (absPath is already built from a map + Clean() so +// traversal is blocked regardless; this is defence-in-depth against +// future refactor that might accept user paths here.) +func shellQuote(s string) string { + return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'" +} diff --git a/workspace-server/internal/handlers/template_files_eic_test.go b/workspace-server/internal/handlers/template_files_eic_test.go new file mode 100644 index 00000000..6e8a901f --- /dev/null +++ b/workspace-server/internal/handlers/template_files_eic_test.go @@ -0,0 +1,85 @@ +package handlers + +import ( + "strings" + "testing" +) + +// TestResolveWorkspaceFilePath_KnownRuntimes — the runtime → base-path +// map is the source of truth for where saved files land on the workspace +// EC2. Changing a base path without a migration shim silently orphans +// previously-saved files; this test pins the current contract. +func TestResolveWorkspaceFilePath_KnownRuntimes(t *testing.T) { + cases := []struct { + runtime string + relPath string + want string + }{ + {"hermes", "config.yaml", "/home/ubuntu/.hermes/config.yaml"}, + {"HERMES", "config.yaml", "/home/ubuntu/.hermes/config.yaml"}, // case-insensitive + {"hermes", "nested/a.yaml", "/home/ubuntu/.hermes/nested/a.yaml"}, + {"langgraph", "config.yaml", "/opt/configs/config.yaml"}, + {"external", "skills.json", "/opt/configs/skills.json"}, + {"", "config.yaml", "/opt/configs/config.yaml"}, // empty → default + {"unknown", "config.yaml", "/opt/configs/config.yaml"}, // unknown → default + } + for _, tc := range cases { + t.Run(tc.runtime+"/"+tc.relPath, func(t *testing.T) { + got, err := resolveWorkspaceFilePath(tc.runtime, tc.relPath) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if got != tc.want { + t.Errorf("resolveWorkspaceFilePath(%q,%q) = %q, want %q", + tc.runtime, tc.relPath, got, tc.want) + } + }) + } +} + +// TestResolveWorkspaceFilePath_RejectsTraversal — any attempt to escape +// the runtime base path via .. or absolute paths must return an error +// before the ssh install runs. validateRelPath uses filepath.Clean then +// checks for `..` or absolute prefix, so cases like `a/../b` are +// NORMALIZED to `b` and accepted (still safe — stays inside base). +// We only assert the cases that Clean() can't rescue. +func TestResolveWorkspaceFilePath_RejectsTraversal(t *testing.T) { + bad := []string{ + "../etc/shadow", // escapes base via .. + "/etc/shadow", // absolute path + "./../../etc", // multiple .. + "a/../../etc", // escapes via deeper .. + } + for _, rel := range bad { + t.Run(rel, func(t *testing.T) { + _, err := resolveWorkspaceFilePath("hermes", rel) + if err == nil { + t.Errorf("resolveWorkspaceFilePath(hermes, %q) should have errored, got nil", rel) + } + }) + } +} + +// TestShellQuote — the sole piece of variable data in the remote ssh +// command is the absolute path. It's already built from a map + Clean() +// so traversal is impossible, but we still single-quote as defence-in- +// depth. Verify the shell-quoting helper handles the single-quote edge +// case and is always wrapped in single quotes. +func TestShellQuote(t *testing.T) { + cases := map[string]string{ + "/home/ubuntu/.hermes/config.yaml": "'/home/ubuntu/.hermes/config.yaml'", + "": "''", + "a'b": `'a'\''b'`, + } + for in, want := range cases { + t.Run(in, func(t *testing.T) { + got := shellQuote(in) + if got != want { + t.Errorf("shellQuote(%q) = %q, want %q", in, got, want) + } + if !strings.HasPrefix(got, "'") || !strings.HasSuffix(got, "'") { + t.Errorf("shellQuote(%q) = %q must be single-quote wrapped", in, got) + } + }) + } +} diff --git a/workspace-server/internal/handlers/template_import.go b/workspace-server/internal/handlers/template_import.go index d3a8557a..5776db3c 100644 --- a/workspace-server/internal/handlers/template_import.go +++ b/workspace-server/internal/handlers/template_import.go @@ -174,8 +174,11 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) { } ctx := c.Request.Context() - var wsName string - if err := db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName); err != nil { + var wsName, instanceID, runtime string + if err := db.DB.QueryRowContext(ctx, + `SELECT name, COALESCE(instance_id, ''), COALESCE(runtime, '') FROM workspaces WHERE id = $1`, + workspaceID, + ).Scan(&wsName, &instanceID, &runtime); err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"}) return } @@ -188,6 +191,28 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) { } } + // SaaS workspace (EC2-per-workspace) — route bulk write through the + // EIC endpoint, one SSH session per file. Per-file cost is ~3s + // (key push + tunnel + install), so up to 10 files is fine; above + // that we should reuse the tunnel across multiple writes — tracked + // as a follow-up. + if instanceID != "" { + for relPath, content := range body.Files { + if err := writeFileViaEIC(ctx, instanceID, runtime, relPath, []byte(content)); err != nil { + log.Printf("ReplaceFiles EIC for %s path=%s: %v", workspaceID, relPath, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to write file %s: %v", relPath, err)}) + return + } + } + c.JSON(http.StatusOK, gin.H{ + "status": "replaced", + "workspace": workspaceID, + "files": len(body.Files), + "source": "ec2-ssh", + }) + return + } + // Write via Docker CopyToContainer when container is running if containerName := h.findContainer(ctx, workspaceID); containerName != "" { if err := h.copyFilesToContainer(ctx, containerName, "/configs", body.Files); err != nil { diff --git a/workspace-server/internal/handlers/templates.go b/workspace-server/internal/handlers/templates.go index 0fd55847..f2d456f0 100644 --- a/workspace-server/internal/handlers/templates.go +++ b/workspace-server/internal/handlers/templates.go @@ -353,13 +353,28 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) { } ctx := c.Request.Context() - var wsName string - if err := db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName); err != nil { + var wsName, instanceID, runtime string + if err := db.DB.QueryRowContext(ctx, + `SELECT name, COALESCE(instance_id, ''), COALESCE(runtime, '') FROM workspaces WHERE id = $1`, + workspaceID, + ).Scan(&wsName, &instanceID, &runtime); err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"}) return } - // Write via Docker CopyToContainer when container is running + // SaaS workspace (EC2-per-workspace) — no Docker on this tenant. Write + // via SSH through the EIC endpoint to the runtime-specific path. + if instanceID != "" { + if err := writeFileViaEIC(ctx, instanceID, runtime, filePath, []byte(body.Content)); err != nil { + log.Printf("WriteFile EIC for %s path=%s: %v", workspaceID, filePath, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to write file: %v", err)}) + return + } + c.JSON(http.StatusOK, gin.H{"status": "saved", "path": filePath}) + return + } + + // Local Docker path — write via CopyToContainer when container is running if containerName := h.findContainer(ctx, workspaceID); containerName != "" { singleFile := map[string]string{filePath: body.Content} if err := h.copyFilesToContainer(ctx, containerName, "/configs", singleFile); err != nil {