feat(files-api): SSH-backed write for SaaS workspaces (fixes 500 docker not available)
Symptom (prod, hongmingwang tenant, 2026-04-22):
PUT /workspaces/:id/files/config.yaml → 500
{"error":"failed to write file: docker not available"}
Root cause: WriteFile + ReplaceFiles always reached for the tenant's
Docker client, but SaaS workspaces run as EC2 VMs (no Docker on the
tenant to cp into). There was no SaaS code path, so Save/Save&Restart
in the Config tab silently 500'd for every SaaS user.
Fix: add writeFileViaEIC — same ephemeral-keypair + EIC-tunnel dance
that the Terminal tab already uses (terminal.go). Flow:
1. ssh-keygen ephemeral ed25519 pair
2. aws ec2-instance-connect send-ssh-public-key (60s validity)
3. aws ec2-instance-connect open-tunnel (TLS → :22)
4. ssh ... "install -D -m 0644 /dev/stdin <abs path>"
install -D creates missing parent dirs atomically
5. Kill tunnel + wipe keydir
Runtime → base-path map (new table workspaceFilePathPrefix):
hermes → /home/ubuntu/.hermes
langgraph → /opt/configs
external → /opt/configs
unknown → /opt/configs
Both WriteFile (single file) and ReplaceFiles (bulk) detect
`workspaces.instance_id != ''` and route to EIC instead of Docker.
Local/self-hosted Docker path is unchanged.
Security: the only variable piece in the remote ssh command is the
absolute path, which is built via map lookup + filepath.Clean so
traversal is blocked. shellQuote() wraps it as defence-in-depth.
validateRelPath rejects absolute paths and surviving `..` segments
up-front; tests assert traversal rejection.
Follow-ups tracked separately:
- Reload hook after save (hermes gateway restart via SSH)
- Per-tunnel batching for ReplaceFiles with many files
- Runtime-specific base paths should be declared in the runtime
manifest, not hardcoded in the handler
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2b603164de
commit
03741d1110
182
workspace-server/internal/handlers/template_files_eic.go
Normal file
182
workspace-server/internal/handlers/template_files_eic.go
Normal file
@ -0,0 +1,182 @@
|
||||
package handlers
|
||||
|
||||
// template_files_eic.go — SSH-backed file write for SaaS workspaces
|
||||
// (EC2-per-workspace). Pairs with the existing Docker-path in templates.go
|
||||
// (WriteFile) and template_import.go (ReplaceFiles).
|
||||
//
|
||||
// Flow for a single file write:
|
||||
// 1. Generate ephemeral ed25519 keypair (on-disk for ≤ write duration).
|
||||
// 2. Push the public key via `aws ec2-instance-connect send-ssh-public-key`
|
||||
// so the target sshd accepts it for the next 60s.
|
||||
// 3. Open a TLS-tunnelled TCP port via `aws ec2-instance-connect open-tunnel`
|
||||
// from a local free port → workspace's sshd on 22.
|
||||
// 4. Pipe content to `ssh ... "install -D -m 0644 /dev/stdin <abs path>"`.
|
||||
// `install -D` creates any missing parent dirs atomically. File is owned
|
||||
// by whichever $OSUser we authenticated as (ubuntu by default).
|
||||
// 5. Close tunnel + wipe keydir.
|
||||
//
|
||||
// All the AWS calls + ssh tunnel exec go through the same package-level
|
||||
// func vars defined in terminal.go (openTunnelCmd, sendSSHPublicKey) so
|
||||
// tests can stub them the same way the terminal tests do.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// workspaceFilePathPrefix maps a runtime name to the absolute base path on
|
||||
// the workspace EC2 where the Files API's relative paths land. New runtimes
|
||||
// can be added here without touching handler code.
|
||||
//
|
||||
// Keep these stable — changing the base path for an existing runtime
|
||||
// without a migration shim will make previously-saved files disappear from
|
||||
// the runtime's POV.
|
||||
var workspaceFilePathPrefix = map[string]string{
|
||||
"hermes": "/home/ubuntu/.hermes",
|
||||
"langgraph": "/opt/configs",
|
||||
"external": "/opt/configs",
|
||||
// Default for unknown / future runtimes is /opt/configs — most
|
||||
// conservative place that doesn't collide with system or runtime-
|
||||
// private directories.
|
||||
}
|
||||
|
||||
func resolveWorkspaceFilePath(runtime, relPath string) (string, error) {
|
||||
if err := validateRelPath(relPath); err != nil {
|
||||
return "", err
|
||||
}
|
||||
base, ok := workspaceFilePathPrefix[strings.ToLower(strings.TrimSpace(runtime))]
|
||||
if !ok {
|
||||
base = "/opt/configs"
|
||||
}
|
||||
return filepath.Join(base, filepath.Clean(relPath)), nil
|
||||
}
|
||||
|
||||
// eicFileWriteTimeout bounds the whole dance. Key push is <500ms, tunnel
|
||||
// is 1-2s, ssh + write is <2s. 30s gives headroom for slow pulls without
|
||||
// hanging the Files API forever under EIC misconfiguration.
|
||||
const eicFileWriteTimeout = 30 * time.Second
|
||||
|
||||
// writeFileViaEIC writes a single file to the workspace EC2 at the
|
||||
// absolute path that resolveWorkspaceFilePath computed. On success,
|
||||
// optionally invokes the runtime's reload hook (not implemented yet —
|
||||
// tracked as follow-up; for today the canvas issues a separate Restart
|
||||
// after Save).
|
||||
//
|
||||
// instanceID: AWS EC2 instance id from workspaces.instance_id.
|
||||
// runtime: used only for path-prefix resolution.
|
||||
// relPath: the relative path the caller validated (no /, no ..).
|
||||
// content: file body bytes.
|
||||
func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, content []byte) error {
|
||||
if instanceID == "" {
|
||||
return fmt.Errorf("workspace has no instance_id — not a SaaS EC2 workspace")
|
||||
}
|
||||
absPath, err := resolveWorkspaceFilePath(runtime, relPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid path: %w", err)
|
||||
}
|
||||
|
||||
osUser := os.Getenv("WORKSPACE_EC2_OS_USER")
|
||||
if osUser == "" {
|
||||
osUser = "ubuntu"
|
||||
}
|
||||
region := os.Getenv("AWS_REGION")
|
||||
if region == "" {
|
||||
region = "us-east-2"
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, eicFileWriteTimeout)
|
||||
defer cancel()
|
||||
|
||||
// Ephemeral keypair.
|
||||
keyDir, err := os.MkdirTemp("", "molecule-filewrite-*")
|
||||
if err != nil {
|
||||
return fmt.Errorf("keydir mkdir: %w", err)
|
||||
}
|
||||
defer func() { _ = os.RemoveAll(keyDir) }()
|
||||
keyPath := keyDir + "/id"
|
||||
if out, kerr := exec.CommandContext(ctx, "ssh-keygen",
|
||||
"-t", "ed25519", "-f", keyPath, "-N", "", "-q",
|
||||
"-C", "molecule-filewrite",
|
||||
).CombinedOutput(); kerr != nil {
|
||||
return fmt.Errorf("ssh-keygen: %w (%s)", kerr, strings.TrimSpace(string(out)))
|
||||
}
|
||||
pubKey, err := os.ReadFile(keyPath + ".pub")
|
||||
if err != nil {
|
||||
return fmt.Errorf("read pubkey: %w", err)
|
||||
}
|
||||
|
||||
// 1. Push key.
|
||||
if err := sendSSHPublicKey(ctx, region, instanceID, osUser, strings.TrimSpace(string(pubKey))); err != nil {
|
||||
return fmt.Errorf("send-ssh-public-key: %w", err)
|
||||
}
|
||||
|
||||
// 2. Open tunnel on an OS-picked free port.
|
||||
localPort, err := pickFreePort()
|
||||
if err != nil {
|
||||
return fmt.Errorf("pick free port: %w", err)
|
||||
}
|
||||
opts := eicSSHOptions{
|
||||
InstanceID: instanceID,
|
||||
OSUser: osUser,
|
||||
Region: region,
|
||||
LocalPort: localPort,
|
||||
PrivateKeyPath: keyPath,
|
||||
}
|
||||
tunnel := openTunnelCmd(opts)
|
||||
tunnel.Env = os.Environ()
|
||||
if err := tunnel.Start(); err != nil {
|
||||
return fmt.Errorf("open-tunnel start: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if tunnel.Process != nil {
|
||||
_ = tunnel.Process.Kill()
|
||||
}
|
||||
_ = tunnel.Wait()
|
||||
}()
|
||||
if err := waitForPort(ctx, "127.0.0.1", localPort, 10*time.Second); err != nil {
|
||||
return fmt.Errorf("tunnel never listened: %w", err)
|
||||
}
|
||||
|
||||
// 3. SSH + install -D. `install` creates any missing parent dirs and
|
||||
// writes the file atomically via temp-file-rename. Permissions 0644
|
||||
// match the existing tar-unpack defaults on the Docker path.
|
||||
//
|
||||
// The remote command is fully deterministic — no user-controlled
|
||||
// input reaches a shell eval (absPath is built from a map + Clean()).
|
||||
sshArgs := []string{
|
||||
"-i", keyPath,
|
||||
"-o", "StrictHostKeyChecking=no",
|
||||
"-o", "UserKnownHostsFile=/dev/null",
|
||||
"-o", "ServerAliveInterval=15",
|
||||
"-p", fmt.Sprintf("%d", localPort),
|
||||
fmt.Sprintf("%s@127.0.0.1", osUser),
|
||||
fmt.Sprintf("install -D -m 0644 /dev/stdin %s", shellQuote(absPath)),
|
||||
}
|
||||
sshCmd := exec.CommandContext(ctx, "ssh", sshArgs...)
|
||||
sshCmd.Env = os.Environ()
|
||||
sshCmd.Stdin = bytes.NewReader(content)
|
||||
var stderr bytes.Buffer
|
||||
sshCmd.Stderr = &stderr
|
||||
if err := sshCmd.Run(); err != nil {
|
||||
return fmt.Errorf("ssh install: %w (%s)", err, strings.TrimSpace(stderr.String()))
|
||||
}
|
||||
log.Printf("writeFileViaEIC: ws instance=%s runtime=%s wrote %d bytes → %s",
|
||||
instanceID, runtime, len(content), absPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// shellQuote wraps a value in single quotes + escapes embedded single
|
||||
// quotes for POSIX sh. Used for the sole piece of variable data in the
|
||||
// remote ssh command. (absPath is already built from a map + Clean() so
|
||||
// traversal is blocked regardless; this is defence-in-depth against
|
||||
// future refactor that might accept user paths here.)
|
||||
func shellQuote(s string) string {
|
||||
return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
|
||||
}
|
||||
@ -0,0 +1,85 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestResolveWorkspaceFilePath_KnownRuntimes — the runtime → base-path
|
||||
// map is the source of truth for where saved files land on the workspace
|
||||
// EC2. Changing a base path without a migration shim silently orphans
|
||||
// previously-saved files; this test pins the current contract.
|
||||
func TestResolveWorkspaceFilePath_KnownRuntimes(t *testing.T) {
|
||||
cases := []struct {
|
||||
runtime string
|
||||
relPath string
|
||||
want string
|
||||
}{
|
||||
{"hermes", "config.yaml", "/home/ubuntu/.hermes/config.yaml"},
|
||||
{"HERMES", "config.yaml", "/home/ubuntu/.hermes/config.yaml"}, // case-insensitive
|
||||
{"hermes", "nested/a.yaml", "/home/ubuntu/.hermes/nested/a.yaml"},
|
||||
{"langgraph", "config.yaml", "/opt/configs/config.yaml"},
|
||||
{"external", "skills.json", "/opt/configs/skills.json"},
|
||||
{"", "config.yaml", "/opt/configs/config.yaml"}, // empty → default
|
||||
{"unknown", "config.yaml", "/opt/configs/config.yaml"}, // unknown → default
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.runtime+"/"+tc.relPath, func(t *testing.T) {
|
||||
got, err := resolveWorkspaceFilePath(tc.runtime, tc.relPath)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if got != tc.want {
|
||||
t.Errorf("resolveWorkspaceFilePath(%q,%q) = %q, want %q",
|
||||
tc.runtime, tc.relPath, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveWorkspaceFilePath_RejectsTraversal — any attempt to escape
|
||||
// the runtime base path via .. or absolute paths must return an error
|
||||
// before the ssh install runs. validateRelPath uses filepath.Clean then
|
||||
// checks for `..` or absolute prefix, so cases like `a/../b` are
|
||||
// NORMALIZED to `b` and accepted (still safe — stays inside base).
|
||||
// We only assert the cases that Clean() can't rescue.
|
||||
func TestResolveWorkspaceFilePath_RejectsTraversal(t *testing.T) {
|
||||
bad := []string{
|
||||
"../etc/shadow", // escapes base via ..
|
||||
"/etc/shadow", // absolute path
|
||||
"./../../etc", // multiple ..
|
||||
"a/../../etc", // escapes via deeper ..
|
||||
}
|
||||
for _, rel := range bad {
|
||||
t.Run(rel, func(t *testing.T) {
|
||||
_, err := resolveWorkspaceFilePath("hermes", rel)
|
||||
if err == nil {
|
||||
t.Errorf("resolveWorkspaceFilePath(hermes, %q) should have errored, got nil", rel)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestShellQuote — the sole piece of variable data in the remote ssh
|
||||
// command is the absolute path. It's already built from a map + Clean()
|
||||
// so traversal is impossible, but we still single-quote as defence-in-
|
||||
// depth. Verify the shell-quoting helper handles the single-quote edge
|
||||
// case and is always wrapped in single quotes.
|
||||
func TestShellQuote(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"/home/ubuntu/.hermes/config.yaml": "'/home/ubuntu/.hermes/config.yaml'",
|
||||
"": "''",
|
||||
"a'b": `'a'\''b'`,
|
||||
}
|
||||
for in, want := range cases {
|
||||
t.Run(in, func(t *testing.T) {
|
||||
got := shellQuote(in)
|
||||
if got != want {
|
||||
t.Errorf("shellQuote(%q) = %q, want %q", in, got, want)
|
||||
}
|
||||
if !strings.HasPrefix(got, "'") || !strings.HasSuffix(got, "'") {
|
||||
t.Errorf("shellQuote(%q) = %q must be single-quote wrapped", in, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -174,8 +174,11 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
|
||||
}
|
||||
|
||||
ctx := c.Request.Context()
|
||||
var wsName string
|
||||
if err := db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName); err != nil {
|
||||
var wsName, instanceID, runtime string
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT name, COALESCE(instance_id, ''), COALESCE(runtime, '') FROM workspaces WHERE id = $1`,
|
||||
workspaceID,
|
||||
).Scan(&wsName, &instanceID, &runtime); err != nil {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
}
|
||||
@ -188,6 +191,28 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// SaaS workspace (EC2-per-workspace) — route bulk write through the
|
||||
// EIC endpoint, one SSH session per file. Per-file cost is ~3s
|
||||
// (key push + tunnel + install), so up to 10 files is fine; above
|
||||
// that we should reuse the tunnel across multiple writes — tracked
|
||||
// as a follow-up.
|
||||
if instanceID != "" {
|
||||
for relPath, content := range body.Files {
|
||||
if err := writeFileViaEIC(ctx, instanceID, runtime, relPath, []byte(content)); err != nil {
|
||||
log.Printf("ReplaceFiles EIC for %s path=%s: %v", workspaceID, relPath, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to write file %s: %v", relPath, err)})
|
||||
return
|
||||
}
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"status": "replaced",
|
||||
"workspace": workspaceID,
|
||||
"files": len(body.Files),
|
||||
"source": "ec2-ssh",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// Write via Docker CopyToContainer when container is running
|
||||
if containerName := h.findContainer(ctx, workspaceID); containerName != "" {
|
||||
if err := h.copyFilesToContainer(ctx, containerName, "/configs", body.Files); err != nil {
|
||||
|
||||
@ -353,13 +353,28 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) {
|
||||
}
|
||||
|
||||
ctx := c.Request.Context()
|
||||
var wsName string
|
||||
if err := db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName); err != nil {
|
||||
var wsName, instanceID, runtime string
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT name, COALESCE(instance_id, ''), COALESCE(runtime, '') FROM workspaces WHERE id = $1`,
|
||||
workspaceID,
|
||||
).Scan(&wsName, &instanceID, &runtime); err != nil {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
}
|
||||
|
||||
// Write via Docker CopyToContainer when container is running
|
||||
// SaaS workspace (EC2-per-workspace) — no Docker on this tenant. Write
|
||||
// via SSH through the EIC endpoint to the runtime-specific path.
|
||||
if instanceID != "" {
|
||||
if err := writeFileViaEIC(ctx, instanceID, runtime, filePath, []byte(body.Content)); err != nil {
|
||||
log.Printf("WriteFile EIC for %s path=%s: %v", workspaceID, filePath, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to write file: %v", err)})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"status": "saved", "path": filePath})
|
||||
return
|
||||
}
|
||||
|
||||
// Local Docker path — write via CopyToContainer when container is running
|
||||
if containerName := h.findContainer(ctx, workspaceID); containerName != "" {
|
||||
singleFile := map[string]string{filePath: body.Content}
|
||||
if err := h.copyFilesToContainer(ctx, containerName, "/configs", singleFile); err != nil {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user