Merge pull request #1702 from Molecule-AI/fix/files-api-saas-ssh-write

feat(files-api): SSH-backed write for SaaS workspaces (fixes 500 docker not available)
This commit is contained in:
Hongming Wang 2026-04-22 18:45:52 -07:00 committed by GitHub
commit 7207133825
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 312 additions and 5 deletions

View File

@ -0,0 +1,182 @@
package handlers
// template_files_eic.go — SSH-backed file write for SaaS workspaces
// (EC2-per-workspace). Pairs with the existing Docker-path in templates.go
// (WriteFile) and template_import.go (ReplaceFiles).
//
// Flow for a single file write:
// 1. Generate ephemeral ed25519 keypair (on-disk for ≤ write duration).
// 2. Push the public key via `aws ec2-instance-connect send-ssh-public-key`
// so the target sshd accepts it for the next 60s.
// 3. Open a TLS-tunnelled TCP port via `aws ec2-instance-connect open-tunnel`
// from a local free port → workspace's sshd on 22.
// 4. Pipe content to `ssh ... "install -D -m 0644 /dev/stdin <abs path>"`.
// `install -D` creates any missing parent dirs atomically. File is owned
// by whichever $OSUser we authenticated as (ubuntu by default).
// 5. Close tunnel + wipe keydir.
//
// All the AWS calls + ssh tunnel exec go through the same package-level
// func vars defined in terminal.go (openTunnelCmd, sendSSHPublicKey) so
// tests can stub them the same way the terminal tests do.
import (
"bytes"
"context"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// workspaceFilePathPrefix maps a runtime name to the absolute base path on
// the workspace EC2 where the Files API's relative paths land. New runtimes
// can be added here without touching handler code.
//
// Keep these stable — changing the base path for an existing runtime
// without a migration shim will make previously-saved files disappear from
// the runtime's POV.
var workspaceFilePathPrefix = map[string]string{
"hermes": "/home/ubuntu/.hermes",
"langgraph": "/opt/configs",
"external": "/opt/configs",
// Default for unknown / future runtimes is /opt/configs — most
// conservative place that doesn't collide with system or runtime-
// private directories.
}
func resolveWorkspaceFilePath(runtime, relPath string) (string, error) {
if err := validateRelPath(relPath); err != nil {
return "", err
}
base, ok := workspaceFilePathPrefix[strings.ToLower(strings.TrimSpace(runtime))]
if !ok {
base = "/opt/configs"
}
return filepath.Join(base, filepath.Clean(relPath)), nil
}
// eicFileWriteTimeout bounds the whole dance. Key push is <500ms, tunnel
// is 1-2s, ssh + write is <2s. 30s gives headroom for slow pulls without
// hanging the Files API forever under EIC misconfiguration.
const eicFileWriteTimeout = 30 * time.Second
// writeFileViaEIC writes a single file to the workspace EC2 at the
// absolute path that resolveWorkspaceFilePath computed. On success,
// optionally invokes the runtime's reload hook (not implemented yet —
// tracked as follow-up; for today the canvas issues a separate Restart
// after Save).
//
// instanceID: AWS EC2 instance id from workspaces.instance_id.
// runtime: used only for path-prefix resolution.
// relPath: the relative path the caller validated (no /, no ..).
// content: file body bytes.
func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, content []byte) error {
if instanceID == "" {
return fmt.Errorf("workspace has no instance_id — not a SaaS EC2 workspace")
}
absPath, err := resolveWorkspaceFilePath(runtime, relPath)
if err != nil {
return fmt.Errorf("invalid path: %w", err)
}
osUser := os.Getenv("WORKSPACE_EC2_OS_USER")
if osUser == "" {
osUser = "ubuntu"
}
region := os.Getenv("AWS_REGION")
if region == "" {
region = "us-east-2"
}
ctx, cancel := context.WithTimeout(ctx, eicFileWriteTimeout)
defer cancel()
// Ephemeral keypair.
keyDir, err := os.MkdirTemp("", "molecule-filewrite-*")
if err != nil {
return fmt.Errorf("keydir mkdir: %w", err)
}
defer func() { _ = os.RemoveAll(keyDir) }()
keyPath := keyDir + "/id"
if out, kerr := exec.CommandContext(ctx, "ssh-keygen",
"-t", "ed25519", "-f", keyPath, "-N", "", "-q",
"-C", "molecule-filewrite",
).CombinedOutput(); kerr != nil {
return fmt.Errorf("ssh-keygen: %w (%s)", kerr, strings.TrimSpace(string(out)))
}
pubKey, err := os.ReadFile(keyPath + ".pub")
if err != nil {
return fmt.Errorf("read pubkey: %w", err)
}
// 1. Push key.
if err := sendSSHPublicKey(ctx, region, instanceID, osUser, strings.TrimSpace(string(pubKey))); err != nil {
return fmt.Errorf("send-ssh-public-key: %w", err)
}
// 2. Open tunnel on an OS-picked free port.
localPort, err := pickFreePort()
if err != nil {
return fmt.Errorf("pick free port: %w", err)
}
opts := eicSSHOptions{
InstanceID: instanceID,
OSUser: osUser,
Region: region,
LocalPort: localPort,
PrivateKeyPath: keyPath,
}
tunnel := openTunnelCmd(opts)
tunnel.Env = os.Environ()
if err := tunnel.Start(); err != nil {
return fmt.Errorf("open-tunnel start: %w", err)
}
defer func() {
if tunnel.Process != nil {
_ = tunnel.Process.Kill()
}
_ = tunnel.Wait()
}()
if err := waitForPort(ctx, "127.0.0.1", localPort, 10*time.Second); err != nil {
return fmt.Errorf("tunnel never listened: %w", err)
}
// 3. SSH + install -D. `install` creates any missing parent dirs and
// writes the file atomically via temp-file-rename. Permissions 0644
// match the existing tar-unpack defaults on the Docker path.
//
// The remote command is fully deterministic — no user-controlled
// input reaches a shell eval (absPath is built from a map + Clean()).
sshArgs := []string{
"-i", keyPath,
"-o", "StrictHostKeyChecking=no",
"-o", "UserKnownHostsFile=/dev/null",
"-o", "ServerAliveInterval=15",
"-p", fmt.Sprintf("%d", localPort),
fmt.Sprintf("%s@127.0.0.1", osUser),
fmt.Sprintf("install -D -m 0644 /dev/stdin %s", shellQuote(absPath)),
}
sshCmd := exec.CommandContext(ctx, "ssh", sshArgs...)
sshCmd.Env = os.Environ()
sshCmd.Stdin = bytes.NewReader(content)
var stderr bytes.Buffer
sshCmd.Stderr = &stderr
if err := sshCmd.Run(); err != nil {
return fmt.Errorf("ssh install: %w (%s)", err, strings.TrimSpace(stderr.String()))
}
log.Printf("writeFileViaEIC: ws instance=%s runtime=%s wrote %d bytes → %s",
instanceID, runtime, len(content), absPath)
return nil
}
// shellQuote wraps a value in single quotes + escapes embedded single
// quotes for POSIX sh. Used for the sole piece of variable data in the
// remote ssh command. (absPath is already built from a map + Clean() so
// traversal is blocked regardless; this is defence-in-depth against
// future refactor that might accept user paths here.)
func shellQuote(s string) string {
return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}

View File

@ -0,0 +1,85 @@
package handlers
import (
"strings"
"testing"
)
// TestResolveWorkspaceFilePath_KnownRuntimes — the runtime → base-path
// map is the source of truth for where saved files land on the workspace
// EC2. Changing a base path without a migration shim silently orphans
// previously-saved files; this test pins the current contract.
func TestResolveWorkspaceFilePath_KnownRuntimes(t *testing.T) {
cases := []struct {
runtime string
relPath string
want string
}{
{"hermes", "config.yaml", "/home/ubuntu/.hermes/config.yaml"},
{"HERMES", "config.yaml", "/home/ubuntu/.hermes/config.yaml"}, // case-insensitive
{"hermes", "nested/a.yaml", "/home/ubuntu/.hermes/nested/a.yaml"},
{"langgraph", "config.yaml", "/opt/configs/config.yaml"},
{"external", "skills.json", "/opt/configs/skills.json"},
{"", "config.yaml", "/opt/configs/config.yaml"}, // empty → default
{"unknown", "config.yaml", "/opt/configs/config.yaml"}, // unknown → default
}
for _, tc := range cases {
t.Run(tc.runtime+"/"+tc.relPath, func(t *testing.T) {
got, err := resolveWorkspaceFilePath(tc.runtime, tc.relPath)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if got != tc.want {
t.Errorf("resolveWorkspaceFilePath(%q,%q) = %q, want %q",
tc.runtime, tc.relPath, got, tc.want)
}
})
}
}
// TestResolveWorkspaceFilePath_RejectsTraversal — any attempt to escape
// the runtime base path via .. or absolute paths must return an error
// before the ssh install runs. validateRelPath uses filepath.Clean then
// checks for `..` or absolute prefix, so cases like `a/../b` are
// NORMALIZED to `b` and accepted (still safe — stays inside base).
// We only assert the cases that Clean() can't rescue.
func TestResolveWorkspaceFilePath_RejectsTraversal(t *testing.T) {
bad := []string{
"../etc/shadow", // escapes base via ..
"/etc/shadow", // absolute path
"./../../etc", // multiple ..
"a/../../etc", // escapes via deeper ..
}
for _, rel := range bad {
t.Run(rel, func(t *testing.T) {
_, err := resolveWorkspaceFilePath("hermes", rel)
if err == nil {
t.Errorf("resolveWorkspaceFilePath(hermes, %q) should have errored, got nil", rel)
}
})
}
}
// TestShellQuote — the sole piece of variable data in the remote ssh
// command is the absolute path. It's already built from a map + Clean()
// so traversal is impossible, but we still single-quote as defence-in-
// depth. Verify the shell-quoting helper handles the single-quote edge
// case and is always wrapped in single quotes.
func TestShellQuote(t *testing.T) {
cases := map[string]string{
"/home/ubuntu/.hermes/config.yaml": "'/home/ubuntu/.hermes/config.yaml'",
"": "''",
"a'b": `'a'\''b'`,
}
for in, want := range cases {
t.Run(in, func(t *testing.T) {
got := shellQuote(in)
if got != want {
t.Errorf("shellQuote(%q) = %q, want %q", in, got, want)
}
if !strings.HasPrefix(got, "'") || !strings.HasSuffix(got, "'") {
t.Errorf("shellQuote(%q) = %q must be single-quote wrapped", in, got)
}
})
}
}

View File

@ -174,8 +174,11 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
}
ctx := c.Request.Context()
var wsName string
if err := db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName); err != nil {
var wsName, instanceID, runtime string
if err := db.DB.QueryRowContext(ctx,
`SELECT name, COALESCE(instance_id, ''), COALESCE(runtime, '') FROM workspaces WHERE id = $1`,
workspaceID,
).Scan(&wsName, &instanceID, &runtime); err != nil {
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
return
}
@ -188,6 +191,28 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
}
}
// SaaS workspace (EC2-per-workspace) — route bulk write through the
// EIC endpoint, one SSH session per file. Per-file cost is ~3s
// (key push + tunnel + install), so up to 10 files is fine; above
// that we should reuse the tunnel across multiple writes — tracked
// as a follow-up.
if instanceID != "" {
for relPath, content := range body.Files {
if err := writeFileViaEIC(ctx, instanceID, runtime, relPath, []byte(content)); err != nil {
log.Printf("ReplaceFiles EIC for %s path=%s: %v", workspaceID, relPath, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to write file %s: %v", relPath, err)})
return
}
}
c.JSON(http.StatusOK, gin.H{
"status": "replaced",
"workspace": workspaceID,
"files": len(body.Files),
"source": "ec2-ssh",
})
return
}
// Write via Docker CopyToContainer when container is running
if containerName := h.findContainer(ctx, workspaceID); containerName != "" {
if err := h.copyFilesToContainer(ctx, containerName, "/configs", body.Files); err != nil {

View File

@ -353,13 +353,28 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) {
}
ctx := c.Request.Context()
var wsName string
if err := db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName); err != nil {
var wsName, instanceID, runtime string
if err := db.DB.QueryRowContext(ctx,
`SELECT name, COALESCE(instance_id, ''), COALESCE(runtime, '') FROM workspaces WHERE id = $1`,
workspaceID,
).Scan(&wsName, &instanceID, &runtime); err != nil {
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
return
}
// Write via Docker CopyToContainer when container is running
// SaaS workspace (EC2-per-workspace) — no Docker on this tenant. Write
// via SSH through the EIC endpoint to the runtime-specific path.
if instanceID != "" {
if err := writeFileViaEIC(ctx, instanceID, runtime, filePath, []byte(body.Content)); err != nil {
log.Printf("WriteFile EIC for %s path=%s: %v", workspaceID, filePath, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to write file: %v", err)})
return
}
c.JSON(http.StatusOK, gin.H{"status": "saved", "path": filePath})
return
}
// Local Docker path — write via CopyToContainer when container is running
if containerName := h.findContainer(ctx, workspaceID); containerName != "" {
singleFile := map[string]string{filePath: body.Content}
if err := h.copyFilesToContainer(ctx, containerName, "/configs", singleFile); err != nil {