Merge pull request #1702 from Molecule-AI/fix/files-api-saas-ssh-write
feat(files-api): SSH-backed write for SaaS workspaces (fixes 500 docker not available)
This commit is contained in:
commit
7207133825
182
workspace-server/internal/handlers/template_files_eic.go
Normal file
182
workspace-server/internal/handlers/template_files_eic.go
Normal file
@ -0,0 +1,182 @@
|
||||
package handlers
|
||||
|
||||
// template_files_eic.go — SSH-backed file write for SaaS workspaces
|
||||
// (EC2-per-workspace). Pairs with the existing Docker-path in templates.go
|
||||
// (WriteFile) and template_import.go (ReplaceFiles).
|
||||
//
|
||||
// Flow for a single file write:
|
||||
// 1. Generate ephemeral ed25519 keypair (on-disk for ≤ write duration).
|
||||
// 2. Push the public key via `aws ec2-instance-connect send-ssh-public-key`
|
||||
// so the target sshd accepts it for the next 60s.
|
||||
// 3. Open a TLS-tunnelled TCP port via `aws ec2-instance-connect open-tunnel`
|
||||
// from a local free port → workspace's sshd on 22.
|
||||
// 4. Pipe content to `ssh ... "install -D -m 0644 /dev/stdin <abs path>"`.
|
||||
// `install -D` creates any missing parent dirs atomically. File is owned
|
||||
// by whichever $OSUser we authenticated as (ubuntu by default).
|
||||
// 5. Close tunnel + wipe keydir.
|
||||
//
|
||||
// All the AWS calls + ssh tunnel exec go through the same package-level
|
||||
// func vars defined in terminal.go (openTunnelCmd, sendSSHPublicKey) so
|
||||
// tests can stub them the same way the terminal tests do.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// workspaceFilePathPrefix maps a runtime name to the absolute base path on
|
||||
// the workspace EC2 where the Files API's relative paths land. New runtimes
|
||||
// can be added here without touching handler code.
|
||||
//
|
||||
// Keep these stable — changing the base path for an existing runtime
|
||||
// without a migration shim will make previously-saved files disappear from
|
||||
// the runtime's POV.
|
||||
var workspaceFilePathPrefix = map[string]string{
|
||||
"hermes": "/home/ubuntu/.hermes",
|
||||
"langgraph": "/opt/configs",
|
||||
"external": "/opt/configs",
|
||||
// Default for unknown / future runtimes is /opt/configs — most
|
||||
// conservative place that doesn't collide with system or runtime-
|
||||
// private directories.
|
||||
}
|
||||
|
||||
func resolveWorkspaceFilePath(runtime, relPath string) (string, error) {
|
||||
if err := validateRelPath(relPath); err != nil {
|
||||
return "", err
|
||||
}
|
||||
base, ok := workspaceFilePathPrefix[strings.ToLower(strings.TrimSpace(runtime))]
|
||||
if !ok {
|
||||
base = "/opt/configs"
|
||||
}
|
||||
return filepath.Join(base, filepath.Clean(relPath)), nil
|
||||
}
|
||||
|
||||
// eicFileWriteTimeout bounds the whole dance. Key push is <500ms, tunnel
|
||||
// is 1-2s, ssh + write is <2s. 30s gives headroom for slow pulls without
|
||||
// hanging the Files API forever under EIC misconfiguration.
|
||||
const eicFileWriteTimeout = 30 * time.Second
|
||||
|
||||
// writeFileViaEIC writes a single file to the workspace EC2 at the
|
||||
// absolute path that resolveWorkspaceFilePath computed. On success,
|
||||
// optionally invokes the runtime's reload hook (not implemented yet —
|
||||
// tracked as follow-up; for today the canvas issues a separate Restart
|
||||
// after Save).
|
||||
//
|
||||
// instanceID: AWS EC2 instance id from workspaces.instance_id.
|
||||
// runtime: used only for path-prefix resolution.
|
||||
// relPath: the relative path the caller validated (no /, no ..).
|
||||
// content: file body bytes.
|
||||
func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, content []byte) error {
|
||||
if instanceID == "" {
|
||||
return fmt.Errorf("workspace has no instance_id — not a SaaS EC2 workspace")
|
||||
}
|
||||
absPath, err := resolveWorkspaceFilePath(runtime, relPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid path: %w", err)
|
||||
}
|
||||
|
||||
osUser := os.Getenv("WORKSPACE_EC2_OS_USER")
|
||||
if osUser == "" {
|
||||
osUser = "ubuntu"
|
||||
}
|
||||
region := os.Getenv("AWS_REGION")
|
||||
if region == "" {
|
||||
region = "us-east-2"
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, eicFileWriteTimeout)
|
||||
defer cancel()
|
||||
|
||||
// Ephemeral keypair.
|
||||
keyDir, err := os.MkdirTemp("", "molecule-filewrite-*")
|
||||
if err != nil {
|
||||
return fmt.Errorf("keydir mkdir: %w", err)
|
||||
}
|
||||
defer func() { _ = os.RemoveAll(keyDir) }()
|
||||
keyPath := keyDir + "/id"
|
||||
if out, kerr := exec.CommandContext(ctx, "ssh-keygen",
|
||||
"-t", "ed25519", "-f", keyPath, "-N", "", "-q",
|
||||
"-C", "molecule-filewrite",
|
||||
).CombinedOutput(); kerr != nil {
|
||||
return fmt.Errorf("ssh-keygen: %w (%s)", kerr, strings.TrimSpace(string(out)))
|
||||
}
|
||||
pubKey, err := os.ReadFile(keyPath + ".pub")
|
||||
if err != nil {
|
||||
return fmt.Errorf("read pubkey: %w", err)
|
||||
}
|
||||
|
||||
// 1. Push key.
|
||||
if err := sendSSHPublicKey(ctx, region, instanceID, osUser, strings.TrimSpace(string(pubKey))); err != nil {
|
||||
return fmt.Errorf("send-ssh-public-key: %w", err)
|
||||
}
|
||||
|
||||
// 2. Open tunnel on an OS-picked free port.
|
||||
localPort, err := pickFreePort()
|
||||
if err != nil {
|
||||
return fmt.Errorf("pick free port: %w", err)
|
||||
}
|
||||
opts := eicSSHOptions{
|
||||
InstanceID: instanceID,
|
||||
OSUser: osUser,
|
||||
Region: region,
|
||||
LocalPort: localPort,
|
||||
PrivateKeyPath: keyPath,
|
||||
}
|
||||
tunnel := openTunnelCmd(opts)
|
||||
tunnel.Env = os.Environ()
|
||||
if err := tunnel.Start(); err != nil {
|
||||
return fmt.Errorf("open-tunnel start: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if tunnel.Process != nil {
|
||||
_ = tunnel.Process.Kill()
|
||||
}
|
||||
_ = tunnel.Wait()
|
||||
}()
|
||||
if err := waitForPort(ctx, "127.0.0.1", localPort, 10*time.Second); err != nil {
|
||||
return fmt.Errorf("tunnel never listened: %w", err)
|
||||
}
|
||||
|
||||
// 3. SSH + install -D. `install` creates any missing parent dirs and
|
||||
// writes the file atomically via temp-file-rename. Permissions 0644
|
||||
// match the existing tar-unpack defaults on the Docker path.
|
||||
//
|
||||
// The remote command is fully deterministic — no user-controlled
|
||||
// input reaches a shell eval (absPath is built from a map + Clean()).
|
||||
sshArgs := []string{
|
||||
"-i", keyPath,
|
||||
"-o", "StrictHostKeyChecking=no",
|
||||
"-o", "UserKnownHostsFile=/dev/null",
|
||||
"-o", "ServerAliveInterval=15",
|
||||
"-p", fmt.Sprintf("%d", localPort),
|
||||
fmt.Sprintf("%s@127.0.0.1", osUser),
|
||||
fmt.Sprintf("install -D -m 0644 /dev/stdin %s", shellQuote(absPath)),
|
||||
}
|
||||
sshCmd := exec.CommandContext(ctx, "ssh", sshArgs...)
|
||||
sshCmd.Env = os.Environ()
|
||||
sshCmd.Stdin = bytes.NewReader(content)
|
||||
var stderr bytes.Buffer
|
||||
sshCmd.Stderr = &stderr
|
||||
if err := sshCmd.Run(); err != nil {
|
||||
return fmt.Errorf("ssh install: %w (%s)", err, strings.TrimSpace(stderr.String()))
|
||||
}
|
||||
log.Printf("writeFileViaEIC: ws instance=%s runtime=%s wrote %d bytes → %s",
|
||||
instanceID, runtime, len(content), absPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// shellQuote wraps a value in single quotes + escapes embedded single
|
||||
// quotes for POSIX sh. Used for the sole piece of variable data in the
|
||||
// remote ssh command. (absPath is already built from a map + Clean() so
|
||||
// traversal is blocked regardless; this is defence-in-depth against
|
||||
// future refactor that might accept user paths here.)
|
||||
func shellQuote(s string) string {
|
||||
return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
|
||||
}
|
||||
@ -0,0 +1,85 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestResolveWorkspaceFilePath_KnownRuntimes — the runtime → base-path
|
||||
// map is the source of truth for where saved files land on the workspace
|
||||
// EC2. Changing a base path without a migration shim silently orphans
|
||||
// previously-saved files; this test pins the current contract.
|
||||
func TestResolveWorkspaceFilePath_KnownRuntimes(t *testing.T) {
|
||||
cases := []struct {
|
||||
runtime string
|
||||
relPath string
|
||||
want string
|
||||
}{
|
||||
{"hermes", "config.yaml", "/home/ubuntu/.hermes/config.yaml"},
|
||||
{"HERMES", "config.yaml", "/home/ubuntu/.hermes/config.yaml"}, // case-insensitive
|
||||
{"hermes", "nested/a.yaml", "/home/ubuntu/.hermes/nested/a.yaml"},
|
||||
{"langgraph", "config.yaml", "/opt/configs/config.yaml"},
|
||||
{"external", "skills.json", "/opt/configs/skills.json"},
|
||||
{"", "config.yaml", "/opt/configs/config.yaml"}, // empty → default
|
||||
{"unknown", "config.yaml", "/opt/configs/config.yaml"}, // unknown → default
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.runtime+"/"+tc.relPath, func(t *testing.T) {
|
||||
got, err := resolveWorkspaceFilePath(tc.runtime, tc.relPath)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if got != tc.want {
|
||||
t.Errorf("resolveWorkspaceFilePath(%q,%q) = %q, want %q",
|
||||
tc.runtime, tc.relPath, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveWorkspaceFilePath_RejectsTraversal — any attempt to escape
|
||||
// the runtime base path via .. or absolute paths must return an error
|
||||
// before the ssh install runs. validateRelPath uses filepath.Clean then
|
||||
// checks for `..` or absolute prefix, so cases like `a/../b` are
|
||||
// NORMALIZED to `b` and accepted (still safe — stays inside base).
|
||||
// We only assert the cases that Clean() can't rescue.
|
||||
func TestResolveWorkspaceFilePath_RejectsTraversal(t *testing.T) {
|
||||
bad := []string{
|
||||
"../etc/shadow", // escapes base via ..
|
||||
"/etc/shadow", // absolute path
|
||||
"./../../etc", // multiple ..
|
||||
"a/../../etc", // escapes via deeper ..
|
||||
}
|
||||
for _, rel := range bad {
|
||||
t.Run(rel, func(t *testing.T) {
|
||||
_, err := resolveWorkspaceFilePath("hermes", rel)
|
||||
if err == nil {
|
||||
t.Errorf("resolveWorkspaceFilePath(hermes, %q) should have errored, got nil", rel)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestShellQuote — the sole piece of variable data in the remote ssh
|
||||
// command is the absolute path. It's already built from a map + Clean()
|
||||
// so traversal is impossible, but we still single-quote as defence-in-
|
||||
// depth. Verify the shell-quoting helper handles the single-quote edge
|
||||
// case and is always wrapped in single quotes.
|
||||
func TestShellQuote(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"/home/ubuntu/.hermes/config.yaml": "'/home/ubuntu/.hermes/config.yaml'",
|
||||
"": "''",
|
||||
"a'b": `'a'\''b'`,
|
||||
}
|
||||
for in, want := range cases {
|
||||
t.Run(in, func(t *testing.T) {
|
||||
got := shellQuote(in)
|
||||
if got != want {
|
||||
t.Errorf("shellQuote(%q) = %q, want %q", in, got, want)
|
||||
}
|
||||
if !strings.HasPrefix(got, "'") || !strings.HasSuffix(got, "'") {
|
||||
t.Errorf("shellQuote(%q) = %q must be single-quote wrapped", in, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -174,8 +174,11 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
|
||||
}
|
||||
|
||||
ctx := c.Request.Context()
|
||||
var wsName string
|
||||
if err := db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName); err != nil {
|
||||
var wsName, instanceID, runtime string
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT name, COALESCE(instance_id, ''), COALESCE(runtime, '') FROM workspaces WHERE id = $1`,
|
||||
workspaceID,
|
||||
).Scan(&wsName, &instanceID, &runtime); err != nil {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
}
|
||||
@ -188,6 +191,28 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// SaaS workspace (EC2-per-workspace) — route bulk write through the
|
||||
// EIC endpoint, one SSH session per file. Per-file cost is ~3s
|
||||
// (key push + tunnel + install), so up to 10 files is fine; above
|
||||
// that we should reuse the tunnel across multiple writes — tracked
|
||||
// as a follow-up.
|
||||
if instanceID != "" {
|
||||
for relPath, content := range body.Files {
|
||||
if err := writeFileViaEIC(ctx, instanceID, runtime, relPath, []byte(content)); err != nil {
|
||||
log.Printf("ReplaceFiles EIC for %s path=%s: %v", workspaceID, relPath, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to write file %s: %v", relPath, err)})
|
||||
return
|
||||
}
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"status": "replaced",
|
||||
"workspace": workspaceID,
|
||||
"files": len(body.Files),
|
||||
"source": "ec2-ssh",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// Write via Docker CopyToContainer when container is running
|
||||
if containerName := h.findContainer(ctx, workspaceID); containerName != "" {
|
||||
if err := h.copyFilesToContainer(ctx, containerName, "/configs", body.Files); err != nil {
|
||||
|
||||
@ -353,13 +353,28 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) {
|
||||
}
|
||||
|
||||
ctx := c.Request.Context()
|
||||
var wsName string
|
||||
if err := db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName); err != nil {
|
||||
var wsName, instanceID, runtime string
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT name, COALESCE(instance_id, ''), COALESCE(runtime, '') FROM workspaces WHERE id = $1`,
|
||||
workspaceID,
|
||||
).Scan(&wsName, &instanceID, &runtime); err != nil {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
}
|
||||
|
||||
// Write via Docker CopyToContainer when container is running
|
||||
// SaaS workspace (EC2-per-workspace) — no Docker on this tenant. Write
|
||||
// via SSH through the EIC endpoint to the runtime-specific path.
|
||||
if instanceID != "" {
|
||||
if err := writeFileViaEIC(ctx, instanceID, runtime, filePath, []byte(body.Content)); err != nil {
|
||||
log.Printf("WriteFile EIC for %s path=%s: %v", workspaceID, filePath, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to write file: %v", err)})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"status": "saved", "path": filePath})
|
||||
return
|
||||
}
|
||||
|
||||
// Local Docker path — write via CopyToContainer when container is running
|
||||
if containerName := h.findContainer(ctx, workspaceID); containerName != "" {
|
||||
singleFile := map[string]string{filePath: body.Content}
|
||||
if err := h.copyFilesToContainer(ctx, containerName, "/configs", singleFile); err != nil {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user