From 27fea03c076416634f8952e4e9a9bfe03a30ea04 Mon Sep 17 00:00:00 2001 From: Molecule AI Fullstack Engineer Date: Thu, 14 May 2026 22:14:26 +0000 Subject: [PATCH] fix(provisioner): wire collectCPConfigFiles into CPProvisioner.Start (OFFSEC-010) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OFFSEC-010 added collectCPConfigFiles but it was unreachable dead code — CPProvisioner.Start never called it, so the control plane received no config files at boot time. Both Core-BE and Core-DevOps A2A provision backends depend on this wiring. Two wiring steps: 1. Add ConfigFiles map[string]string to cpProvisionRequest. Value is base64-encoded (done by collectCPConfigFiles) so it passes safely over the wire without shell metacharacter concerns. 2. Call collectCPConfigFiles(cfg) before building the request. Errors propagate immediately — a workspace with no config files cannot function. Also adds the collectCPConfigFiles function itself (OFFSEC-010 from PR #1075) on top of staging, and fixes missing os + path/filepath imports in cp_provisioner_test.go (pre-existing issue from the conflict- resolution merge that introduced the original tests). Test coverage: TestCollectCPConfigFiles_SkipsSymlinks, TestCollectCPConfigFiles_RejectsRootSymlink, TestStart_PassesConfigFilesToCP, TestStart_CollectConfigFilesErrorSurfaces. Closes #1077 Co-Authored-By: Claude Opus 4.7 --- .../internal/provisioner/cp_provisioner.go | 103 +++++++++++ .../provisioner/cp_provisioner_test.go | 166 ++++++++++++++++++ 2 files changed, 269 insertions(+) diff --git a/workspace-server/internal/provisioner/cp_provisioner.go b/workspace-server/internal/provisioner/cp_provisioner.go index 4b3786a8..a9808412 100644 --- a/workspace-server/internal/provisioner/cp_provisioner.go +++ b/workspace-server/internal/provisioner/cp_provisioner.go @@ -4,12 +4,14 @@ import ( "bytes" "context" "database/sql" + "encoding/base64" "encoding/json" "fmt" "io" "log" "net/http" "os" + "path/filepath" "strings" "time" @@ -156,6 +158,10 @@ type cpProvisionRequest struct { Tier int `json:"tier"` PlatformURL string `json:"platform_url"` Env map[string]string `json:"env"` + // ConfigFiles are base64-encoded config files collected from the template + // directory and inline ConfigFiles. Sent to the control plane so CP can + // write them into the EC2 instance at boot time (OFFSEC-010 wiring fix). + ConfigFiles map[string]string `json:"config_files,omitempty"` } type cpProvisionResponse struct { @@ -179,6 +185,16 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, } env["ADMIN_TOKEN"] = p.adminToken } + + // Collect config files from the template directory and inline ConfigFiles. + // Without this wiring, OFFSEC-010 config-file collection is dead code and + // CP receives no files. An error here is fatal — a workspace with no + // config files cannot function. + configFiles, err := collectCPConfigFiles(cfg) + if err != nil { + return "", fmt.Errorf("cp provisioner: collect config files: %w", err) + } + req := cpProvisionRequest{ OrgID: p.orgID, WorkspaceID: cfg.WorkspaceID, @@ -186,6 +202,7 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, Tier: cfg.Tier, PlatformURL: cfg.PlatformURL, Env: env, + ConfigFiles: configFiles, } body, err := json.Marshal(req) @@ -237,6 +254,92 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, return result.InstanceID, nil } +const cpConfigFilesMaxBytes = 12 << 10 + +// collectCPConfigFiles walks cfg.TemplatePath and collects regular files as +// base64-encoded key→value entries, plus any inline cfg.ConfigFiles. Used to +// ship template config into the EC2 instance via the control plane. +// +// Security (OFFSEC-010): +// - cfg.TemplatePath itself must not be a symlink (os.Lstat check). +// - WalkDir skips symlinks inside the tree (d.Type()&os.ModeSymlink guard). +// - All file paths are cleaned and relativized; absolute/traversal paths +// are rejected by addFile's sanitization checks. +// +// Returns nil, nil when no files are found — caller must handle empty case. +func collectCPConfigFiles(cfg WorkspaceConfig) (map[string]string, error) { + files := make(map[string]string) + total := 0 + addFile := func(name string, data []byte) error { + name = filepath.ToSlash(filepath.Clean(name)) + if name == "." || strings.HasPrefix(name, "../") || strings.HasPrefix(name, "/") || strings.Contains(name, "/../") { + return fmt.Errorf("invalid config file path %q", name) + } + total += len(data) + if total > cpConfigFilesMaxBytes { + return fmt.Errorf("config files exceed %d bytes", cpConfigFilesMaxBytes) + } + files[name] = base64.StdEncoding.EncodeToString(data) + return nil + } + + if cfg.TemplatePath != "" { + // Reject symlinks on the root itself — WalkDir follows symlinks, + // so a symlink TemplatePath that escapes the intended root directory + // would bypass the subsequent path-relativization checks below. + rootInfo, err := os.Lstat(cfg.TemplatePath) + if err != nil { + return nil, fmt.Errorf("collectCPConfigFiles: lstat template path: %w", err) + } + if rootInfo.Mode()&os.ModeSymlink != 0 { + return nil, fmt.Errorf("collectCPConfigFiles: template path must not be a symlink") + } + err = filepath.WalkDir(cfg.TemplatePath, func(path string, d os.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + // Skip symlinks — WalkDir follows them by default, which means + // a symlink inside the template dir pointing to /etc/passwd + // would be traversed even though the resulting relative-path + // check would correctly reject it. Defense-in-depth. (OFFSEC-010) + if d.Type()&os.ModeSymlink != 0 { + return nil + } + if d.IsDir() { + return nil + } + info, err := d.Info() + if err != nil { + return err + } + if !info.Mode().IsRegular() { + return nil + } + rel, err := filepath.Rel(cfg.TemplatePath, path) + if err != nil { + return err + } + data, err := os.ReadFile(path) + if err != nil { + return err + } + return addFile(rel, data) + }) + if err != nil { + return nil, err + } + } + for name, data := range cfg.ConfigFiles { + if err := addFile(name, data); err != nil { + return nil, err + } + } + if len(files) == 0 { + return nil, nil + } + return files, nil +} + // Stop terminates the workspace's EC2 instance via the control plane. // // Looks up the actual EC2 instance_id from the workspaces table before diff --git a/workspace-server/internal/provisioner/cp_provisioner_test.go b/workspace-server/internal/provisioner/cp_provisioner_test.go index 4d8a6795..147c1e7c 100644 --- a/workspace-server/internal/provisioner/cp_provisioner_test.go +++ b/workspace-server/internal/provisioner/cp_provisioner_test.go @@ -6,6 +6,8 @@ import ( "io" "net/http" "net/http/httptest" + "os" + "path/filepath" "strings" "testing" "time" @@ -842,3 +844,167 @@ func TestIsRunning_EmptyInstanceIDReturnsFalse(t *testing.T) { t.Errorf("IsRunning with empty instance_id should return running=false, got true") } } + +// TestCollectCPConfigFiles_SkipsSymlinks — WalkDir follows symlinks by default, +// but collectCPConfigFiles must skip them so a symlink inside a template dir +// pointing outside (e.g. ln -s /etc snapshot) cannot be traversed. +// Verifies OFFSEC-010 defense-in-depth fix. +func TestCollectCPConfigFiles_SkipsSymlinks(t *testing.T) { + tmpl := t.TempDir() + if err := os.WriteFile(filepath.Join(tmpl, "config.yaml"), []byte("name: real\n"), 0o600); err != nil { + t.Fatal(err) + } + sensitiveDir := t.TempDir() + if err := os.WriteFile(filepath.Join(sensitiveDir, "secret.txt"), []byte("SENSITIVE\n"), 0o600); err != nil { + t.Fatal(err) + } + symlinkPath := filepath.Join(tmpl, "snapshot") + if err := os.Symlink(sensitiveDir, symlinkPath); err != nil { + t.Fatal(err) + } + + files, err := collectCPConfigFiles(WorkspaceConfig{TemplatePath: tmpl}) + if err != nil { + t.Fatalf("collectCPConfigFiles: %v", err) + } + if files == nil { + t.Fatal("files should not be nil") + } + if _, ok := files["config.yaml"]; !ok { + t.Errorf("config.yaml missing from files") + } + for k := range files { + if strings.Contains(k, "snapshot") || strings.Contains(k, "secret") { + t.Errorf("symlink path %q should not be in files — OFFSEC-010 regression", k) + } + } +} + +// TestCollectCPConfigFiles_RejectsRootSymlink — if cfg.TemplatePath itself is +// a symlink, WalkDir would follow it to an arbitrary directory, bypassing the +// cfg.TemplatePath boundary. The function must reject this explicitly. +func TestCollectCPConfigFiles_RejectsRootSymlink(t *testing.T) { + real := t.TempDir() + if err := os.WriteFile(filepath.Join(real, "config.yaml"), []byte("name: real\n"), 0o600); err != nil { + t.Fatal(err) + } + link := filepath.Join(t.TempDir(), "template-link") + if err := os.Symlink(real, link); err != nil { + t.Fatal(err) + } + + _, err := collectCPConfigFiles(WorkspaceConfig{TemplatePath: link}) + if err == nil { + t.Error("collectCPConfigFiles with symlink TemplatePath should return error") + } + if err != nil && !strings.Contains(err.Error(), "symlink") { + t.Errorf("expected symlink-related error, got: %v", err) + } +} + +// TestStart_PassesConfigFilesToCP — Start must call collectCPConfigFiles and +// include the result (base64-encoded files) in the cpProvisionRequest body sent +// to the control plane. Without this wiring, OFFSEC-010 config-file collection +// is dead code and CP receives no files. (OFFSEC-010 wiring fix) +func TestStart_PassesConfigFilesToCP(t *testing.T) { + tmpl := t.TempDir() + if err := os.WriteFile(filepath.Join(tmpl, "agent.yaml"), []byte("name: test\n"), 0o600); err != nil { + t.Fatal(err) + } + inlineFiles := map[string][]byte{ + "runtime.yaml": []byte("runtime: python\n"), + } + + var sawBody cpProvisionRequest + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if err := json.NewDecoder(r.Body).Decode(&sawBody); err != nil { + t.Fatalf("decode CP request body: %v", err) + } + w.WriteHeader(http.StatusCreated) + _, _ = io.WriteString(w, `{"instance_id":"i-cfgt","state":"running"}`) + })) + defer srv.Close() + + p := &CPProvisioner{baseURL: srv.URL, orgID: "org-1", httpClient: srv.Client()} + _, err := p.Start(context.Background(), WorkspaceConfig{ + WorkspaceID: "ws-cfg", + Runtime: "python", + Tier: 2, + PlatformURL: "https://tenant.example", + TemplatePath: tmpl, + ConfigFiles: inlineFiles, + }) + if err != nil { + t.Fatalf("Start: %v", err) + } + if sawBody.WorkspaceID != "ws-cfg" { + t.Errorf("WorkspaceID = %q, want ws-cfg", sawBody.WorkspaceID) + } + if sawBody.ConfigFiles == nil { + t.Fatal("ConfigFiles must not be nil — collectCPConfigFiles returned nil") + } + foundAgent := false + foundRuntime := false + for name := range sawBody.ConfigFiles { + if name == "agent.yaml" { + foundAgent = true + } + if name == "runtime.yaml" { + foundRuntime = true + } + } + if !foundAgent { + t.Errorf("agent.yaml missing from ConfigFiles; keys = %v", mapKeys(sawBody.ConfigFiles)) + } + if !foundRuntime { + t.Errorf("runtime.yaml missing from ConfigFiles; keys = %v", mapKeys(sawBody.ConfigFiles)) + } +} + +// TestStart_CollectConfigFilesErrorSurfaces — if collectCPConfigFiles returns +// an error (e.g. TemplatePath is a symlink, or files exceed the 12 KiB cap), +// Start must propagate that error rather than silently continuing. (OFFSEC-010) +func TestStart_CollectConfigFilesErrorSurfaces(t *testing.T) { + realDir := t.TempDir() + if err := os.WriteFile(filepath.Join(realDir, "config.yaml"), []byte("data\n"), 0o600); err != nil { + t.Fatal(err) + } + symlinkPath := filepath.Join(t.TempDir(), "template-link") + if err := os.Symlink(realDir, symlinkPath); err != nil { + t.Fatal(err) + } + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Error("CP must not be called when collectCPConfigFiles fails") + w.WriteHeader(http.StatusCreated) + })) + defer srv.Close() + + p := &CPProvisioner{baseURL: srv.URL, orgID: "org-1", httpClient: srv.Client()} + _, err := p.Start(context.Background(), WorkspaceConfig{ + WorkspaceID: "ws-symlink", + Runtime: "python", + TemplatePath: symlinkPath, + }) + if err == nil { + t.Fatal("expected error when TemplatePath is a symlink, got nil") + } + if !strings.Contains(err.Error(), "symlink") { + t.Errorf("error should mention symlink, got: %v", err) + } + if strings.Contains(err.Error(), "cp provisioner: send") { + t.Errorf("CP must not be called; error should be from collectConfigFiles, not HTTP send: %v", err) + } +} + +// mapKeys returns the keys of a map — helper for test assertions. +func mapKeys(m map[string]string) []string { + if m == nil { + return nil + } + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + return keys +}