feat(org-import): !external cross-repo subtree resolver (Phase 3a, task #222) #105
360
workspace-server/internal/handlers/org_external.go
Normal file
360
workspace-server/internal/handlers/org_external.go
Normal file
@ -0,0 +1,360 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha1"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// External-ref resolver — gitops-style cross-repo subtree composition.
|
||||
// Internal#77 RFC, Phase 3a (task #222). Prior art: Helm subcharts +
|
||||
// dependency cache, Kustomize remote bases, Terraform module sources.
|
||||
//
|
||||
// Schema (a `!external`-tagged mapping anywhere a workspace entry is
|
||||
// allowed — workspaces:, roots:, children:):
|
||||
//
|
||||
// - !external
|
||||
// repo: molecule-ai/molecule-dev-department
|
||||
// ref: main
|
||||
// path: dev-lead/workspace.yaml
|
||||
//
|
||||
// At resolve time, the platform fetches the repo at ref into a content-
|
||||
// addressable cache under <rootDir>/.external-cache/<repo>/<sha>/, loads
|
||||
// the yaml at <cacheDir>/<path>, rewrites every files_dir + relative
|
||||
// !include path to be cache-prefixed, then grafts the result in place of
|
||||
// the !external node. Downstream pipeline (resolveInsideRoot, plugin
|
||||
// merge, CopyTemplateToContainer) sees ordinary in-tree paths.
|
||||
|
||||
// ExternalRef is the deserialized form of an `!external`-tagged mapping.
// All three of Repo, Ref and Path are required; resolveExternalMapping
// rejects the node if any is empty.
type ExternalRef struct {
	// Repo is the <owner>/<name> path on the git host, e.g.
	// "molecule-ai/molecule-dev-department".
	Repo string `yaml:"repo"`
	// Ref is a branch or tag name; validated against safeRefPattern.
	Ref string `yaml:"ref"`
	// Path is the workspace yaml inside the repo, relative and
	// down-only (no "..", no leading "/").
	Path string `yaml:"path"`

	// URL overrides the default Gitea host. Optional; defaults to
	// MOLECULE_EXTERNAL_GITEA_URL env or git.moleculesai.app.
	URL string `yaml:"url,omitempty"`
}
|
||||
|
||||
const (
	// maxExternalDepth caps recursion through nested `!external`s. Lower
	// than maxIncludeDepth (16) because each level may issue a network
	// fetch. Composition that genuinely needs >4 layers is a smell.
	maxExternalDepth = 4

	// externalCacheDirName is the per-template cache subdir under rootDir.
	// Content-addressable: keyed by (repo, sha). Operators add this to
	// .gitignore — cache is platform-mutated, not source-tracked.
	externalCacheDirName = ".external-cache"

	// gitFetchTimeout caps a single ls-remote + clone sequence (the
	// context created in resolveExternalMapping covers both steps).
	// Conservative — org template fetches are typically <100KB.
	gitFetchTimeout = 60 * time.Second
)
|
||||
|
||||
// safeRefPattern restricts `ref` values to characters git itself accepts
// for branch / tag names. Belt-and-braces over git's own validation;
// primarily blocks shell metacharacters before the ref reaches the
// `git clone -b <ref>` command line in gitFetcher.
var safeRefPattern = regexp.MustCompile(`^[a-zA-Z0-9_./-]+$`)
|
||||
|
||||
// allowlistedHostPath returns true if `<host>/<repo>` matches the
|
||||
// configured allowlist. Default allowlist: git.moleculesai.app/molecule-ai/.
|
||||
// Override via MOLECULE_EXTERNAL_REPO_ALLOWLIST env var (comma-separated
|
||||
// patterns). Patterns are matched as prefixes (with trailing-slash
|
||||
// semantics) or as exact matches. Trailing /* is treated as "any
|
||||
// descendants of this prefix".
|
||||
//
|
||||
// Examples:
|
||||
// - "git.moleculesai.app/molecule-ai/" → matches molecule-ai/* (any repo)
|
||||
// - "git.moleculesai.app/molecule-ai/*" → same; trailing /* normalized to /
|
||||
// - "git.moleculesai.app/molecule-ai/molecule-dev-department" → exact
|
||||
// - "git.moleculesai.app/" → matches everything on that host
|
||||
func allowlistedHostPath(host, repoPath string) bool {
|
||||
allow := os.Getenv("MOLECULE_EXTERNAL_REPO_ALLOWLIST")
|
||||
if allow == "" {
|
||||
allow = "git.moleculesai.app/molecule-ai/"
|
||||
}
|
||||
hp := host + "/" + repoPath
|
||||
for _, pat := range strings.Split(allow, ",") {
|
||||
pat = strings.TrimSpace(pat)
|
||||
if pat == "" {
|
||||
continue
|
||||
}
|
||||
// Normalize trailing /* → /
|
||||
pat = strings.TrimSuffix(pat, "*")
|
||||
if pat == hp {
|
||||
return true
|
||||
}
|
||||
if strings.HasSuffix(pat, "/") && strings.HasPrefix(hp+"/", pat) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// externalFetcher abstracts the git-clone-into-cache step. Production
// uses gitFetcher (shells out to git); tests inject a fake that
// pre-stages content in a temp dir (see SetExternalFetcherForTest).
type externalFetcher interface {
	// Fetch ensures rootDir/.external-cache/<safe-repo>/<sha>/ contains
	// the repo content at the given ref. Returns the absolute cache
	// dir + the resolved SHA. Cache hit = no network. Cache miss =
	// clone. Implementations must respect ctx cancellation.
	Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (cacheDir, sha string, err error)
}
|
||||
|
||||
// defaultExternalFetcher is the package-level fetcher injection point.
// Production code uses the git-shell fetcher; tests override via
// SetExternalFetcherForTest. NOTE(review): package-level mutable state —
// swapping it is not safe under concurrent template resolution; confirm
// callers serialize access.
var defaultExternalFetcher externalFetcher = &gitFetcher{}
|
||||
|
||||
// SetExternalFetcherForTest swaps the fetcher for testing. Returns a
|
||||
// cleanup func that restores the previous fetcher.
|
||||
func SetExternalFetcherForTest(f externalFetcher) func() {
|
||||
prev := defaultExternalFetcher
|
||||
defaultExternalFetcher = f
|
||||
return func() { defaultExternalFetcher = prev }
|
||||
}
|
||||
|
||||
// resolveExternalMapping replaces an `!external`-tagged mapping node
// with the loaded + path-rewritten yaml content from the fetched repo.
//
// `currentDir` and `rootDir` are inherited from expandNode's resolve
// frame. `visited` tracks (host, repo, sha, path) tuples for cycle
// detection across nested externals; `depth` counts external nesting.
//
// NOTE(review): currentDir is accepted for signature parity with the
// resolve frame but is unused in this body — confirm that is intentional.
func resolveExternalMapping(n *yaml.Node, currentDir, rootDir string, visited map[string]bool, depth int) error {
	if depth > maxExternalDepth {
		return fmt.Errorf("!external: max depth %d exceeded (possible cycle)", maxExternalDepth)
	}
	// The cache lives under rootDir, so inline templates (no backing
	// directory) cannot host external content.
	if rootDir == "" {
		return fmt.Errorf("!external at line %d requires a dir-based org template (no rootDir in inline-template mode)", n.Line)
	}

	var ref ExternalRef
	if err := n.Decode(&ref); err != nil {
		return fmt.Errorf("!external at line %d: decode: %w", n.Line, err)
	}
	if ref.Repo == "" || ref.Ref == "" || ref.Path == "" {
		return fmt.Errorf("!external at line %d: repo, ref, path are all required (got %+v)", n.Line, ref)
	}
	// Reject shell metacharacters before ref reaches a git command line.
	if !safeRefPattern.MatchString(ref.Ref) {
		return fmt.Errorf("!external at line %d: ref %q contains disallowed characters", n.Line, ref.Ref)
	}
	// Cheap pre-fetch traversal screen; the authoritative containment
	// check is resolveInsideRoot below, after fetch.
	if strings.Contains(ref.Path, "..") || strings.HasPrefix(ref.Path, "/") {
		return fmt.Errorf("!external at line %d: path %q must be relative-and-down-only", n.Line, ref.Path)
	}

	// Host selection: explicit url field > env override > default.
	host := ref.URL
	if host == "" {
		host = os.Getenv("MOLECULE_EXTERNAL_GITEA_URL")
	}
	if host == "" {
		host = "git.moleculesai.app"
	}
	// Normalize to a bare host: strip scheme and trailing slash so the
	// allowlist and cache key formats are stable.
	host = strings.TrimPrefix(strings.TrimPrefix(host, "https://"), "http://")
	host = strings.TrimSuffix(host, "/")

	if !allowlistedHostPath(host, ref.Repo) {
		return fmt.Errorf("!external at line %d: %s/%s not in MOLECULE_EXTERNAL_REPO_ALLOWLIST", n.Line, host, ref.Repo)
	}

	ctx, cancel := context.WithTimeout(context.Background(), gitFetchTimeout)
	defer cancel()

	cacheDir, sha, err := defaultExternalFetcher.Fetch(ctx, rootDir, host, ref.Repo, ref.Ref)
	if err != nil {
		return fmt.Errorf("!external at line %d: fetch %s/%s@%s: %w", n.Line, host, ref.Repo, ref.Ref, err)
	}

	// Cycle key: (repo, sha, path) — same external content reachable
	// via two paths is fine, but a self-referential cycle isn't.
	cycleKey := fmt.Sprintf("%s/%s@%s/%s", host, ref.Repo, sha, ref.Path)
	if visited[cycleKey] {
		return fmt.Errorf("!external cycle detected at %q (line %d)", cycleKey, n.Line)
	}

	// Validate path resolves inside the cache dir (anti-traversal).
	yamlPathAbs, err := resolveInsideRoot(cacheDir, ref.Path)
	if err != nil {
		return fmt.Errorf("!external at line %d: path %q: %w", n.Line, ref.Path, err)
	}
	if _, err := os.Stat(yamlPathAbs); err != nil {
		return fmt.Errorf("!external at line %d: %s/%s@%s does not contain %q: %w", n.Line, host, ref.Repo, sha, ref.Path, err)
	}

	data, err := os.ReadFile(yamlPathAbs)
	if err != nil {
		return fmt.Errorf("!external at line %d: read %q: %w", n.Line, yamlPathAbs, err)
	}

	var sub yaml.Node
	if err := yaml.Unmarshal(data, &sub); err != nil {
		return fmt.Errorf("!external at line %d: parse %q: %w", n.Line, yamlPathAbs, err)
	}
	// Unwrap the document node so we graft the content root directly.
	root := &sub
	if root.Kind == yaml.DocumentNode && len(root.Content) == 1 {
		root = root.Content[0]
	}

	// Recurse FIRST: load all nested !include / !external content into
	// the tree. Then rewrite ALL files_dir scalars in the fully-resolved
	// tree (top + nested) with the cache prefix in one pass. Doing
	// rewrite-before-recurse would leave nested-loaded files_dir paths
	// unprefixed.
	visited[cycleKey] = true
	defer delete(visited, cycleKey)

	// Nested relative paths resolve against the fetched file's own dir.
	subDir := filepath.Dir(yamlPathAbs)
	if err := expandNode(root, subDir, rootDir, visited, depth+1); err != nil {
		return err
	}

	// Path rewrite: prefix every files_dir scalar in the fully-resolved
	// content with the cache-relative-from-rootDir prefix. After this
	// pass, fetched workspaces look like ordinary in-tree workspaces.
	cachePrefix, err := filepath.Rel(rootDir, cacheDir)
	if err != nil {
		return fmt.Errorf("!external at line %d: cannot compute cache prefix: %w", n.Line, err)
	}
	rewriteFilesDirAndIncludes(root, cachePrefix)

	// Replace the !external mapping with the resolved content in-place.
	*n = *root
	// NOTE(review): after `*n = *root`, n.Tag comes from the fetched
	// content, so this guard is defensive and should never fire —
	// confirm it can be dropped.
	if n.Tag == "!external" {
		n.Tag = ""
	}
	return nil
}
|
||||
|
||||
// rewriteFilesDirAndIncludes walks the yaml node tree and prepends
|
||||
// cachePrefix to every files_dir scalar value. Idempotent: if a
|
||||
// files_dir value already starts with the prefix, no-op.
|
||||
//
|
||||
// !include paths are NOT rewritten. They resolve relative to their
|
||||
// containing file's directory (subDir in expandNode), and after fetch
|
||||
// that directory IS inside the cache, so relative paths Just Work
|
||||
// without any rewrite. Rewriting them would double-prefix.
|
||||
//
|
||||
// files_dir DOES need rewriting because it's consumed at workspace-
|
||||
// provisioning time relative to orgBaseDir (the parent template's root),
|
||||
// not relative to the workspace.yaml's containing dir.
|
||||
func rewriteFilesDirAndIncludes(n *yaml.Node, cachePrefix string) {
|
||||
if n == nil {
|
||||
return
|
||||
}
|
||||
if n.Kind == yaml.MappingNode {
|
||||
for i := 0; i+1 < len(n.Content); i += 2 {
|
||||
key, value := n.Content[i], n.Content[i+1]
|
||||
if key.Kind == yaml.ScalarNode && key.Value == "files_dir" && value.Kind == yaml.ScalarNode {
|
||||
if !strings.HasPrefix(value.Value, cachePrefix+string(filepath.Separator)) && value.Value != cachePrefix {
|
||||
value.Value = filepath.Join(cachePrefix, value.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, child := range n.Content {
|
||||
rewriteFilesDirAndIncludes(child, cachePrefix)
|
||||
}
|
||||
}
|
||||
|
||||
// safeRepoCacheDir flattens "<host>/<repoPath>" into a single
// filesystem-safe cache segment: "/" becomes "__" and ":" becomes "_",
// e.g. "git.moleculesai.app/molecule-ai/foo" →
// "git.moleculesai.app__molecule-ai__foo". Flattening avoids nested
// cache dirs, which would complicate cleanup.
func safeRepoCacheDir(host, repoPath string) string {
	sanitize := strings.NewReplacer("/", "__", ":", "_")
	return sanitize.Replace(host + "/" + repoPath)
}
|
||||
|
||||
// gitFetcher is the production externalFetcher: shells out to `git` to
// clone the repo at ref into the cache dir. Cache key includes the
// resolved SHA, so different SHAs of the same ref get different cache
// dirs (no overwrite). Stateless; the zero value is ready to use.
type gitFetcher struct{}
|
||||
|
||||
// Fetch resolves ref → SHA via `git ls-remote`, then `git clone --depth=1`
// if the cache dir is missing. Auth via MOLECULE_GITEA_TOKEN injected
// into the URL. The clone lands in a temp dir and is renamed into place
// so a concurrent importer never observes a half-written cache entry.
//
// NOTE(review): both steps pass ref to git as a ref name (`ls-remote
// <url> <ref>` / `clone -b <ref>`) — a raw commit SHA would not resolve.
// Confirm callers only supply branch/tag names.
func (g *gitFetcher) Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (string, string, error) {
	cacheRoot := filepath.Join(rootDir, externalCacheDirName, safeRepoCacheDir(host, repoPath))
	if err := os.MkdirAll(cacheRoot, 0o755); err != nil {
		return "", "", fmt.Errorf("mkdir cache root: %w", err)
	}

	cloneURL := buildAuthedURL(host, repoPath)

	// 1. Resolve ref → SHA (so cache dir is content-addressable).
	sha, err := g.resolveRefToSHA(ctx, cloneURL, ref)
	if err != nil {
		return "", "", fmt.Errorf("ls-remote: %w", err)
	}

	// Presence of <cacheDir>/.git marks a completed clone (partial
	// clones only ever exist under a .tmp.* path, see below).
	cacheDir := filepath.Join(cacheRoot, sha)
	if _, statErr := os.Stat(filepath.Join(cacheDir, ".git")); statErr == nil {
		// Cache hit.
		return cacheDir, sha, nil
	}

	// 2. Clone into a uniquified temp dir beside the final path.
	tmpDir := cacheDir + ".tmp." + shortHash(time.Now().String())
	cmd := exec.CommandContext(ctx, "git", "clone", "--quiet", "--depth=1", "-b", ref, cloneURL, tmpDir)
	// GIT_TERMINAL_PROMPT=0: fail fast instead of hanging on a
	// credential prompt when the token is wrong/missing.
	cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
	if out, err := cmd.CombinedOutput(); err != nil {
		os.RemoveAll(tmpDir)
		return "", "", fmt.Errorf("git clone: %w: %s", err, strings.TrimSpace(string(out)))
	}
	// Atomic rename to final cache path.
	if err := os.Rename(tmpDir, cacheDir); err != nil {
		// Race: another import beat us to it. The other party's content
		// is at the same SHA, so it's equivalent. Cleanup our tmp.
		os.RemoveAll(tmpDir)
		// Only propagate the rename failure if the cache entry truly
		// isn't there (i.e. it wasn't a benign lost race).
		if _, statErr := os.Stat(filepath.Join(cacheDir, ".git")); statErr != nil {
			return "", "", fmt.Errorf("rename clone to cache: %w", err)
		}
	}
	return cacheDir, sha, nil
}
|
||||
|
||||
func (g *gitFetcher) resolveRefToSHA(ctx context.Context, cloneURL, ref string) (string, error) {
|
||||
cmd := exec.CommandContext(ctx, "git", "ls-remote", cloneURL, ref)
|
||||
cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
line := strings.TrimSpace(string(out))
|
||||
if line == "" {
|
||||
return "", fmt.Errorf("ref %q not found", ref)
|
||||
}
|
||||
// First whitespace-separated field is the SHA.
|
||||
for i, ch := range line {
|
||||
if ch == ' ' || ch == '\t' {
|
||||
return line[:i], nil
|
||||
}
|
||||
}
|
||||
return line, nil
|
||||
}
|
||||
|
||||
func buildAuthedURL(host, repoPath string) string {
|
||||
token := os.Getenv("MOLECULE_GITEA_TOKEN")
|
||||
u := url.URL{Scheme: "https", Host: host, Path: "/" + repoPath + ".git"}
|
||||
if token != "" {
|
||||
u.User = url.UserPassword("oauth2", token)
|
||||
}
|
||||
return u.String()
|
||||
}
|
||||
|
||||
// shortHash returns the first eight hex chars of SHA-1(s). Used only to
// uniquify temp-dir names — not a security boundary.
func shortHash(s string) string {
	digest := sha1.Sum([]byte(s))
	return hex.EncodeToString(digest[:4])
}
|
||||
331
workspace-server/internal/handlers/org_external_test.go
Normal file
331
workspace-server/internal/handlers/org_external_test.go
Normal file
@ -0,0 +1,331 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// fakeFetcher pre-stages a "fetched" repo at a fixed path inside the
// rootDir's .external-cache, bypassing the real git clone. Tests
// inject this via SetExternalFetcherForTest to exercise the resolver
// + path-rewrite logic without network.
type fakeFetcher struct {
	// content maps "<host>/<repo>@<ref>" → a function that materializes
	// repo content under cacheDir. Returns the fake SHA to use.
	// A missing key makes Fetch fail with fakeNotFoundError.
	content map[string]func(cacheDir string) (sha string, err error)
}
|
||||
|
||||
func (f *fakeFetcher) Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (string, string, error) {
|
||||
key := host + "/" + repoPath + "@" + ref
|
||||
stage, ok := f.content[key]
|
||||
if !ok {
|
||||
return "", "", &fakeNotFoundError{key: key}
|
||||
}
|
||||
// Use a stable SHA for the test so cache dir is deterministic.
|
||||
cacheDir := filepath.Join(rootDir, ".external-cache", safeRepoCacheDir(host, repoPath), "deadbeef")
|
||||
if err := os.MkdirAll(cacheDir, 0o755); err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
sha, err := stage(cacheDir)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
return cacheDir, sha, nil
|
||||
}
|
||||
|
||||
// fakeNotFoundError reports a Fetch key with no registered stager.
type fakeNotFoundError struct{ key string }

// Error implements the error interface.
func (e *fakeNotFoundError) Error() string {
	const prefix = "fake fetcher: no content registered for "
	return prefix + e.key
}
|
||||
|
||||
// stageFiles writes a map of relative-path → content into cacheDir,
|
||||
// returning a fake SHA. Helper for fakeFetcher closures.
|
||||
func stageFiles(cacheDir string, files map[string]string) error {
|
||||
if err := os.MkdirAll(filepath.Join(cacheDir, ".git"), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
for path, content := range files {
|
||||
full := filepath.Join(cacheDir, path)
|
||||
if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.WriteFile(full, []byte(content), 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TestResolveExternalMapping_HappyPath: a parent template with an
// !external entry resolves cleanly into the fetched workspace and
// rewrites files_dir values to be cache-prefixed. The nested child is
// loaded via a relative !include inside the fetched content.
// Entry point is resolveYAMLIncludes (defined elsewhere in the package).
func TestResolveExternalMapping_HappyPath(t *testing.T) {
	tmp := t.TempDir()

	// Stub fetcher: "fetched" content has a workspace.yaml that uses
	// files_dir + nested !include relative to the fetched repo's root.
	fake := &fakeFetcher{
		content: map[string]func(string) (string, error){
			"git.moleculesai.app/molecule-ai/molecule-dev-department@main": func(cacheDir string) (string, error) {
				return "deadbeef", stageFiles(cacheDir, map[string]string{
					"dev-lead/workspace.yaml": `name: Dev Lead
files_dir: dev-lead
children:
  - !include ./core-lead/workspace.yaml
`,
					"dev-lead/core-lead/workspace.yaml": `name: Core Platform Lead
files_dir: dev-lead/core-lead
`,
				})
			},
		},
	}
	cleanup := SetExternalFetcherForTest(fake)
	defer cleanup()

	src := []byte(`name: Parent
workspaces:
  - !external
    repo: molecule-ai/molecule-dev-department
    ref: main
    path: dev-lead/workspace.yaml
`)

	out, err := resolveYAMLIncludes(src, tmp)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes: %v", err)
	}

	var tmpl OrgTemplate
	if err := yaml.Unmarshal(out, &tmpl); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if len(tmpl.Workspaces) != 1 {
		t.Fatalf("workspaces: %+v", tmpl.Workspaces)
	}
	dev := tmpl.Workspaces[0]
	if dev.Name != "Dev Lead" {
		t.Errorf("dev.Name = %q; want Dev Lead", dev.Name)
	}
	// files_dir should be cache-prefixed (fake SHA "deadbeef").
	wantPrefix := filepath.Join(".external-cache", "git.moleculesai.app__molecule-ai__molecule-dev-department", "deadbeef")
	if !strings.HasPrefix(dev.FilesDir, wantPrefix) {
		t.Errorf("dev.FilesDir = %q; want prefix %q", dev.FilesDir, wantPrefix)
	}
	if !strings.HasSuffix(dev.FilesDir, "dev-lead") {
		t.Errorf("dev.FilesDir = %q; want suffix dev-lead", dev.FilesDir)
	}
	// Nested child: files_dir cache-prefixed, name Core Platform Lead.
	if len(dev.Children) != 1 {
		t.Fatalf("dev.Children: %+v", dev.Children)
	}
	core := dev.Children[0]
	if core.Name != "Core Platform Lead" {
		t.Errorf("core.Name = %q; want Core Platform Lead", core.Name)
	}
	if !strings.HasPrefix(core.FilesDir, wantPrefix) {
		t.Errorf("core.FilesDir = %q; want prefix %q", core.FilesDir, wantPrefix)
	}
	if !strings.HasSuffix(core.FilesDir, filepath.Join("dev-lead", "core-lead")) {
		t.Errorf("core.FilesDir = %q; want suffix dev-lead/core-lead", core.FilesDir)
	}
}
|
||||
|
||||
// TestResolveExternalMapping_AllowlistRejection: hostile yaml pointing
// at a non-allowlisted host/repo gets rejected before any fetch (the
// fake fetcher has no content registered, so reaching it would fail
// with a different error).
func TestResolveExternalMapping_AllowlistRejection(t *testing.T) {
	tmp := t.TempDir()
	fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
	cleanup := SetExternalFetcherForTest(fake)
	defer cleanup()

	// Default allowlist is git.moleculesai.app/molecule-ai/*.
	// github.com/foo/bar is NOT in it.
	src := []byte(`workspaces:
  - !external
    repo: foo/bar
    ref: main
    path: x.yaml
    url: github.com
`)
	_, err := resolveYAMLIncludes(src, tmp)
	if err == nil {
		t.Fatalf("expected allowlist rejection, got nil")
	}
	if !strings.Contains(err.Error(), "MOLECULE_EXTERNAL_REPO_ALLOWLIST") {
		t.Errorf("expected allowlist error; got %v", err)
	}
}
|
||||
|
||||
// TestResolveExternalMapping_PathTraversalRejection: hostile yaml
// with `path: ../../etc/passwd` gets rejected by the pre-fetch path
// screen (the ".." check in resolveExternalMapping) — the fetcher is
// never reached.
func TestResolveExternalMapping_PathTraversalRejection(t *testing.T) {
	tmp := t.TempDir()
	fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
	cleanup := SetExternalFetcherForTest(fake)
	defer cleanup()

	src := []byte(`workspaces:
  - !external
    repo: molecule-ai/dev-department
    ref: main
    path: ../../etc/passwd
`)
	_, err := resolveYAMLIncludes(src, tmp)
	if err == nil {
		t.Fatalf("expected path traversal rejection, got nil")
	}
	if !strings.Contains(err.Error(), "relative-and-down-only") {
		t.Errorf("expected path traversal error; got %v", err)
	}
}
|
||||
|
||||
// TestResolveExternalMapping_BadRefRejection: a ref containing shell
// metacharacters fails safeRefPattern validation before any git
// command line is built.
func TestResolveExternalMapping_BadRefRejection(t *testing.T) {
	tmp := t.TempDir()
	fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
	cleanup := SetExternalFetcherForTest(fake)
	defer cleanup()

	src := []byte(`workspaces:
  - !external
    repo: molecule-ai/dev-department
    ref: "main; rm -rf /"
    path: foo.yaml
`)
	_, err := resolveYAMLIncludes(src, tmp)
	if err == nil || !strings.Contains(err.Error(), "disallowed characters") {
		t.Errorf("expected ref-validation error; got %v", err)
	}
}
|
||||
|
||||
// TestResolveExternalMapping_MissingRequiredFields: repo / ref / path
// are all required; each case omits exactly one of them and expects
// the "required" validation error.
func TestResolveExternalMapping_MissingRequiredFields(t *testing.T) {
	tmp := t.TempDir()
	fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
	cleanup := SetExternalFetcherForTest(fake)
	defer cleanup()

	cases := []string{
		// missing repo
		`workspaces:
  - !external
    ref: main
    path: x.yaml
`,
		// missing ref
		`workspaces:
  - !external
    repo: molecule-ai/x
    path: x.yaml
`,
		// missing path
		`workspaces:
  - !external
    repo: molecule-ai/x
    ref: main
`,
	}
	for i, src := range cases {
		_, err := resolveYAMLIncludes([]byte(src), tmp)
		if err == nil {
			t.Errorf("case %d: expected required-field error, got nil", i)
		} else if !strings.Contains(err.Error(), "required") {
			t.Errorf("case %d: want 'required' in error; got %v", i, err)
		}
	}
}
|
||||
|
||||
// TestRewriteFilesDirAndIncludes: verify the path-rewrite walker
// prefixes files_dir scalars at every nesting level. !include scalars
// are NOT rewritten — they resolve relative to their containing file's
// dir, which post-fetch is naturally inside the cache.
func TestRewriteFilesDirAndIncludes(t *testing.T) {
	src := `name: Foo
files_dir: dev-lead
children:
  - !include ./bar/workspace.yaml
  - !include other-team.yaml
inner:
  files_dir: dev-lead/sub
`
	var n yaml.Node
	if err := yaml.Unmarshal([]byte(src), &n); err != nil {
		t.Fatal(err)
	}
	rewriteFilesDirAndIncludes(&n, ".external-cache/foo/bar")

	out, err := yaml.Marshal(&n)
	if err != nil {
		t.Fatal(err)
	}
	got := string(out)
	for _, want := range []string{
		"files_dir: .external-cache/foo/bar/dev-lead",
		"files_dir: .external-cache/foo/bar/dev-lead/sub",
		// !include preserved as-is; resolves naturally via subDir.
		"!include ./bar/workspace.yaml",
		"!include other-team.yaml",
	} {
		if !strings.Contains(got, want) {
			t.Errorf("missing %q in:\n%s", want, got)
		}
	}
}
|
||||
|
||||
// TestRewriteFilesDirAndIncludes_Idempotent: re-running the rewriter
// on already-prefixed files_dir doesn't double-prefix (exercises the
// HasPrefix guard in rewriteFilesDirAndIncludes).
func TestRewriteFilesDirAndIncludes_Idempotent(t *testing.T) {
	src := `files_dir: .external-cache/foo/bar/dev-lead
inner:
  files_dir: .external-cache/foo/bar/dev-lead/sub
`
	var n yaml.Node
	if err := yaml.Unmarshal([]byte(src), &n); err != nil {
		t.Fatal(err)
	}
	rewriteFilesDirAndIncludes(&n, ".external-cache/foo/bar")

	out, _ := yaml.Marshal(&n)
	got := string(out)
	if strings.Contains(got, ".external-cache/foo/bar/.external-cache") {
		t.Errorf("double-prefix detected:\n%s", got)
	}
	// Should still be valid (single-prefixed) afterwards.
	for _, want := range []string{
		"files_dir: .external-cache/foo/bar/dev-lead",
		"files_dir: .external-cache/foo/bar/dev-lead/sub",
	} {
		if !strings.Contains(got, want) {
			t.Errorf("expected unchanged %q in:\n%s", want, got)
		}
	}
}
|
||||
|
||||
// TestAllowlistedHostPath: default allowlist behavior (empty env var
// falls back to git.moleculesai.app/molecule-ai/) plus env-var override
// with trailing-/* glob patterns.
func TestAllowlistedHostPath(t *testing.T) {
	t.Setenv("MOLECULE_EXTERNAL_REPO_ALLOWLIST", "")
	if !allowlistedHostPath("git.moleculesai.app", "molecule-ai/foo") {
		t.Error("default allowlist should accept molecule-ai/*")
	}
	if allowlistedHostPath("github.com", "molecule-ai/foo") {
		t.Error("default allowlist should reject github.com")
	}
	t.Setenv("MOLECULE_EXTERNAL_REPO_ALLOWLIST", "github.com/me/*,git.moleculesai.app/*")
	if !allowlistedHostPath("github.com", "me/x") {
		t.Error("override should accept github.com/me/*")
	}
	if !allowlistedHostPath("git.moleculesai.app", "any/repo") {
		t.Error("override should accept git.moleculesai.app/*")
	}
	if allowlistedHostPath("github.com", "evil/x") {
		t.Error("override should reject github.com/evil/*")
	}
}
|
||||
@ -76,6 +76,12 @@ func expandNode(n *yaml.Node, currentDir, rootDir string, visited map[string]boo
|
||||
return resolveIncludeScalar(n, currentDir, rootDir, visited, depth)
|
||||
}
|
||||
|
||||
// `!external`-tagged mapping: gitops cross-repo subtree composition.
|
||||
// See org_external.go (internal#77 / task #222).
|
||||
if n.Kind == yaml.MappingNode && n.Tag == "!external" {
|
||||
return resolveExternalMapping(n, currentDir, rootDir, visited, depth)
|
||||
}
|
||||
|
||||
for _, child := range n.Content {
|
||||
if err := expandNode(child, currentDir, rootDir, visited, depth); err != nil {
|
||||
return err
|
||||
|
||||
Loading…
Reference in New Issue
Block a user