Merge pull request 'feat(org-import): !external cross-repo subtree resolver (Phase 3a, task #222)' (#105) from feature/external-ref-resolver into staging
All checks were successful
CodeQL / Analyze (${{ matrix.language }}) (javascript-typescript) (push) Successful in 1s
CodeQL / Analyze (${{ matrix.language }}) (go) (push) Successful in 1s
CodeQL / Analyze (${{ matrix.language }}) (python) (push) Successful in 1s
Block internal-flavored paths / Block forbidden paths (push) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 6s
CI / Detect changes (push) Successful in 8s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 9s
Handlers Postgres Integration / detect-changes (push) Successful in 9s
Harness Replays / detect-changes (push) Successful in 9s
E2E API Smoke Test / detect-changes (push) Successful in 9s
Runtime PR-Built Compatibility / detect-changes (push) Successful in 8s
CI / Shellcheck (E2E scripts) (push) Successful in 2s
CI / Python Lint & Test (push) Successful in 3s
CI / Canvas (Next.js) (push) Successful in 4s
CI / Canvas Deploy Reminder (push) Has been skipped
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 5s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (push) Successful in 4s
Harness Replays / Harness Replays (push) Successful in 1m4s
publish-workspace-server-image / build-and-push (push) Successful in 1m50s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m3s
CI / Platform (Go) (push) Successful in 3m9s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m6s

This commit is contained in:
claude-ceo-assistant 2026-05-08 12:23:33 +00:00
commit ef0ef30116
3 changed files with 697 additions and 0 deletions

View File

@ -0,0 +1,360 @@
package handlers
import (
"context"
"crypto/sha1"
"encoding/hex"
"fmt"
"net/url"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"time"
"gopkg.in/yaml.v3"
)
// External-ref resolver — gitops-style cross-repo subtree composition.
// Internal#77 RFC, Phase 3a (task #222). Prior art: Helm subcharts +
// dependency cache, Kustomize remote bases, Terraform module sources.
//
// Schema (a `!external`-tagged mapping anywhere a workspace entry is
// allowed — workspaces:, roots:, children:):
//
// - !external
// repo: molecule-ai/molecule-dev-department
// ref: main
// path: dev-lead/workspace.yaml
//
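// At resolve time, the platform fetches the repo at ref into a content-
// addressable cache under <rootDir>/.external-cache/<host>__<repo>/<sha>/
// (slashes in the repo path become "__"), loads the yaml at
// <cacheDir>/<path>, expands any nested !include / !external nodes, then
// prefixes every files_dir with the cache path relative to rootDir and
// grafts the result in place of the !external node. Relative !include
// paths are NOT rewritten: they resolve against the including file's
// directory, which already lives inside the cache. Downstream pipeline
// (resolveInsideRoot, plugin merge, CopyTemplateToContainer) sees ordinary
// in-tree paths.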
// ExternalRef is the deserialized form of an `!external`-tagged mapping.
// resolveExternalMapping rejects the node unless Repo, Ref and Path are all
// non-empty; URL is optional.
type ExternalRef struct {
// Repo is the repository path on the git host, e.g.
// "molecule-ai/molecule-dev-department".
Repo string `yaml:"repo"`
// Ref is the git ref to fetch (e.g. "main"); it must match safeRefPattern.
Ref string `yaml:"ref"`
// Path locates the yaml file inside the fetched repo. It must be relative
// and must not contain "..".
Path string `yaml:"path"`
// URL overrides the default Gitea host. Optional; defaults to
// MOLECULE_EXTERNAL_GITEA_URL env or git.moleculesai.app.
URL string `yaml:"url,omitempty"`
}
const (
// maxExternalDepth caps recursion through nested `!external`s. Lower
// than maxIncludeDepth (16) because each level may issue a network
// fetch. Composition that genuinely needs >4 layers is a smell.
// resolveExternalMapping compares it against the depth handed down by
// expandNode and recurses with depth+1.
maxExternalDepth = 4
// externalCacheDirName is the per-template cache subdir under rootDir.
// Content-addressable: keyed by (host, repo, sha). Operators add this to
// .gitignore — cache is platform-mutated, not source-tracked.
externalCacheDirName = ".external-cache"
// gitFetchTimeout caps one Fetch call: resolveExternalMapping creates a
// single context with this timeout, so the ls-remote lookup and the
// clone share the budget. Conservative — org template fetches are
// typically <100KB.
gitFetchTimeout = 60 * time.Second
)
// safeRefPattern restricts `ref` values to characters git itself accepts
// for branch / tag / SHA. Belt-and-braces over git's own validation.
//
// The first character is limited to an alphanumeric or underscore so a ref
// can never start with "-" (which git could read as a command-line option
// when the value is passed as an argument), nor with "." or "/" (which git
// rejects as ref names anyway). The remaining characters may additionally
// be ".", "/" and "-".
var safeRefPattern = regexp.MustCompile(`^[a-zA-Z0-9_][a-zA-Z0-9_./-]*$`)
// allowlistedHostPath returns true if `<host>/<repo>` matches the
// configured allowlist. Default allowlist: git.moleculesai.app/molecule-ai/.
// Override via MOLECULE_EXTERNAL_REPO_ALLOWLIST env var (comma-separated
// patterns). Patterns are matched as prefixes (with trailing-slash
// semantics) or as exact matches. Trailing /* is treated as "any
// descendants of this prefix".
//
// Matching is purely textual, so the candidate must already be canonical:
// any empty, "." or ".." path segment in host or repoPath is rejected up
// front. Without that check "molecule-ai/../other-org/repo" would satisfy
// the "…/molecule-ai/" prefix while the URL built from it can be normalised
// to a repo outside the allowlist.
//
// Examples:
//   - "git.moleculesai.app/molecule-ai/" → matches molecule-ai/* (any repo)
//   - "git.moleculesai.app/molecule-ai/*" → same; trailing /* normalized to /
//   - "git.moleculesai.app/molecule-ai/molecule-dev-department" → exact
//   - "git.moleculesai.app/" → matches everything on that host
func allowlistedHostPath(host, repoPath string) bool {
	hp := host + "/" + repoPath
	for _, seg := range strings.Split(hp, "/") {
		switch seg {
		case "", ".", "..":
			// Also covers an empty host or repoPath (leading/trailing "").
			return false
		}
	}

	allow := os.Getenv("MOLECULE_EXTERNAL_REPO_ALLOWLIST")
	if allow == "" {
		allow = "git.moleculesai.app/molecule-ai/"
	}
	for _, pat := range strings.Split(allow, ",") {
		pat = strings.TrimSpace(pat)
		if pat == "" {
			continue
		}
		// Normalize trailing /* → /
		pat = strings.TrimSuffix(pat, "*")
		if pat == hp {
			return true
		}
		// hp+"/" lets a pattern that names the repo itself with a trailing
		// slash match, while still requiring a full-segment prefix.
		if strings.HasSuffix(pat, "/") && strings.HasPrefix(hp+"/", pat) {
			return true
		}
	}
	return false
}
// externalFetcher abstracts the git-clone-into-cache step. Production
// uses gitFetcher (shells out to git); tests inject a fake that
// pre-stages content in a temp dir.
type externalFetcher interface {
// Fetch ensures rootDir/.external-cache/<safe-repo>/<sha>/ contains
// the repo content at the given ref. Returns the absolute cache
// dir + the resolved SHA. Cache hit = no network. Cache miss =
// clone.
//
// host and repoPath are passed separately; <safe-repo> is derived from
// both via safeRepoCacheDir. The implementations in this package return
// empty strings for cacheDir and sha whenever err is non-nil.
Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (cacheDir, sha string, err error)
}
// defaultExternalFetcher is the package-level fetcher injection point.
// Production code uses the git-shell fetcher; tests override via
// SetExternalFetcherForTest.
//
// NOTE(review): this is a plain package variable with no locking —
// presumably it is only swapped from tests that do not run in parallel;
// confirm before using t.Parallel with a fake fetcher.
var defaultExternalFetcher externalFetcher = &gitFetcher{}
// SetExternalFetcherForTest installs f as the package-level fetcher and
// hands back a restore function. Calling the returned function puts back
// whichever fetcher was active at the time of this call, so the usual
// pattern is:
//
//	cleanup := SetExternalFetcherForTest(fake)
//	defer cleanup()
func SetExternalFetcherForTest(f externalFetcher) func() {
	original := defaultExternalFetcher
	restore := func() {
		defaultExternalFetcher = original
	}
	defaultExternalFetcher = f
	return restore
}
// resolveExternalMapping swaps an `!external`-tagged mapping node for the
// yaml it points at, after fetching the referenced repo into the cache.
//
// Parameters:
//   - n: the `!external` mapping node; overwritten in place on success.
//   - currentDir: inherited from expandNode's resolve frame (not read here).
//   - rootDir: the org template root; required, because the cache lives
//     underneath it.
//   - visited: cycle-detection set shared with expandNode. The key added
//     here is "<host>/<repo>@<sha>/<path>" and is removed again on return.
//   - depth: current nesting depth; nested content is expanded with depth+1.
//
// Steps, in order: validate the decoded ExternalRef (required fields, ref
// characters, down-only path), normalise the host, check the allowlist,
// fetch, detect cycles, load and parse the yaml, expand nested
// !include / !external nodes, prefix files_dir values with the cache path,
// and finally graft the result over n.
func resolveExternalMapping(n *yaml.Node, currentDir, rootDir string, visited map[string]bool, depth int) error {
	switch {
	case depth > maxExternalDepth:
		return fmt.Errorf("!external: max depth %d exceeded (possible cycle)", maxExternalDepth)
	case rootDir == "":
		return fmt.Errorf("!external at line %d requires a dir-based org template (no rootDir in inline-template mode)", n.Line)
	}

	var spec ExternalRef
	if decodeErr := n.Decode(&spec); decodeErr != nil {
		return fmt.Errorf("!external at line %d: decode: %w", n.Line, decodeErr)
	}
	switch {
	case spec.Repo == "" || spec.Ref == "" || spec.Path == "":
		return fmt.Errorf("!external at line %d: repo, ref, path are all required (got %+v)", n.Line, spec)
	case !safeRefPattern.MatchString(spec.Ref):
		return fmt.Errorf("!external at line %d: ref %q contains disallowed characters", n.Line, spec.Ref)
	case strings.Contains(spec.Path, "..") || strings.HasPrefix(spec.Path, "/"):
		return fmt.Errorf("!external at line %d: path %q must be relative-and-down-only", n.Line, spec.Path)
	}

	// Host precedence: explicit url field, then env override, then the
	// built-in default. Scheme and trailing slash are dropped so the value
	// is a bare host for allowlist matching.
	gitHost := spec.URL
	if gitHost == "" {
		if gitHost = os.Getenv("MOLECULE_EXTERNAL_GITEA_URL"); gitHost == "" {
			gitHost = "git.moleculesai.app"
		}
	}
	gitHost = strings.TrimPrefix(gitHost, "https://")
	gitHost = strings.TrimPrefix(gitHost, "http://")
	gitHost = strings.TrimSuffix(gitHost, "/")

	if !allowlistedHostPath(gitHost, spec.Repo) {
		return fmt.Errorf("!external at line %d: %s/%s not in MOLECULE_EXTERNAL_REPO_ALLOWLIST", n.Line, gitHost, spec.Repo)
	}

	fetchCtx, stop := context.WithTimeout(context.Background(), gitFetchTimeout)
	defer stop()
	cacheDir, sha, err := defaultExternalFetcher.Fetch(fetchCtx, rootDir, gitHost, spec.Repo, spec.Ref)
	if err != nil {
		return fmt.Errorf("!external at line %d: fetch %s/%s@%s: %w", n.Line, gitHost, spec.Repo, spec.Ref, err)
	}

	// The key uses the resolved SHA, so it is only known after the fetch.
	// Reaching the same external content along two separate branches is
	// fine; meeting it again while it is still being expanded is a cycle.
	seenKey := fmt.Sprintf("%s/%s@%s/%s", gitHost, spec.Repo, sha, spec.Path)
	if visited[seenKey] {
		return fmt.Errorf("!external cycle detected at %q (line %d)", seenKey, n.Line)
	}

	// Anti-traversal: the yaml file must resolve inside the cache dir.
	yamlFile, err := resolveInsideRoot(cacheDir, spec.Path)
	if err != nil {
		return fmt.Errorf("!external at line %d: path %q: %w", n.Line, spec.Path, err)
	}
	if _, statErr := os.Stat(yamlFile); statErr != nil {
		return fmt.Errorf("!external at line %d: %s/%s@%s does not contain %q: %w", n.Line, gitHost, spec.Repo, sha, spec.Path, statErr)
	}
	raw, err := os.ReadFile(yamlFile)
	if err != nil {
		return fmt.Errorf("!external at line %d: read %q: %w", n.Line, yamlFile, err)
	}

	var doc yaml.Node
	if parseErr := yaml.Unmarshal(raw, &doc); parseErr != nil {
		return fmt.Errorf("!external at line %d: parse %q: %w", n.Line, yamlFile, parseErr)
	}
	// Unwrap the document node so the grafted value is the document's
	// single top-level node.
	content := &doc
	if doc.Kind == yaml.DocumentNode && len(doc.Content) == 1 {
		content = doc.Content[0]
	}

	// Expand nested !include / !external FIRST, then rewrite files_dir over
	// the fully-resolved tree in one pass. Rewriting before recursing would
	// leave files_dir values loaded by nested includes unprefixed.
	visited[seenKey] = true
	defer delete(visited, seenKey)
	if err := expandNode(content, filepath.Dir(yamlFile), rootDir, visited, depth+1); err != nil {
		return err
	}

	// files_dir values become relative to rootDir by gaining the cache
	// directory's rootDir-relative path as a prefix.
	cachePrefix, err := filepath.Rel(rootDir, cacheDir)
	if err != nil {
		return fmt.Errorf("!external at line %d: cannot compute cache prefix: %w", n.Line, err)
	}
	rewriteFilesDirAndIncludes(content, cachePrefix)

	// Graft: overwrite the !external node with the resolved content.
	*n = *content
	if n.Tag == "!external" {
		n.Tag = ""
	}
	return nil
}
// rewriteFilesDirAndIncludes walks the yaml node tree and prepends
// cachePrefix to every files_dir scalar value. Idempotent: if a
// files_dir value already starts with the prefix, no-op.
//
// An empty or null files_dir is left untouched. Joining the prefix onto an
// empty value would silently turn "no files_dir" into "the whole cached
// repo", and would put a path into a node that still carries the !!null
// tag.
//
// !include paths are NOT rewritten. They resolve relative to their
// containing file's directory (subDir in expandNode), and after fetch
// that directory IS inside the cache, so relative paths Just Work
// without any rewrite. Rewriting them would double-prefix.
//
// files_dir DOES need rewriting because it's consumed at workspace-
// provisioning time relative to orgBaseDir (the parent template's root),
// not relative to the workspace.yaml's containing dir.
//
// A nil n is accepted and ignored.
func rewriteFilesDirAndIncludes(n *yaml.Node, cachePrefix string) {
	if n == nil {
		return
	}
	if n.Kind == yaml.MappingNode {
		// Mapping content is a flat [key0, value0, key1, value1, ...] list.
		for i := 0; i+1 < len(n.Content); i += 2 {
			key, value := n.Content[i], n.Content[i+1]
			if key.Kind != yaml.ScalarNode || key.Value != "files_dir" || value.Kind != yaml.ScalarNode {
				continue
			}
			if value.Value == "" || value.Tag == "!!null" {
				// Unset files_dir: nothing to relocate.
				continue
			}
			alreadyPrefixed := value.Value == cachePrefix ||
				strings.HasPrefix(value.Value, cachePrefix+string(filepath.Separator))
			if !alreadyPrefixed {
				value.Value = filepath.Join(cachePrefix, value.Value)
			}
		}
	}
	for _, child := range n.Content {
		rewriteFilesDirAndIncludes(child, cachePrefix)
	}
}
// safeRepoCacheDir flattens "<host>/<repoPath>" into a single directory
// name: every "/" becomes "__" and every ":" (e.g. a host port) becomes
// "_". For example ("git.moleculesai.app", "molecule-ai/foo") yields
// "git.moleculesai.app__molecule-ai__foo". Keeping the result to one path
// segment avoids nested cache dirs, which would complicate cleanup.
func safeRepoCacheDir(host, repoPath string) string {
	flatten := strings.NewReplacer("/", "__", ":", "_")
	return flatten.Replace(host + "/" + repoPath)
}
// gitFetcher is the production externalFetcher: shells out to `git` to
// clone the repo at ref into the cache dir. Cache key includes the
// resolved SHA, so different SHAs of the same ref get different cache
// dirs (no overwrite).
//
// The type carries no fields; everything it needs arrives through Fetch's
// arguments and the process environment.
type gitFetcher struct{}
// Fetch resolves ref → SHA via `git ls-remote`, then `git clone --depth=1`
// into <rootDir>/.external-cache/<safe-repo>/<sha>/ unless that directory
// already contains a .git entry (cache hit, no clone). Auth via
// MOLECULE_GITEA_TOKEN injected into the URL (see buildAuthedURL).
//
// Returns the cache directory and the resolved SHA; both are empty when
// err is non-nil.
//
// Credential hygiene: the clone URL may embed the token, so
//   - git output is redacted before it is placed in a returned error, and
//   - after a successful clone the origin URL stored in the clone's
//     .git/config is rewritten to the credential-free form, so the token is
//     not persisted inside the cache directory.
//
// Cache entries created before this scrubbing existed are not revisited.
func (g *gitFetcher) Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (string, string, error) {
	cacheRoot := filepath.Join(rootDir, externalCacheDirName, safeRepoCacheDir(host, repoPath))
	if err := os.MkdirAll(cacheRoot, 0o755); err != nil {
		return "", "", fmt.Errorf("mkdir cache root: %w", err)
	}
	cloneURL := buildAuthedURL(host, repoPath)

	// Derive the credential-free URL and a redactor for git output. When
	// the URL carries no userinfo both are no-ops.
	publicURL := cloneURL
	redact := func(s string) string { return s }
	if parsed, parseErr := url.Parse(cloneURL); parseErr == nil && parsed.User != nil {
		userinfo := parsed.User.String() // escaped "user:password" form as it appears in the URL
		password, _ := parsed.User.Password()
		redact = func(s string) string {
			s = strings.ReplaceAll(s, userinfo, "***")
			if password != "" {
				s = strings.ReplaceAll(s, password, "***")
			}
			return s
		}
		parsed.User = nil
		publicURL = parsed.String()
	}

	// 1. Resolve ref → SHA (so cache dir is content-addressable).
	sha, err := g.resolveRefToSHA(ctx, cloneURL, ref)
	if err != nil {
		return "", "", fmt.Errorf("ls-remote: %w", err)
	}
	cacheDir := filepath.Join(cacheRoot, sha)
	if _, statErr := os.Stat(filepath.Join(cacheDir, ".git")); statErr == nil {
		// Cache hit.
		return cacheDir, sha, nil
	}

	// 2. Clone into a sibling temp dir so a half-finished clone is never
	// visible at the final cache path.
	tmpDir := cacheDir + ".tmp." + shortHash(time.Now().String())
	cmd := exec.CommandContext(ctx, "git", "clone", "--quiet", "--depth=1", "-b", ref, cloneURL, tmpDir)
	cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
	if out, err := cmd.CombinedOutput(); err != nil {
		os.RemoveAll(tmpDir)
		return "", "", fmt.Errorf("git clone: %w: %s", err, redact(strings.TrimSpace(string(out))))
	}

	// 3. Drop the token from the clone's stored origin URL.
	if publicURL != cloneURL {
		scrub := exec.CommandContext(ctx, "git", "-C", tmpDir, "remote", "set-url", "origin", publicURL)
		scrub.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
		if out, err := scrub.CombinedOutput(); err != nil {
			os.RemoveAll(tmpDir)
			return "", "", fmt.Errorf("git remote set-url: %w: %s", err, redact(strings.TrimSpace(string(out))))
		}
	}

	// 4. Atomic rename to final cache path.
	if err := os.Rename(tmpDir, cacheDir); err != nil {
		// Race: another import beat us to it. The other party's content
		// is at the same SHA, so it's equivalent. Cleanup our tmp.
		os.RemoveAll(tmpDir)
		if _, statErr := os.Stat(filepath.Join(cacheDir, ".git")); statErr != nil {
			return "", "", fmt.Errorf("rename clone to cache: %w", err)
		}
	}
	return cacheDir, sha, nil
}
// resolveRefToSHA asks the remote which object ref points at by running
// `git ls-remote <cloneURL> <ref>` and returns that object id.
//
// It returns an error when git fails, when the remote advertises no ref
// that names ref exactly, or when the advertised id is not a hex object id.
func (g *gitFetcher) resolveRefToSHA(ctx context.Context, cloneURL, ref string) (string, error) {
	cmd := exec.CommandContext(ctx, "git", "ls-remote", cloneURL, ref)
	cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
	out, err := cmd.Output()
	if err != nil {
		return "", err
	}
	return shaFromLsRemote(string(out), ref)
}

// shaFromLsRemote picks the object id for ref out of `git ls-remote`
// output ("<id>\t<refname>" per line).
//
// ls-remote treats its argument as a tail pattern, so asking for "main" can
// also list refs such as refs/heads/feature/main. Taking the first line
// would then resolve the wrong ref. Only exact names are accepted, tried in
// the order branch, tag, literal name (the last covers inputs like "HEAD"
// or a fully-qualified refs/... name).
func shaFromLsRemote(output, ref string) (string, error) {
	idByName := make(map[string]string)
	for _, line := range strings.Split(output, "\n") {
		fields := strings.Fields(line)
		if len(fields) < 2 {
			continue
		}
		idByName[fields[1]] = fields[0]
	}
	for _, name := range []string{"refs/heads/" + ref, "refs/tags/" + ref, ref} {
		id, ok := idByName[name]
		if !ok {
			continue
		}
		// The id becomes a cache directory name, so insist on plain hex.
		if !isHexObjectID(id) {
			return "", fmt.Errorf("ref %q: unexpected object id %q in ls-remote output", ref, id)
		}
		return id, nil
	}
	return "", fmt.Errorf("ref %q not found", ref)
}

// isHexObjectID reports whether s looks like a full git object id:
// 40 (SHA-1) or 64 (SHA-256) lowercase hex characters.
func isHexObjectID(s string) bool {
	if len(s) != 40 && len(s) != 64 {
		return false
	}
	for i := 0; i < len(s); i++ {
		c := s[i]
		if (c < '0' || c > '9') && (c < 'a' || c > 'f') {
			return false
		}
	}
	return true
}
// buildAuthedURL returns the HTTPS clone URL for repoPath on host, i.e.
// "https://<host>/<repoPath>.git". When MOLECULE_GITEA_TOKEN is set, the
// token is embedded as the password of user "oauth2" in the URL's userinfo;
// callers should therefore treat the result as a secret.
func buildAuthedURL(host, repoPath string) string {
	remote := url.URL{
		Scheme: "https",
		Host:   host,
		Path:   "/" + repoPath + ".git",
	}
	if tok := os.Getenv("MOLECULE_GITEA_TOKEN"); tok != "" {
		remote.User = url.UserPassword("oauth2", tok)
	}
	return remote.String()
}
// shortHash returns the first four bytes of the SHA-1 digest of s as eight
// lowercase hex characters. It is used only to build a unique-enough
// suffix for temporary directory names, not for anything security-related.
func shortHash(s string) string {
	const prefixBytes = 4
	digest := sha1.Sum([]byte(s))
	head := digest[:prefixBytes]
	return hex.EncodeToString(head)
}

View File

@ -0,0 +1,331 @@
package handlers
import (
"context"
"os"
"path/filepath"
"strings"
"testing"
"gopkg.in/yaml.v3"
)
// fakeFetcher pre-stages a "fetched" repo at a fixed path inside the
// rootDir's .external-cache, bypassing the real git clone. Tests
// inject this via SetExternalFetcherForTest to exercise the resolver
// + path-rewrite logic without network.
type fakeFetcher struct {
// content maps "<host>/<repo>@<ref>" → a function that materializes
// repo content under cacheDir. Returns the fake SHA to use.
// A key with no entry makes Fetch fail with a *fakeNotFoundError.
content map[string]func(cacheDir string) (sha string, err error)
}
// Fetch implements externalFetcher without touching the network. It looks
// up the staging function registered under "<host>/<repoPath>@<ref>",
// creates the cache directory (always named "deadbeef" so test paths are
// deterministic), lets the staging function populate it, and returns the
// directory together with the SHA the staging function reports. An
// unregistered key yields a *fakeNotFoundError.
func (f *fakeFetcher) Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (string, string, error) {
	lookup := host + "/" + repoPath + "@" + ref
	materialize, registered := f.content[lookup]
	if !registered {
		return "", "", &fakeNotFoundError{key: lookup}
	}

	dest := filepath.Join(rootDir, ".external-cache", safeRepoCacheDir(host, repoPath), "deadbeef")
	if mkErr := os.MkdirAll(dest, 0o755); mkErr != nil {
		return "", "", mkErr
	}

	fakeSHA, stageErr := materialize(dest)
	if stageErr != nil {
		return "", "", stageErr
	}
	return dest, fakeSHA, nil
}
// fakeNotFoundError is returned by fakeFetcher.Fetch when no content has
// been registered for the requested "<host>/<repo>@<ref>" key.
type fakeNotFoundError struct {
	key string
}

// Error reports which key was missing.
func (e *fakeNotFoundError) Error() string {
	const prefix = "fake fetcher: no content registered for "
	return prefix + e.key
}
// stageFiles materializes a fake checkout under cacheDir: it creates an
// empty .git directory (the marker the real fetcher uses to recognise a
// populated cache entry) and then writes every relative-path → content
// pair from files, creating parent directories as needed. It returns the
// first filesystem error encountered, or nil. Helper for fakeFetcher
// closures, which supply the fake SHA themselves.
func stageFiles(cacheDir string, files map[string]string) error {
	gitMarker := filepath.Join(cacheDir, ".git")
	if err := os.MkdirAll(gitMarker, 0o755); err != nil {
		return err
	}
	for rel, body := range files {
		dest := filepath.Join(cacheDir, rel)
		parent := filepath.Dir(dest)
		if err := os.MkdirAll(parent, 0o755); err != nil {
			return err
		}
		if err := os.WriteFile(dest, []byte(body), 0o644); err != nil {
			return err
		}
	}
	return nil
}
// TestResolveExternalMapping_HappyPath: a parent template with an
// !external entry resolves cleanly into the fetched workspace, and every
// files_dir — in the fetched file and in the file it !includes — ends up
// cache-prefixed. The nested !include itself is not rewritten; it resolves
// relative to the including file's directory inside the cache.
//
// NOTE(review): the YAML literals below carry no indentation in this view;
// YAML nesting is whitespace-sensitive, so confirm them against the
// original file before editing.
func TestResolveExternalMapping_HappyPath(t *testing.T) {
tmp := t.TempDir()
// Stub fetcher: "fetched" content has a workspace.yaml that uses
// files_dir + a nested !include relative to the including file's dir.
fake := &fakeFetcher{
content: map[string]func(string) (string, error){
"git.moleculesai.app/molecule-ai/molecule-dev-department@main": func(cacheDir string) (string, error) {
return "deadbeef", stageFiles(cacheDir, map[string]string{
"dev-lead/workspace.yaml": `name: Dev Lead
files_dir: dev-lead
children:
- !include ./core-lead/workspace.yaml
`,
"dev-lead/core-lead/workspace.yaml": `name: Core Platform Lead
files_dir: dev-lead/core-lead
`,
})
},
},
}
cleanup := SetExternalFetcherForTest(fake)
defer cleanup()
// Parent template: no url field, so the default host is used.
src := []byte(`name: Parent
workspaces:
- !external
repo: molecule-ai/molecule-dev-department
ref: main
path: dev-lead/workspace.yaml
`)
out, err := resolveYAMLIncludes(src, tmp)
if err != nil {
t.Fatalf("resolveYAMLIncludes: %v", err)
}
var tmpl OrgTemplate
if err := yaml.Unmarshal(out, &tmpl); err != nil {
t.Fatalf("unmarshal: %v", err)
}
if len(tmpl.Workspaces) != 1 {
t.Fatalf("workspaces: %+v", tmpl.Workspaces)
}
dev := tmpl.Workspaces[0]
if dev.Name != "Dev Lead" {
t.Errorf("dev.Name = %q; want Dev Lead", dev.Name)
}
// files_dir should be cache-prefixed. The prefix is relative to the
// template root: .external-cache/<host>__<repo>/<fake sha dir>.
wantPrefix := filepath.Join(".external-cache", "git.moleculesai.app__molecule-ai__molecule-dev-department", "deadbeef")
if !strings.HasPrefix(dev.FilesDir, wantPrefix) {
t.Errorf("dev.FilesDir = %q; want prefix %q", dev.FilesDir, wantPrefix)
}
if !strings.HasSuffix(dev.FilesDir, "dev-lead") {
t.Errorf("dev.FilesDir = %q; want suffix dev-lead", dev.FilesDir)
}
// Nested child: files_dir cache-prefixed, name Core Platform Lead.
if len(dev.Children) != 1 {
t.Fatalf("dev.Children: %+v", dev.Children)
}
core := dev.Children[0]
if core.Name != "Core Platform Lead" {
t.Errorf("core.Name = %q; want Core Platform Lead", core.Name)
}
if !strings.HasPrefix(core.FilesDir, wantPrefix) {
t.Errorf("core.FilesDir = %q; want prefix %q", core.FilesDir, wantPrefix)
}
if !strings.HasSuffix(core.FilesDir, filepath.Join("dev-lead", "core-lead")) {
t.Errorf("core.FilesDir = %q; want suffix dev-lead/core-lead", core.FilesDir)
}
}
// TestResolveExternalMapping_AllowlistRejection: hostile yaml pointing
// at a non-allowlisted repo gets rejected. The fake fetcher has no content
// registered, so a fetch attempt would fail with a different error; the
// assertion on the allowlist message shows the rejection came first.
func TestResolveExternalMapping_AllowlistRejection(t *testing.T) {
tmp := t.TempDir()
fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
cleanup := SetExternalFetcherForTest(fake)
defer cleanup()
// Default allowlist is git.moleculesai.app/molecule-ai/*.
// github.com/foo/bar is NOT in it.
src := []byte(`workspaces:
- !external
repo: foo/bar
ref: main
path: x.yaml
url: github.com
`)
_, err := resolveYAMLIncludes(src, tmp)
if err == nil {
t.Fatalf("expected allowlist rejection, got nil")
}
// The error names the env var so operators know which knob to turn.
if !strings.Contains(err.Error(), "MOLECULE_EXTERNAL_REPO_ALLOWLIST") {
t.Errorf("expected allowlist error; got %v", err)
}
}
// TestResolveExternalMapping_PathTraversalRejection: hostile yaml
// with `path: ../../etc/passwd` gets rejected before fetch. The path check
// in resolveExternalMapping runs ahead of the allowlist and fetch steps, so
// the empty fake fetcher is never consulted.
func TestResolveExternalMapping_PathTraversalRejection(t *testing.T) {
tmp := t.TempDir()
fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
cleanup := SetExternalFetcherForTest(fake)
defer cleanup()
src := []byte(`workspaces:
- !external
repo: molecule-ai/dev-department
ref: main
path: ../../etc/passwd
`)
_, err := resolveYAMLIncludes(src, tmp)
if err == nil {
t.Fatalf("expected path traversal rejection, got nil")
}
// Matches the wording of the path-validation error.
if !strings.Contains(err.Error(), "relative-and-down-only") {
t.Errorf("expected path traversal error; got %v", err)
}
}
// TestResolveExternalMapping_BadRefRejection: a ref containing characters
// outside safeRefPattern (here spaces and ';') is rejected with a
// "disallowed characters" error.
func TestResolveExternalMapping_BadRefRejection(t *testing.T) {
tmp := t.TempDir()
fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
cleanup := SetExternalFetcherForTest(fake)
defer cleanup()
src := []byte(`workspaces:
- !external
repo: molecule-ai/dev-department
ref: "main; rm -rf /"
path: foo.yaml
`)
_, err := resolveYAMLIncludes(src, tmp)
// A nil error and a wrong error are both failures here.
if err == nil || !strings.Contains(err.Error(), "disallowed characters") {
t.Errorf("expected ref-validation error; got %v", err)
}
}
// TestResolveExternalMapping_MissingRequiredFields: repo / ref / path
// are all required. Each case omits exactly one of the three and expects
// an error whose message contains "required".
func TestResolveExternalMapping_MissingRequiredFields(t *testing.T) {
tmp := t.TempDir()
fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
cleanup := SetExternalFetcherForTest(fake)
defer cleanup()
cases := []string{
// missing repo
`workspaces:
- !external
ref: main
path: x.yaml
`,
// missing ref
`workspaces:
- !external
repo: molecule-ai/x
path: x.yaml
`,
// missing path
`workspaces:
- !external
repo: molecule-ai/x
ref: main
`,
}
// The case index is reported so a failure identifies the missing field.
for i, src := range cases {
_, err := resolveYAMLIncludes([]byte(src), tmp)
if err == nil {
t.Errorf("case %d: expected required-field error, got nil", i)
} else if !strings.Contains(err.Error(), "required") {
t.Errorf("case %d: want 'required' in error; got %v", i, err)
}
}
}
// TestRewriteFilesDirAndIncludes: verify the path-rewrite walker
// prefixes files_dir scalars. !include scalars are NOT rewritten —
// they resolve relative to their containing file's dir, which post-
// fetch is naturally inside the cache.
//
// The walker is called directly on a parsed node tree, so the !include
// tags are never expanded here; they only need to survive the round trip.
func TestRewriteFilesDirAndIncludes(t *testing.T) {
src := `name: Foo
files_dir: dev-lead
children:
- !include ./bar/workspace.yaml
- !include other-team.yaml
inner:
files_dir: dev-lead/sub
`
var n yaml.Node
if err := yaml.Unmarshal([]byte(src), &n); err != nil {
t.Fatal(err)
}
rewriteFilesDirAndIncludes(&n, ".external-cache/foo/bar")
// Assertions run against the re-marshalled text rather than the tree.
out, err := yaml.Marshal(&n)
if err != nil {
t.Fatal(err)
}
got := string(out)
for _, want := range []string{
"files_dir: .external-cache/foo/bar/dev-lead",
"files_dir: .external-cache/foo/bar/dev-lead/sub",
// !include preserved as-is; resolves naturally via subDir.
"!include ./bar/workspace.yaml",
"!include other-team.yaml",
} {
if !strings.Contains(got, want) {
t.Errorf("missing %q in:\n%s", want, got)
}
}
}
// TestRewriteFilesDirAndIncludes_Idempotent: re-running the rewriter
// on already-prefixed files_dir doesn't double-prefix. The input starts
// out prefixed, so a single call stands in for the "second" run.
func TestRewriteFilesDirAndIncludes_Idempotent(t *testing.T) {
src := `files_dir: .external-cache/foo/bar/dev-lead
inner:
files_dir: .external-cache/foo/bar/dev-lead/sub
`
var n yaml.Node
if err := yaml.Unmarshal([]byte(src), &n); err != nil {
t.Fatal(err)
}
rewriteFilesDirAndIncludes(&n, ".external-cache/foo/bar")
// NOTE(review): the Marshal error is discarded here; a failure would
// surface as the "expected unchanged" assertions below not matching.
out, _ := yaml.Marshal(&n)
got := string(out)
if strings.Contains(got, ".external-cache/foo/bar/.external-cache") {
t.Errorf("double-prefix detected:\n%s", got)
}
// Should still be valid (single-prefixed) afterwards.
for _, want := range []string{
"files_dir: .external-cache/foo/bar/dev-lead",
"files_dir: .external-cache/foo/bar/dev-lead/sub",
} {
if !strings.Contains(got, want) {
t.Errorf("expected unchanged %q in:\n%s", want, got)
}
}
}
// TestAllowlistedHostPath covers both allowlist sources: the built-in
// default (env var set to the empty string) and an explicit
// MOLECULE_EXTERNAL_REPO_ALLOWLIST override using trailing-/* patterns.
func TestAllowlistedHostPath(t *testing.T) {
	const envKey = "MOLECULE_EXTERNAL_REPO_ALLOWLIST"
	expect := func(want bool, host, repo, failure string) {
		if allowlistedHostPath(host, repo) != want {
			t.Error(failure)
		}
	}

	// Empty env var → default allowlist.
	t.Setenv(envKey, "")
	expect(true, "git.moleculesai.app", "molecule-ai/foo", "default allowlist should accept molecule-ai/*")
	expect(false, "github.com", "molecule-ai/foo", "default allowlist should reject github.com")

	// Comma-separated override with two prefix patterns.
	t.Setenv(envKey, "github.com/me/*,git.moleculesai.app/*")
	expect(true, "github.com", "me/x", "override should accept github.com/me/*")
	expect(true, "git.moleculesai.app", "any/repo", "override should accept git.moleculesai.app/*")
	expect(false, "github.com", "evil/x", "override should reject github.com/evil/*")
}

View File

@ -76,6 +76,12 @@ func expandNode(n *yaml.Node, currentDir, rootDir string, visited map[string]boo
return resolveIncludeScalar(n, currentDir, rootDir, visited, depth)
}
// `!external`-tagged mapping: gitops cross-repo subtree composition.
// See org_external.go (internal#77 / task #222).
if n.Kind == yaml.MappingNode && n.Tag == "!external" {
return resolveExternalMapping(n, currentDir, rootDir, visited, depth)
}
for _, child := range n.Content {
if err := expandNode(child, currentDir, rootDir, visited, depth); err != nil {
return err