fix(validator): match platform's path-string symlink semantics + add local-e2e setup script

Two follow-ups discovered while pre-flighting local platform spin-up
(internal#77 dev-department extraction):

VALIDATOR (closes task #231)
  Previous validate-tree.py used Path.resolve() everywhere — Python's
  realpath equivalent that follows symlinks. Caused false positives on
  parent template's cross-repo symlink (dev-lead → sibling repo):
  validator reported 'resolves outside repo root' even though the
  platform's resolveYAMLIncludes (workspace-server/internal/handlers/
  org_include.go) accepts the path because filepath.Abs/Rel operate on
  path STRINGS, not on the realpath.

  Fix: introduce _abs_no_symlink_resolve() helper using os.path.abspath
  (string-only) and _is_inside_root() that mirrors Go's filepath.Rel +
  HasPrefix idiom. All places where the validator emulates the platform
  security check now use these helpers; Path.resolve() is reserved for
  cases that need realpath semantics (none currently).

  Verified: validator now passes on parent template's dev-lead symlink
  (was previously a hard error), still rejects truly-out-of-root
  references (e.g. ../../../etc/passwd-style escapes).

LOCAL E2E SETUP SCRIPT (DX win)
  New .molecule-ci/scripts/local-e2e-setup.sh — bootstraps the
  /tmp/local-e2e-deploy/ sibling-clone fixture used by
  workspace-server's TestLocalE2E_* tests. Idempotent (pulls latest)
  with --fresh to wipe and re-clone. Exits non-zero with diagnostics
  if the parent template's dev-lead symlink is missing or broken
  (catches stale parent-template clones from before PR #5).

Refs:
  internal#77 — extraction RFC
  task #231 — validator-vs-platform symlink semantics
  Hongming GO 2026-05-08 ('go' on the 3 pre-spin-up optimizations)
This commit is contained in:
claude-ceo-assistant 2026-05-08 04:46:50 -07:00
parent 321979e4dc
commit f474e9c942
2 changed files with 128 additions and 15 deletions

View File

@ -0,0 +1,77 @@
#!/usr/bin/env bash
# local-e2e-setup.sh — bootstrap the local sibling-clone fixture for
# end-to-end testing of the dev-department extraction (RFC internal#77).
#
# Sets up:
# /tmp/local-e2e-deploy/
# ├── molecule-dev/ ← parent template (symlink wires dev-lead)
# └── molecule-dev-department/ ← extracted dev tree
#
# After running, both Go tests are exercisable:
# cd <molecule-core>/workspace-server
# go test -v -run TestLocalE2E_DevDepartmentExtraction ./internal/handlers/
# go test -v -run TestLocalE2E_FilesDirConsumption ./internal/handlers/
#
# Idempotent: re-running pulls latest from both repos. Pass --fresh to
# wipe and re-clone.
set -euo pipefail
ROOT="${LOCAL_E2E_ROOT:-/tmp/local-e2e-deploy}"
GITEA="${GITEA_URL:-https://git.moleculesai.app}"
TOKEN_PATH="${HOME}/.molecule-ai/gitea-token"
PARENT_REPO="molecule-ai-org-template-molecule-dev"
PARENT_DIR_NAME="molecule-dev" # dir name parent expects (matches operator convention)
SUBTREE_REPO="molecule-dev-department"
if [[ "${1:-}" == "--fresh" ]]; then
echo "[fresh] wiping ${ROOT}"
rm -rf "${ROOT}"
fi
mkdir -p "${ROOT}"
cd "${ROOT}"
if [[ ! -f "${TOKEN_PATH}" ]]; then
echo "ERROR: gitea token not at ${TOKEN_PATH}" >&2
exit 2
fi
TOKEN="$(cat "${TOKEN_PATH}")"
clone_or_pull() {
local repo="$1" dir="$2"
local url="https://oauth2:${TOKEN}@${GITEA#https://}/molecule-ai/${repo}.git"
if [[ -d "${dir}/.git" ]]; then
echo "[pull] ${dir}"
git -C "${dir}" pull --ff-only --quiet
else
echo "[clone] ${repo}${dir}"
git clone --quiet "${url}" "${dir}"
fi
}
clone_or_pull "${PARENT_REPO}" "${PARENT_DIR_NAME}"
clone_or_pull "${SUBTREE_REPO}" "${SUBTREE_REPO}"
# Sanity: parent's dev-lead symlink target resolves to subtree's dev-lead.
SYMLINK="${PARENT_DIR_NAME}/dev-lead"
if [[ ! -L "${SYMLINK}" ]]; then
echo "ERROR: ${SYMLINK} is not a symlink — parent template's PR #5 (slim) may not be deployed yet" >&2
exit 3
fi
if [[ ! -f "${SYMLINK}/workspace.yaml" ]]; then
echo "ERROR: ${SYMLINK}/workspace.yaml does not resolve — symlink target missing" >&2
ls -la "${SYMLINK}" >&2
exit 4
fi
echo ""
echo "== ready =="
echo " parent : ${ROOT}/${PARENT_DIR_NAME}"
echo " subtree: ${ROOT}/${SUBTREE_REPO}"
echo " symlink: $(ls -la "${SYMLINK}" | awk '{print $NF}')"
echo ""
echo "Run tests:"
echo " cd <molecule-core>/workspace-server"
echo " go test -v -run TestLocalE2E_ ./internal/handlers/"

View File

@ -44,6 +44,38 @@ import sys
from pathlib import Path
from typing import Any
def _abs_no_symlink_resolve(p: Path) -> Path:
"""Return absolute path WITHOUT following symlinks. Mirrors Go's
filepath.Abs (which is path-string only does not call EvalSymlinks).
This matters for cross-repo composition: parent template's
`dev-lead ../sibling-repo/dev-lead/` symlink. The platform's
resolveYAMLIncludes (workspace-server/internal/handlers/org_include.go)
uses filepath.Abs/Rel for the in-root security check, so a path that
string-resolves to inside-root passes even if the symlink target
lives outside. The actual file-read follows the symlink at OS layer
via os.ReadFile.
Path.resolve() in Python's stdlib follows symlinks (it's like
realpath), which would reject the cross-repo case as "outside root".
Use this helper everywhere we mirror the Go security check; reserve
Path.resolve() for places we actually want realpath behavior."""
return Path(os.path.abspath(p))
def _is_inside_root(target: Path, root: Path) -> bool:
"""Path-string check: is `target` lexically inside `root`?
Mirrors Go's filepath.Rel + HasPrefix idiom from
resolveInsideRoot in workspace-server."""
target_abs = _abs_no_symlink_resolve(target)
root_abs = _abs_no_symlink_resolve(root)
try:
target_abs.relative_to(root_abs)
except ValueError:
return False
return True
try:
import yaml # PyYAML
except ImportError:
@ -130,12 +162,13 @@ def _walk_workspace_node(
# !include sentinel.
if isinstance(node, dict) and "__include__" in node:
rel = node["__include__"]
target = (yaml_dir / rel).resolve()
try:
target.relative_to(repo_root.resolve())
except ValueError:
# Path-string in-root check (mirrors Go filepath.Abs/Rel — does
# NOT follow symlinks at this layer, so cross-repo symlinks like
# parent's dev-lead → ../molecule-dev-department/dev-lead/ pass).
target = _abs_no_symlink_resolve(yaml_dir / rel)
if not _is_inside_root(target, repo_root):
report.errors.append(
f"!include {rel!r} (from {yaml_dir.name}) resolves outside repo root: {target}"
f"!include {rel!r} (from {yaml_dir.name}) escapes repo root (path-string check): {target}"
)
return
if not target.exists():
@ -156,17 +189,18 @@ def _walk_workspace_node(
# no workspace.yaml) — recurse without registering.
child_folder: str | None = None
if target.name == "workspace.yaml":
child_folder = str(target.parent.resolve().relative_to(repo_root.resolve()))
# parent dir as a path STRING (no symlink follow) relative to repo root.
parent_abs = _abs_no_symlink_resolve(target.parent)
child_folder = str(parent_abs.relative_to(_abs_no_symlink_resolve(repo_root)))
elif isinstance(sub, dict) and sub.get("files_dir"):
fd = sub["files_dir"]
fd_resolved = (repo_root / fd).resolve()
try:
child_folder = str(fd_resolved.relative_to(repo_root.resolve()))
except ValueError:
fd_path = _abs_no_symlink_resolve(repo_root / fd)
if not _is_inside_root(fd_path, repo_root):
report.errors.append(
f"!include {rel!r} declares files_dir {fd!r} outside repo root"
)
return
child_folder = str(fd_path.relative_to(_abs_no_symlink_resolve(repo_root)))
# Cross-tree `..` ref check on the path the user wrote.
if child_folder is not None and parent_folder is not None:
@ -194,12 +228,14 @@ def _walk_workspace_node(
files_dir = node.get("files_dir")
current_folder = parent_folder
if files_dir:
fd_resolved = (repo_root / files_dir).resolve()
try:
this_folder = str(fd_resolved.relative_to(repo_root.resolve()))
except ValueError:
report.errors.append(f"files_dir {files_dir!r} escapes repo root")
# Path-string check: mirror Go filepath.Abs/Rel — do NOT
# follow symlinks at this layer. Cross-repo symlinks
# (parent's dev-lead → sibling-repo) are intentional.
fd_path = _abs_no_symlink_resolve(repo_root / files_dir)
if not _is_inside_root(fd_path, repo_root):
report.errors.append(f"files_dir {files_dir!r} escapes repo root (path-string)")
return
this_folder = str(fd_path.relative_to(_abs_no_symlink_resolve(repo_root)))
if not skip_files_dir_register:
report.add_edge(parent_folder or "<root>", this_folder)
current_folder = this_folder