From f474e9c94224e3e05c19b1a3504af4c64ef46e00 Mon Sep 17 00:00:00 2001 From: claude-ceo-assistant Date: Fri, 8 May 2026 04:46:50 -0700 Subject: [PATCH] fix(validator): match platform's path-string symlink semantics + add local-e2e setup script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups discovered while pre-flighting local platform spin-up (internal#77 dev-department extraction): VALIDATOR (closes task #231) Previous validate-tree.py used Path.resolve() everywhere — Python's realpath equivalent that follows symlinks. Caused false positives on parent template's cross-repo symlink (dev-lead → sibling repo): validator reported 'resolves outside repo root' even though the platform's resolveYAMLIncludes (workspace-server/internal/handlers/ org_include.go) accepts the path because filepath.Abs/Rel operate on path STRINGS, not on the realpath. Fix: introduce _abs_no_symlink_resolve() helper using os.path.abspath (string-only) and _is_inside_root() that mirrors Go's filepath.Rel + HasPrefix idiom. All places where the validator emulates the platform security check now use these helpers; Path.resolve() is reserved for cases that need realpath semantics (none currently). Verified: validator now passes on parent template's dev-lead symlink (was previously a hard error), still rejects truly-out-of-root references (e.g. ../../../etc/passwd-style escapes). LOCAL E2E SETUP SCRIPT (DX win) New .molecule-ci/scripts/local-e2e-setup.sh — bootstraps the /tmp/local-e2e-deploy/ sibling-clone fixture used by workspace-server's TestLocalE2E_* tests. Idempotent (pulls latest) with --fresh to wipe and re-clone. Exits non-zero with diagnostics if the parent template's dev-lead symlink is missing or broken (catches stale parent-template clones from before PR #5). Refs: internal#77 — extraction RFC task #231 — validator-vs-platform symlink semantics Hongming GO 2026-05-08 ('go' on the 3 pre-spin-up optimizations) --- .molecule-ci/scripts/local-e2e-setup.sh | 77 +++++++++++++++++++++++++ .molecule-ci/scripts/validate-tree.py | 66 ++++++++++++++++----- 2 files changed, 128 insertions(+), 15 deletions(-) create mode 100755 .molecule-ci/scripts/local-e2e-setup.sh diff --git a/.molecule-ci/scripts/local-e2e-setup.sh b/.molecule-ci/scripts/local-e2e-setup.sh new file mode 100755 index 0000000..adb8796 --- /dev/null +++ b/.molecule-ci/scripts/local-e2e-setup.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# local-e2e-setup.sh — bootstrap the local sibling-clone fixture for +# end-to-end testing of the dev-department extraction (RFC internal#77). +# +# Sets up: +# /tmp/local-e2e-deploy/ +# ├── molecule-dev/ ← parent template (symlink wires dev-lead) +# └── molecule-dev-department/ ← extracted dev tree +# +# After running, both Go tests are exercisable: +# cd /workspace-server +# go test -v -run TestLocalE2E_DevDepartmentExtraction ./internal/handlers/ +# go test -v -run TestLocalE2E_FilesDirConsumption ./internal/handlers/ +# +# Idempotent: re-running pulls latest from both repos. Pass --fresh to +# wipe and re-clone. + +set -euo pipefail + +ROOT="${LOCAL_E2E_ROOT:-/tmp/local-e2e-deploy}" +GITEA="${GITEA_URL:-https://git.moleculesai.app}" +TOKEN_PATH="${HOME}/.molecule-ai/gitea-token" + +PARENT_REPO="molecule-ai-org-template-molecule-dev" +PARENT_DIR_NAME="molecule-dev" # dir name parent expects (matches operator convention) +SUBTREE_REPO="molecule-dev-department" + +if [[ "${1:-}" == "--fresh" ]]; then + echo "[fresh] wiping ${ROOT}" + rm -rf "${ROOT}" +fi + +mkdir -p "${ROOT}" +cd "${ROOT}" + +if [[ ! -f "${TOKEN_PATH}" ]]; then + echo "ERROR: gitea token not at ${TOKEN_PATH}" >&2 + exit 2 +fi +TOKEN="$(cat "${TOKEN_PATH}")" + +clone_or_pull() { + local repo="$1" dir="$2" + local url="https://oauth2:${TOKEN}@${GITEA#https://}/molecule-ai/${repo}.git" + if [[ -d "${dir}/.git" ]]; then + echo "[pull] ${dir}" + git -C "${dir}" pull --ff-only --quiet + else + echo "[clone] ${repo} → ${dir}" + git clone --quiet "${url}" "${dir}" + fi +} + +clone_or_pull "${PARENT_REPO}" "${PARENT_DIR_NAME}" +clone_or_pull "${SUBTREE_REPO}" "${SUBTREE_REPO}" + +# Sanity: parent's dev-lead symlink target resolves to subtree's dev-lead. +SYMLINK="${PARENT_DIR_NAME}/dev-lead" +if [[ ! -L "${SYMLINK}" ]]; then + echo "ERROR: ${SYMLINK} is not a symlink — parent template's PR #5 (slim) may not be deployed yet" >&2 + exit 3 +fi +if [[ ! -f "${SYMLINK}/workspace.yaml" ]]; then + echo "ERROR: ${SYMLINK}/workspace.yaml does not resolve — symlink target missing" >&2 + ls -la "${SYMLINK}" >&2 + exit 4 +fi + +echo "" +echo "== ready ==" +echo " parent : ${ROOT}/${PARENT_DIR_NAME}" +echo " subtree: ${ROOT}/${SUBTREE_REPO}" +echo " symlink: $(ls -la "${SYMLINK}" | awk '{print $NF}')" +echo "" +echo "Run tests:" +echo " cd /workspace-server" +echo " go test -v -run TestLocalE2E_ ./internal/handlers/" diff --git a/.molecule-ci/scripts/validate-tree.py b/.molecule-ci/scripts/validate-tree.py index 08ea425..66b65b7 100755 --- a/.molecule-ci/scripts/validate-tree.py +++ b/.molecule-ci/scripts/validate-tree.py @@ -44,6 +44,38 @@ import sys from pathlib import Path from typing import Any + +def _abs_no_symlink_resolve(p: Path) -> Path: + """Return absolute path WITHOUT following symlinks. Mirrors Go's + filepath.Abs (which is path-string only — does not call EvalSymlinks). + + This matters for cross-repo composition: parent template's + `dev-lead → ../sibling-repo/dev-lead/` symlink. The platform's + resolveYAMLIncludes (workspace-server/internal/handlers/org_include.go) + uses filepath.Abs/Rel for the in-root security check, so a path that + string-resolves to inside-root passes — even if the symlink target + lives outside. The actual file-read follows the symlink at OS layer + via os.ReadFile. + + Path.resolve() in Python's stdlib follows symlinks (it's like + realpath), which would reject the cross-repo case as "outside root". + Use this helper everywhere we mirror the Go security check; reserve + Path.resolve() for places we actually want realpath behavior.""" + return Path(os.path.abspath(p)) + + +def _is_inside_root(target: Path, root: Path) -> bool: + """Path-string check: is `target` lexically inside `root`? + Mirrors Go's filepath.Rel + HasPrefix idiom from + resolveInsideRoot in workspace-server.""" + target_abs = _abs_no_symlink_resolve(target) + root_abs = _abs_no_symlink_resolve(root) + try: + target_abs.relative_to(root_abs) + except ValueError: + return False + return True + try: import yaml # PyYAML except ImportError: @@ -130,12 +162,13 @@ def _walk_workspace_node( # !include sentinel. if isinstance(node, dict) and "__include__" in node: rel = node["__include__"] - target = (yaml_dir / rel).resolve() - try: - target.relative_to(repo_root.resolve()) - except ValueError: + # Path-string in-root check (mirrors Go filepath.Abs/Rel — does + # NOT follow symlinks at this layer, so cross-repo symlinks like + # parent's dev-lead → ../molecule-dev-department/dev-lead/ pass). + target = _abs_no_symlink_resolve(yaml_dir / rel) + if not _is_inside_root(target, repo_root): report.errors.append( - f"!include {rel!r} (from {yaml_dir.name}) resolves outside repo root: {target}" + f"!include {rel!r} (from {yaml_dir.name}) escapes repo root (path-string check): {target}" ) return if not target.exists(): @@ -156,17 +189,18 @@ def _walk_workspace_node( # no workspace.yaml) — recurse without registering. child_folder: str | None = None if target.name == "workspace.yaml": - child_folder = str(target.parent.resolve().relative_to(repo_root.resolve())) + # parent dir as a path STRING (no symlink follow) relative to repo root. + parent_abs = _abs_no_symlink_resolve(target.parent) + child_folder = str(parent_abs.relative_to(_abs_no_symlink_resolve(repo_root))) elif isinstance(sub, dict) and sub.get("files_dir"): fd = sub["files_dir"] - fd_resolved = (repo_root / fd).resolve() - try: - child_folder = str(fd_resolved.relative_to(repo_root.resolve())) - except ValueError: + fd_path = _abs_no_symlink_resolve(repo_root / fd) + if not _is_inside_root(fd_path, repo_root): report.errors.append( f"!include {rel!r} declares files_dir {fd!r} outside repo root" ) return + child_folder = str(fd_path.relative_to(_abs_no_symlink_resolve(repo_root))) # Cross-tree `..` ref check on the path the user wrote. if child_folder is not None and parent_folder is not None: @@ -194,12 +228,14 @@ def _walk_workspace_node( files_dir = node.get("files_dir") current_folder = parent_folder if files_dir: - fd_resolved = (repo_root / files_dir).resolve() - try: - this_folder = str(fd_resolved.relative_to(repo_root.resolve())) - except ValueError: - report.errors.append(f"files_dir {files_dir!r} escapes repo root") + # Path-string check: mirror Go filepath.Abs/Rel — do NOT + # follow symlinks at this layer. Cross-repo symlinks + # (parent's dev-lead → sibling-repo) are intentional. + fd_path = _abs_no_symlink_resolve(repo_root / files_dir) + if not _is_inside_root(fd_path, repo_root): + report.errors.append(f"files_dir {files_dir!r} escapes repo root (path-string)") return + this_folder = str(fd_path.relative_to(_abs_no_symlink_resolve(repo_root))) if not skip_files_dir_register: report.add_edge(parent_folder or "", this_folder) current_folder = this_folder -- 2.45.2