From d47c15d526511456a073156ee28f7c5b9020974d Mon Sep 17 00:00:00 2001 From: dev-lead Date: Fri, 8 May 2026 08:52:32 -0700 Subject: [PATCH] fix(validate): recognize !external + !include as opaque refs (skip, not error) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit molecule-ai-org-template-molecule-dev's CI has been red since the "pin: dev-department v1.0.0" merge. Symptom: ::error::Workspace at : missing 'name' ::error::Workspace at : missing 'name' Root cause: org.yaml uses `!external` for the dev-department subtree fetch (introduced internal#77 / molecule-core#105). The PermissiveLoader formerly handed every unknown tag to a single multi-constructor that flattens the parsed value to a plain dict. The validator's validate_workspace() then saw a dict with no `name` key and tripped the "missing name" error — but the dict was a `!external` directive, not a malformed workspace. The fix wraps both supported tags in distinct sentinel types: - !include → IncludeRef (str subclass) - !external → ExternalRef (dict subclass) validate_workspace() and count_ws() now skip these instead of treating them as workspace shape. Real workspace dicts (with names) still get the full structural check. Unknown tags fall through to the multi-constructor exactly as before, preserving back-compat. Verified on the live failing org.yaml: ✓ org.yaml valid: Molecule AI Dev Team (0 direct workspaces; external refs not counted) And on a synthetic case with one real bug (missing-name workspace nested under children): ::error::Workspace at : missing 'name' ::error::Workspace at /: missing 'name' exit 1 So the validator still catches real shape bugs; it just doesn't false-positive on the new !external pattern. Co-Authored-By: Claude Opus 4.7 (1M context) --- .molecule-ci/scripts/validate-org-template.py | 49 +++++++++++++++++-- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/.molecule-ci/scripts/validate-org-template.py b/.molecule-ci/scripts/validate-org-template.py index 75484a5..4e727a9 100644 --- a/.molecule-ci/scripts/validate-org-template.py +++ b/.molecule-ci/scripts/validate-org-template.py @@ -2,19 +2,47 @@ """Validate a Molecule AI org template repo.""" import os, sys, yaml -# Support !include and other custom YAML tags used by org templates. -# These resolve at platform load time, not at validation time — we just -# need to parse past them without crashing. +# Support custom YAML tags used by org templates. Two shapes: +# +# - `!include teams/pm.yaml` → scalar string referencing another YAML +# file in the same repo. Platform inlines at load time. +# +# - `!external\n repo: ...\n ref: ...\n path: ...` → mapping +# referencing a workspace tree to fetch from another repo. Platform +# fetches into a content-addressable cache at load time +# (internal#77 / molecule-core#105). +# +# Both shapes resolve at platform load time, not at validation time. +# The validator treats them as opaque references — it does NOT chase +# them down. We mark each parsed value with a sentinel subtype so the +# `validate_workspace` walk knows to skip them rather than tripping +# the "missing 'name'" branch. +class IncludeRef(str): + """`!include path/to.yaml` — opaque reference, skipped by validator.""" + +class ExternalRef(dict): + """`!external` mapping — opaque reference, skipped by validator.""" + class PermissiveLoader(yaml.SafeLoader): pass +def _include_constructor(loader, node): + return IncludeRef(loader.construct_scalar(node)) + +def _external_constructor(loader, node): + return ExternalRef(loader.construct_mapping(node)) + def _generic_constructor(loader, tag_suffix, node): + # Fallback for unknown tags. Preserve the parsed shape so legacy + # docs that lean on tags we have not modeled yet still parse. if isinstance(node, yaml.MappingNode): return loader.construct_mapping(node) if isinstance(node, yaml.SequenceNode): return loader.construct_sequence(node) return loader.construct_scalar(node) +PermissiveLoader.add_constructor("!include", _include_constructor) +PermissiveLoader.add_constructor("!external", _external_constructor) PermissiveLoader.add_multi_constructor("!", _generic_constructor) errors = [] @@ -33,7 +61,13 @@ if not org.get("workspaces") and not org.get("defaults"): errors.append("org.yaml must have at least 'workspaces' or 'defaults'") def validate_workspace(ws, path=""): - # !include tags resolve to strings at parse time; skip non-dicts + # `!include path/to.yaml` parses as IncludeRef (str subclass). + # `!external {repo, ref, path}` parses as ExternalRef (dict subclass). + # Both are opaque references — skip without chasing. + if isinstance(ws, (IncludeRef, ExternalRef)): + return [] + # Legacy unknown-tag scalars (handled by _generic_constructor) stay + # as plain strings; they are not workspace dicts either. if not isinstance(ws, dict): return [] ws_errors = [] @@ -59,6 +93,11 @@ if errors: def count_ws(nodes): c = 0 for n in nodes: + # Skip opaque references — we do not know how many workspaces + # they expand to without resolving them, and resolution is the + # platform's job, not the validator's. + if isinstance(n, (IncludeRef, ExternalRef)): + continue if not isinstance(n, dict): continue c += 1 @@ -66,4 +105,4 @@ def count_ws(nodes): return c total = count_ws(org.get("workspaces", [])) -print(f"✓ org.yaml valid: {org['name']} ({total} workspaces)") +print(f"✓ org.yaml valid: {org['name']} ({total} direct workspaces; external refs not counted)")