feat(validate): platform-model SSOT drift gate (template ⊆ manifest) #24

Merged
hongming merged 2 commits from feat/platform-models-ssot-drift-gate into main 2026-05-27 10:58:14 +00:00
2 changed files with 298 additions and 0 deletions
+166
View File
@@ -683,3 +683,169 @@ def test_runtime_not_installed_warns_not_errors(validator, tmp_path, monkeypatch
"skipping runtime-load check" in w
for w in validator.WARNINGS
), validator.WARNINGS
# ──────────────────────────────── platform-model SSOT drift gate
def _manifest_fixture() -> str:
    """Return a minimal controlplane providers manifest as YAML text.

    Only the ``runtimes`` block is populated, because that is the only part
    the drift gate reads: a single ``hermes`` runtime with a ``kimi-coding``
    provider and a ``platform`` provider offering two models.

    The indentation is load-bearing. ``providers`` must nest under ``hermes``
    and every ``models`` list under its own provider entry; with the lines
    flattened to one level the text no longer describes the
    runtimes -> runtime -> providers -> models shape the gate walks.
    """
    return (
        "schema_version: 1\n"
        "runtimes:\n"
        "  hermes:\n"
        "    providers:\n"
        "      - name: kimi-coding\n"
        "        models: [kimi-coding/kimi-k2]\n"
        "      - name: platform\n"
        "        models: [moonshot/kimi-k2.6, moonshot/kimi-k2.5]\n"
    )
def _config_with_platform(runtime: str, platform_ids: list[str]) -> str:
    """Build the text of a template ``config.yaml`` for the drift-gate tests.

    The config always contains one non-platform model
    (``kimi-coding/kimi-k2``) and then one ``provider: platform`` entry per id
    in ``platform_ids``.

    Args:
        runtime: value written to the top-level ``runtime`` key.
        platform_ids: model ids to emit as platform-managed entries, in order.

    Returns:
        The YAML document as a single string.
    """
    lines = [
        "name: t\n",
        f"runtime: {runtime}\n",
        "template_schema_version: 1\n",
        "runtime_config:\n",
        "  models:\n",
        "    - id: kimi-coding/kimi-k2\n",
        "      required_env: [KIMI_API_KEY]\n",
    ]
    for mid in platform_ids:
        # `provider` / `required_env` are indented under the `- id:` item so
        # they are fields of that model entry, not siblings of `models`.
        lines += [
            f"    - id: {mid}\n",
            "      provider: platform\n",
            "      required_env: []\n",
        ]
    return "".join(lines)
def _setup_drift(tmp_path, monkeypatch, config_yaml, manifest_text=None):
    """Stage a template directory for a drift-gate test and make it the cwd.

    Writes ``config_yaml`` to ``tmp_path/config.yaml``. When ``manifest_text``
    is given, it is also written to ``tmp_path/manifest.yaml`` and
    ``PROVIDERS_MANIFEST_FILE`` is pointed at that file. Finally the working
    directory is switched to ``tmp_path``.
    """
    config_file = tmp_path / "config.yaml"
    config_file.write_text(config_yaml)

    if manifest_text is not None:
        manifest_file = tmp_path / "manifest.yaml"
        manifest_file.write_text(manifest_text)
        monkeypatch.setenv("PROVIDERS_MANIFEST_FILE", str(manifest_file))

    # Unconditional: callers that pass no manifest still need config.yaml in cwd.
    monkeypatch.chdir(tmp_path)
def test_platform_models_subset_passes(validator, tmp_path, monkeypatch):
    """A platform model that the manifest lists yields no errors and no warnings."""
    template_yaml = _config_with_platform("hermes", ["moonshot/kimi-k2.6"])
    manifest_yaml = _manifest_fixture()
    _setup_drift(tmp_path, monkeypatch, template_yaml, manifest_yaml)

    validator.check_platform_models()

    assert validator.ERRORS == [], validator.ERRORS
    assert validator.WARNINGS == [], validator.WARNINGS
def test_platform_model_not_in_manifest_errors(validator, tmp_path, monkeypatch):
    """Offering a platform model absent from the manifest records an error naming it."""
    offered = ["moonshot/kimi-k2.6", "moonshot/kimi-k2.99"]
    template_yaml = _config_with_platform("hermes", offered)
    _setup_drift(tmp_path, monkeypatch, template_yaml, _manifest_fixture())

    validator.check_platform_models()

    # Only kimi-k2.99 is missing from the fixture manifest's platform set.
    flagged = [e for e in validator.ERRORS if "kimi-k2.99" in e]
    assert flagged, validator.ERRORS
def test_no_platform_models_skips(validator, tmp_path, monkeypatch):
    """A template with no ``provider: platform`` model is not gated at all.

    The config carries a single non-platform model, so the check is expected
    to return without recording errors or warnings.
    """
    # `required_env` is nested under the `- id:` item so it is a field of the
    # model entry rather than a stray key next to `models`.
    cfg = (
        "name: t\nruntime: hermes\ntemplate_schema_version: 1\n"
        "runtime_config:\n"
        "  models:\n"
        "    - id: kimi-coding/kimi-k2\n"
        "      required_env: [KIMI_API_KEY]\n"
    )
    _setup_drift(tmp_path, monkeypatch, cfg, _manifest_fixture())
    validator.check_platform_models()
    assert validator.ERRORS == []
    assert validator.WARNINGS == []
def test_manifest_unreachable_warns_not_errors(validator, tmp_path, monkeypatch):
    """An unloadable manifest downgrades the gate to a skip warning, not an error."""
    template_yaml = _config_with_platform("hermes", ["moonshot/kimi-k2.6"])
    _setup_drift(tmp_path, monkeypatch, template_yaml)

    # Aim the override at a file that was never created, so the manifest
    # loader comes back with None and the check takes its warn-and-skip path.
    missing_manifest = tmp_path / "nope.yaml"
    monkeypatch.setenv("PROVIDERS_MANIFEST_FILE", str(missing_manifest))

    validator.check_platform_models()

    assert validator.ERRORS == [], validator.ERRORS
    skip_warnings = [w for w in validator.WARNINGS if "drift check skipped" in w]
    assert skip_warnings, validator.WARNINGS
def test_runtime_absent_from_manifest_warns(validator, tmp_path, monkeypatch):
    """A runtime the manifest does not list produces a warning and no error."""
    template_yaml = _config_with_platform("mystery-runtime", ["moonshot/kimi-k2.6"])
    _setup_drift(tmp_path, monkeypatch, template_yaml, _manifest_fixture())

    validator.check_platform_models()

    assert validator.ERRORS == [], validator.ERRORS
    expected_phrase = "not in the controlplane providers manifest"
    matching = [w for w in validator.WARNINGS if expected_phrase in w]
    assert matching, validator.WARNINGS
def test_manifest_fetch_via_real_git_clone(validator, tmp_path, monkeypatch):
    """Drive the genuine blobless + sparse ``git clone`` fetch path.

    PROVIDERS_MANIFEST_FILE is removed so the short-circuit is not taken; the
    manifest is fetched from a throwaway local repository over ``file://``.

    Regression guard: sparse-checkout (cone mode) has to be given a DIRECTORY.
    Passing the file path failed silently and degraded to a WARN-skip, so the
    gate never actually blocked through the live path.
    """
    import os as _os
    import shutil as _shutil
    import subprocess as _sp

    if _shutil.which("git") is None:
        import pytest as _pytest

        _pytest.skip("git not available")

    # Source repository holding internal/providers/providers.yaml.
    repo_dir = tmp_path / "cp-src"
    providers_dir = repo_dir / "internal" / "providers"
    providers_dir.mkdir(parents=True)
    (providers_dir / "providers.yaml").write_text(_manifest_fixture())
    (repo_dir / "README.md").write_text("root file so the sparse cone has a base\n")

    # Fixed author/committer identity layered over the ambient environment.
    commit_env = dict(_os.environ)
    commit_env.update(
        {
            "GIT_AUTHOR_NAME": "t",
            "GIT_AUTHOR_EMAIL": "t@t",
            "GIT_COMMITTER_NAME": "t",
            "GIT_COMMITTER_EMAIL": "t@t",
        }
    )
    _sp.run(["git", "init", "-q", "-b", "main", str(repo_dir)], check=True, capture_output=True)
    for git_args in (["add", "-A"], ["commit", "-q", "-m", "init"]):
        _sp.run(
            ["git", "-C", str(repo_dir), *git_args],
            check=True,
            capture_output=True,
            env=commit_env,
        )

    # Template under validation (the cwd): hermes offering an in-manifest model.
    template_dir = tmp_path / "tmpl"
    template_dir.mkdir()
    template_yaml = _config_with_platform("hermes", ["moonshot/kimi-k2.6"])
    (template_dir / "config.yaml").write_text(template_yaml)
    monkeypatch.chdir(template_dir)

    monkeypatch.delenv("PROVIDERS_MANIFEST_FILE", raising=False)
    # file:// enables --filter on a local clone (plain paths ignore it).
    monkeypatch.setenv("PROVIDERS_MANIFEST_REPO", "file://" + str(repo_dir))

    validator.check_platform_models()

    # Clone-path fetch worked and the subset holds: no error and no skip warning.
    assert validator.ERRORS == [], validator.ERRORS
    assert validator.WARNINGS == [], validator.WARNINGS
def test_real_git_clone_detects_drift(validator, tmp_path, monkeypatch):
    """Real-clone path again, this time with a drifting template.

    The template offers a platform model that the fetched manifest does NOT
    list, so an error must be recorded — evidence that the live fetch path
    actually gates.
    """
    import os as _os
    import shutil as _shutil
    import subprocess as _sp

    if _shutil.which("git") is None:
        import pytest as _pytest

        _pytest.skip("git not available")

    # Throwaway source repository carrying the fixture manifest.
    repo_dir = tmp_path / "cp-src2"
    providers_dir = repo_dir / "internal" / "providers"
    providers_dir.mkdir(parents=True)
    (providers_dir / "providers.yaml").write_text(_manifest_fixture())
    (repo_dir / "README.md").write_text("x\n")

    # Fixed author/committer identity layered over the ambient environment.
    commit_env = dict(_os.environ)
    commit_env.update(
        {
            "GIT_AUTHOR_NAME": "t",
            "GIT_AUTHOR_EMAIL": "t@t",
            "GIT_COMMITTER_NAME": "t",
            "GIT_COMMITTER_EMAIL": "t@t",
        }
    )
    _sp.run(["git", "init", "-q", "-b", "main", str(repo_dir)], check=True, capture_output=True)
    for git_args in (["add", "-A"], ["commit", "-q", "-m", "init"]):
        _sp.run(
            ["git", "-C", str(repo_dir), *git_args],
            check=True,
            capture_output=True,
            env=commit_env,
        )

    # Template in the cwd offering a model the manifest does not declare.
    template_dir = tmp_path / "tmpl2"
    template_dir.mkdir()
    template_yaml = _config_with_platform("hermes", ["moonshot/kimi-k2.99"])
    (template_dir / "config.yaml").write_text(template_yaml)
    monkeypatch.chdir(template_dir)

    monkeypatch.delenv("PROVIDERS_MANIFEST_FILE", raising=False)
    monkeypatch.setenv("PROVIDERS_MANIFEST_REPO", "file://" + str(repo_dir))

    validator.check_platform_models()

    flagged = [e for e in validator.ERRORS if "kimi-k2.99" in e]
    assert flagged, validator.ERRORS
+132
View File
@@ -411,6 +411,137 @@ def check_adapter_runtime_load() -> None:
)
# ───────────────────────────────── platform-model SSOT drift gate
#
# The controlplane providers manifest (internal/providers/providers.yaml
# `runtimes:` block) is the SINGLE source of truth for which
# platform-managed (Molecule-billed) models each runtime offers (RFC
# internal#580 Option C). A template's config.yaml `runtime_config.models`
# entries tagged `provider: platform` are a PROJECTION of that SSOT — they
# must be a SUBSET. Offering a platform model the manifest doesn't declare
# risks shipping an unservable option (the SEO 1033 / "Exception: success"
# class), so we gate it here.
#
# Best-effort by design: if the manifest can't be fetched (no network /
# git access in this CI context) we WARN and skip rather than couple every
# template's CI to controlplane reachability. The deploy-time e2e
# platform-models smoke (molecule-controlplane) is the hard backstop that
# actually proves servability.
def _template_platform_models(config: dict) -> list[str]:
    """Return the ids of the template's platform-managed models.

    Scans ``config["runtime_config"]["models"]`` and collects the ``id`` of
    every mapping entry whose ``provider`` equals ``platform`` (compared
    case-insensitively, surrounding whitespace ignored). Entries with no id,
    or an empty one, are skipped.

    A malformed config must not crash the validator: when ``runtime_config``
    is not a mapping or ``models`` is not a sequence, there is nothing to
    gate and an empty list is returned.

    Args:
        config: the parsed ``config.yaml`` document.

    Returns:
        Platform model ids in the order they appear in the config.
    """
    runtime_config = config.get("runtime_config")
    if not isinstance(runtime_config, dict):
        return []

    models = runtime_config.get("models")
    if not isinstance(models, (list, tuple)):
        return []

    platform_ids = []
    for entry in models:
        if not isinstance(entry, dict):
            continue
        provider = str(entry.get("provider", "")).strip().lower()
        model_id = entry.get("id")
        if provider == "platform" and model_id:
            platform_ids.append(model_id)
    return platform_ids
def _fetch_providers_manifest() -> dict | None:
"""Load the controlplane providers manifest. PROVIDERS_MANIFEST_FILE
(a local path) short-circuits the fetch for tests / offline. Otherwise
a blobless sparse `git` clone pulls just providers.yaml using the
runner's ambient git credentials (same access the molecule-ci clone
uses). Returns the parsed dict, or None on any failure."""
local = os.environ.get("PROVIDERS_MANIFEST_FILE")
if local:
try:
with open(local, encoding="utf-8") as f:
return yaml.safe_load(f)
except Exception:
return None
import shutil
import subprocess
import tempfile
repo = os.environ.get(
"PROVIDERS_MANIFEST_REPO",
"https://git.moleculesai.app/molecule-ai/molecule-controlplane.git",
)
rel = "internal/providers/providers.yaml"
# sparse-checkout cone mode (the default) takes DIRECTORY paths, not file
# paths — `set internal/providers/providers.yaml` fails ("not a
# directory"). Use the containing directory; the file read below narrows it.
sparse_dir = "internal/providers"
tmp = tempfile.mkdtemp(prefix="cp-manifest-")
try:
subprocess.run(
["git", "clone", "--depth", "1", "--filter=blob:none", "--sparse", repo, tmp],
check=True, capture_output=True, timeout=60,
)
subprocess.run(
["git", "-C", tmp, "sparse-checkout", "set", sparse_dir],
check=True, capture_output=True, timeout=30,
)
with open(os.path.join(tmp, rel), encoding="utf-8") as f:
return yaml.safe_load(f)
except subprocess.CalledProcessError as e:
# Log stderr so a future fetch breakage is visible, not a silent skip.
stderr = (e.stderr or b"").decode("utf-8", "replace")[-300:] if isinstance(e.stderr, bytes) else str(e.stderr or "")[-300:]
print(f"::warning::providers manifest fetch failed (git {e.returncode}): {stderr.strip()}")
return None
except Exception as e:
print(f"::warning::providers manifest fetch failed: {e}")
return None
finally:
shutil.rmtree(tmp, ignore_errors=True)
def check_platform_models() -> None:
    """Gate the template's platform models against the providers manifest.

    Reads ``config.yaml`` from the current directory, collects its
    ``provider: platform`` model ids, and requires them to be a subset of the
    ``platform`` provider's models for the template's runtime in the
    controlplane providers manifest.

    Outcomes:
        * no ``config.yaml``, unparsable config, or no platform models:
          returns silently;
        * manifest could not be loaded, or the runtime is not listed in it:
          a warning is recorded and the check is skipped;
        * a platform model is missing from the manifest: an error is recorded;
        * otherwise a confirmation line is printed.

    Malformed manifest shapes (a null runtime entry, non-mapping provider
    refs, a non-list ``models``) are treated as declaring no platform models
    rather than crashing the validator.
    """
    if not os.path.isfile("config.yaml"):
        return  # check_config_yaml already errored
    try:
        with open("config.yaml", encoding="utf-8") as f:
            config = yaml.safe_load(f)
    except Exception:
        return  # check_config_yaml already errored on the parse
    if not isinstance(config, dict):
        return

    tmpl_models = _template_platform_models(config)
    if not tmpl_models:
        return  # nothing platform-managed to gate

    runtime = config.get("runtime")
    manifest = _fetch_providers_manifest()
    if not isinstance(manifest, dict):
        warn(
            "platform-model SSOT drift check skipped: could not load the controlplane "
            "providers manifest (no git/network access here, or set "
            "PROVIDERS_MANIFEST_FILE). The deploy-time platform-models e2e smoke is the "
            "backstop."
        )
        return

    runtimes = manifest.get("runtimes")
    if not isinstance(runtimes, dict):
        runtimes = {}
    # The isinstance guard also keeps an unhashable `runtime` value (e.g. a
    # YAML list) from raising on the membership test.
    if not isinstance(runtime, str) or runtime not in runtimes:
        warn(
            f"platform-model SSOT drift check skipped: runtime `{runtime}` is not in the "
            f"controlplane providers manifest runtimes block, so its platform set is "
            f"undefined there. Add it to providers.yaml to enable the gate."
        )
        return

    runtime_entry = runtimes[runtime]
    providers = runtime_entry.get("providers") if isinstance(runtime_entry, dict) else None
    allowed = set()
    for ref in providers if isinstance(providers, list) else []:
        if not isinstance(ref, dict) or ref.get("name") != "platform":
            continue
        models = ref.get("models")
        if isinstance(models, list):
            allowed.update(models)

    extra = [m for m in tmpl_models if m not in allowed]
    if extra:
        # key=str keeps the message renderable even if ids are not all strings.
        err(
            f"config.yaml: runtime `{runtime}` offers platform model(s) {sorted(extra, key=str)} "
            f"NOT in the controlplane providers manifest's platform set for this runtime "
            f"({sorted(allowed, key=str)}). That manifest (internal/providers/providers.yaml "
            f"runtimes block) is the SSOT for platform-managed models — declare them there "
            f"first, or remove them here. Offering a platform model the SSOT doesn't "
            f"declare risks an unservable option (the 1033 class)."
        )
    else:
        print(f"✓ platform models {sorted(tmpl_models, key=str)} ⊆ manifest platform set for `{runtime}`")
def main() -> None:
# --static-only skips check_adapter_runtime_load(), which calls
# importlib's exec_module() on the template's adapter.py. That's
@@ -421,6 +552,7 @@ def main() -> None:
check_dockerfile()
check_config_yaml()
check_platform_models()
check_requirements()
check_adapter()
if not static_only: