From e336688278fdf3b65e0aad7bde312b843264e9c4 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Thu, 30 Apr 2026 06:27:20 -0700 Subject: [PATCH] test: extract shared signature-snapshot helpers + skill_loader gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes in one PR (tightly coupled — the second wouldn't make sense without the first): 1. Hoist the inspect-based snapshot helpers out of test_adapter_base_signature.py into tests/_signature_snapshot.py so future surfaces don't copy-paste introspection logic. - build_class_signature_record(cls): walks public methods, unwraps static/class/abstract methods, returns a stable {class, methods: [...]} dict. - build_dataclass_record(cls): walks dataclass fields via dataclasses.fields(), returns {name, frozen, fields: [...]}. - compare_against_snapshot(actual, path): writes-on-first-run + diff-on-drift, with both expected and actual JSON in failure message. test_adapter_base_signature.py is rewritten to use the helpers; the existing snapshot file is byte-identical (no behavior change). 2. New gate: tests/test_skill_loader_signature.py covers the public dataclasses exported from skill_loader/loader.py: - SkillMetadata: every adapter pattern-matches on .runtime for skill-compat filtering. Renaming this field would silently break per-adapter skill loading — the loader still returns objects, but adapters' `if "*" in skill.metadata.runtime` raises AttributeError at workspace boot. - LoadedSkill: returned in SetupResult.loaded_skills. Includes test_snapshot_has_required_skill_metadata_fields defense-in-depth: ensures the runtime / id / name / description fields stay even if both source and snapshot are updated together. Verified: deliberately renaming SkillMetadata.runtime trips the gate with full snapshot diff in the failure message. Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace/tests/_signature_snapshot.py | 150 +++++++++++++ .../snapshots/skill_loader_signature.json | 62 ++++++ .../tests/test_adapter_base_signature.py | 197 ++++-------------- .../tests/test_skill_loader_signature.py | 108 ++++++++++ 4 files changed, 358 insertions(+), 159 deletions(-) create mode 100644 workspace/tests/_signature_snapshot.py create mode 100644 workspace/tests/snapshots/skill_loader_signature.json create mode 100644 workspace/tests/test_skill_loader_signature.py diff --git a/workspace/tests/_signature_snapshot.py b/workspace/tests/_signature_snapshot.py new file mode 100644 index 00000000..709229c2 --- /dev/null +++ b/workspace/tests/_signature_snapshot.py @@ -0,0 +1,150 @@ +"""Shared inspect-based signature-snapshot helpers (#2364 item 2). + +Originally lived inline in tests/test_adapter_base_signature.py. +Extracted here so each public-surface module gets its own +test_*_signature.py + snapshot file without copy-pasting the +introspection logic. + +Pattern (one snapshot file per module): + + from tests._signature_snapshot import ( + build_class_signature_record, + build_dataclass_record, + compare_against_snapshot, + ) + + SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "_signature.json" + + def _build_full_snapshot() -> dict: + from import PublicClass, PublicDataclass + return { + "module": "", + "classes": [build_class_signature_record(PublicClass)], + "dataclasses": [build_dataclass_record(PublicDataclass)], + } + + def test__signature_matches_snapshot(): + compare_against_snapshot(_build_full_snapshot(), SNAPSHOT_PATH) + +The snapshot is a stable JSON file — sort_keys + indent=2 — so +diffs are reviewable in PR. Any drift trips the test with both +expected and actual JSON in the failure message. +""" + +import inspect +import json +from pathlib import Path + +import pytest + + +def _annotation_repr(annotation: object) -> str: + """Stable string form of a type annotation. ``inspect`` returns the + runtime objects which don't compare cleanly — repr is the boring + correct answer for snapshotting.""" + if annotation is inspect.Parameter.empty: + return "" + if isinstance(annotation, type): + return annotation.__name__ + return str(annotation) + + +def _parameter_record(p: inspect.Parameter) -> dict: + return { + "name": p.name, + "kind": p.kind.name, + "annotation": _annotation_repr(p.annotation), + "has_default": p.default is not inspect.Parameter.empty, + } + + +def _signature_record(name: str, fn: object) -> dict: + sig = inspect.signature(fn) + return { + "name": name, + "is_async": inspect.iscoroutinefunction(fn), + "is_abstract": getattr(fn, "__isabstractmethod__", False), + "parameters": [_parameter_record(p) for p in sig.parameters.values()], + "return_annotation": _annotation_repr(sig.return_annotation), + } + + +def build_class_signature_record(cls: type) -> dict: + """Snapshot a class's public method surface. Public = name doesn't + start with underscore. Static/class/abstract methods are unwrapped + so the underlying function signature is captured. + + Returns: ``{class: , methods: []}`` + """ + methods: list[dict] = [] + for attr_name in sorted(vars(cls)): + if attr_name.startswith("_"): + continue + attr = vars(cls)[attr_name] + if isinstance(attr, staticmethod): + fn = attr.__func__ + elif isinstance(attr, classmethod): + fn = attr.__func__ + elif callable(attr): + fn = attr + else: + continue + methods.append(_signature_record(attr_name, fn)) + return {"class": cls.__name__, "methods": methods} + + +def build_dataclass_record(cls: type) -> dict: + """Snapshot a dataclass's field shape. Captures field name + type + annotation + has_default per field, plus the @dataclass(frozen=...) + flag. Default values themselves are NOT recorded (would require + brittle value-shape stringifying for non-trivial defaults). + + Returns: ``{name, frozen, fields: []}`` + """ + import dataclasses as _dc + + fields = [] + for f in _dc.fields(cls): + fields.append({ + "name": f.name, + "annotation": _annotation_repr(f.type) if not isinstance(f.type, str) else f.type, + "has_default": f.default is not _dc.MISSING or f.default_factory is not _dc.MISSING, + }) + return { + "name": cls.__name__, + "frozen": getattr(cls, "__dataclass_params__").frozen, + "fields": fields, + } + + +def compare_against_snapshot(actual: dict, snapshot_path: Path) -> None: + """Compare a built snapshot against a checked-in JSON file. + + On first run (snapshot missing): writes the file and skips. Re-run + to verify it now passes — the snapshot file appears in the diff + of the PR introducing it. + + On drift: fails the test with both expected and actual JSON in + the failure message so the reviewer sees the change without + re-running anything. + """ + if not snapshot_path.exists(): + snapshot_path.parent.mkdir(parents=True, exist_ok=True) + snapshot_path.write_text(json.dumps(actual, indent=2, sort_keys=True) + "\n") + pytest.skip( + f"snapshot did not exist; wrote {snapshot_path.name} — " + "re-run the test to verify it now passes" + ) + + expected = json.loads(snapshot_path.read_text()) + if actual != expected: + actual_str = json.dumps(actual, indent=2, sort_keys=True) + expected_str = json.dumps(expected, indent=2, sort_keys=True) + pytest.fail( + f"Signature drifted from {snapshot_path.name}.\n\n" + "Update intentionally by deleting the snapshot file and re-running, " + "OR by editing it to match. The PR diff makes the change visible " + "to reviewers and to template repos that depend on this surface.\n\n" + f"=== EXPECTED ({snapshot_path.name}) ===\n{expected_str}\n\n" + f"=== ACTUAL (current source) ===\n{actual_str}\n" + ) diff --git a/workspace/tests/snapshots/skill_loader_signature.json b/workspace/tests/snapshots/skill_loader_signature.json new file mode 100644 index 00000000..6cec2922 --- /dev/null +++ b/workspace/tests/snapshots/skill_loader_signature.json @@ -0,0 +1,62 @@ +{ + "dataclasses": [ + { + "fields": [ + { + "annotation": "str", + "has_default": false, + "name": "id" + }, + { + "annotation": "str", + "has_default": false, + "name": "name" + }, + { + "annotation": "str", + "has_default": false, + "name": "description" + }, + { + "annotation": "list[str]", + "has_default": true, + "name": "tags" + }, + { + "annotation": "list[str]", + "has_default": true, + "name": "examples" + }, + { + "annotation": "list[str]", + "has_default": true, + "name": "runtime" + } + ], + "frozen": false, + "name": "SkillMetadata" + }, + { + "fields": [ + { + "annotation": "SkillMetadata", + "has_default": false, + "name": "metadata" + }, + { + "annotation": "str", + "has_default": false, + "name": "instructions" + }, + { + "annotation": "list[typing.Any]", + "has_default": true, + "name": "tools" + } + ], + "frozen": false, + "name": "LoadedSkill" + } + ], + "module": "skill_loader.loader" +} diff --git a/workspace/tests/test_adapter_base_signature.py b/workspace/tests/test_adapter_base_signature.py index f1fc7d38..c0fdc264 100644 --- a/workspace/tests/test_adapter_base_signature.py +++ b/workspace/tests/test_adapter_base_signature.py @@ -1,35 +1,31 @@ """BaseAdapter public-API signature snapshot — drift gate (#2364 item 2). Every workspace template subclasses ``BaseAdapter``. Renaming, removing, -or re-typing a method on the base class silently breaks templates that -override it — the override stops being recognized as an override, the -old method-name's caller silently invokes the default no-op, etc. -Recent #87 universal-runtime work + #1957 recordResource refactor both -renamed/added methods; without a frozen snapshot, the next rename ships -quietly and only surfaces when a template's CI catches the AttributeError -days later. +or re-typing a method on the base class — or a field on the public +dataclasses (SetupResult, AdapterConfig, RuntimeCapabilities) — +silently breaks templates that rely on the old shape. Without a +frozen snapshot, the next rename ships quietly and only surfaces when +a template's CI catches the AttributeError days later. -This test pins the public surface. It walks ``BaseAdapter`` with -``inspect`` and compares the result against a checked-in JSON snapshot. -Any drift = test failure. +Helpers live in ``tests/_signature_snapshot.py`` so future surfaces +(skill_loader, etc.) reuse the same introspection logic. When the failure is intentional: 1. Make the API change in ``adapter_base.py``. 2. Run the test once to see the diff in the failure message. 3. Update ``tests/snapshots/adapter_base_signature.json`` to match - the new shape — that update IS the explicit acknowledgment that - templates need follow-up. Reviewer of the PR sees the snapshot - diff in their review and decides whether template repos need - coordinated updates. + the new shape (or delete it and re-run to regenerate). That + update IS the explicit acknowledgment that templates need + follow-up. Reviewer of the PR sees the snapshot diff in their + review and decides whether template repos need coordinated + updates. -Same-shape pattern as PR #2363's A2A protocol-compat replay gate -(workspace-server/internal/handlers/testdata/a2a_corpus). Both close -drift classes by snapshotting the structural surface that templates -or callers depend on. +Same-shape pattern as PR #2363's A2A protocol-compat replay gate. +Both close drift classes by snapshotting the structural surface that +templates or callers depend on. """ -import inspect import json import sys from pathlib import Path @@ -41,157 +37,40 @@ WORKSPACE_DIR = Path(__file__).parent.parent if str(WORKSPACE_DIR) not in sys.path: sys.path.insert(0, str(WORKSPACE_DIR)) +from tests._signature_snapshot import ( # noqa: E402 + build_class_signature_record, + build_dataclass_record, + compare_against_snapshot, +) + SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "adapter_base_signature.json" -def _annotation_repr(annotation: object) -> str: - """Stable string form of a type annotation. ``inspect`` returns the - runtime objects which don't compare cleanly — repr is the boring - correct answer for snapshotting.""" - if annotation is inspect.Parameter.empty: - return "" - if isinstance(annotation, type): - return annotation.__name__ - # types.UnionType / typing.Union / forward refs — repr captures all - return str(annotation) +def _build_full_snapshot() -> dict: + """Snapshot of BaseAdapter methods + the three public dataclasses + that form the call/return contract between the platform and every + adapter: - -def _parameter_record(p: inspect.Parameter) -> dict: - return { - "name": p.name, - "kind": p.kind.name, - "annotation": _annotation_repr(p.annotation), - "has_default": p.default is not inspect.Parameter.empty, - } - - -def _signature_record(name: str, fn: object) -> dict: - sig = inspect.signature(fn) - return { - "name": name, - "is_async": inspect.iscoroutinefunction(fn), - "is_abstract": getattr(fn, "__isabstractmethod__", False), - "parameters": [_parameter_record(p) for p in sig.parameters.values()], - "return_annotation": _annotation_repr(sig.return_annotation), - } - - -def _build_signature_snapshot() -> dict: - """Walk BaseAdapter and produce a stable JSON-serializable snapshot.""" - from adapter_base import BaseAdapter # imported lazy so test discovery is fast - - methods: list[dict] = [] - for attr_name in sorted(vars(BaseAdapter)): - if attr_name.startswith("_"): - continue - attr = vars(BaseAdapter)[attr_name] - # Only callables — skip data attributes (none today, but - # forward-defensive). staticmethod / classmethod are unwrapped - # via __func__; abstractmethod wraps the underlying fn. - if isinstance(attr, staticmethod): - fn = attr.__func__ - elif isinstance(attr, classmethod): - fn = attr.__func__ - elif callable(attr): - fn = attr - else: - continue - methods.append(_signature_record(attr_name, fn)) - return {"class": "BaseAdapter", "methods": methods} - - -def _dataclass_record(cls: type) -> dict: - """Stable JSON shape for a public dataclass exported from - adapter_base. Captures field name + type annotation + default - presence so renaming, retyping, or making-required-vs-optional - drift trips the gate. - - Note on defaults: we record presence-of-default, not the default - value. A literal default like ``False`` or ``None`` is part of the - contract (templates inherit it), but reproducing it here would - require value-shape stringifying that's brittle for non-trivial - defaults (lists, dataclasses-as-defaults). Presence is enough to - catch the dangerous transitions (required → optional and vice - versa). - """ - import dataclasses as _dc - - fields = [] - for f in _dc.fields(cls): - fields.append({ - "name": f.name, - "annotation": _annotation_repr(f.type) if not isinstance(f.type, str) else f.type, - "has_default": f.default is not _dc.MISSING or f.default_factory is not _dc.MISSING, - }) - return { - "name": cls.__name__, - "frozen": getattr(cls, "__dataclass_params__").frozen, - "fields": fields, - } - - -def _build_dataclass_snapshot() -> list[dict]: - """Snapshot the public dataclasses exported from adapter_base. - - These types form the call-and-return shape between the platform - and every adapter: - SetupResult: returned by adapter._common_setup() - AdapterConfig: passed into adapter setup hooks - - RuntimeCapabilities: returned by adapter.capabilities() and - consumed by platform-side dispatch routing (#117). A field - rename here silently disables every native-capability flag - every adapter currently declares. + - RuntimeCapabilities: returned by adapter.capabilities(); + drives platform-side dispatch routing (#117). A field rename + here silently disables every native-capability flag every + adapter currently declares. """ - from adapter_base import AdapterConfig, RuntimeCapabilities, SetupResult + from adapter_base import AdapterConfig, BaseAdapter, RuntimeCapabilities, SetupResult - classes = [SetupResult, AdapterConfig, RuntimeCapabilities] - return [_dataclass_record(cls) for cls in classes] - - -def _build_full_snapshot() -> dict: - """Combined snapshot — BaseAdapter methods + public dataclasses.""" - return { - **_build_signature_snapshot(), - "dataclasses": _build_dataclass_snapshot(), - } + snap = build_class_signature_record(BaseAdapter) + snap["dataclasses"] = [ + build_dataclass_record(SetupResult), + build_dataclass_record(AdapterConfig), + build_dataclass_record(RuntimeCapabilities), + ] + return snap def test_base_adapter_signature_matches_snapshot(): - """Pin BaseAdapter's public API surface against a frozen snapshot. - - Covers BOTH method signatures AND public dataclass field shapes - (SetupResult, AdapterConfig, RuntimeCapabilities). Renaming a - RuntimeCapabilities field would silently disable every adapter's - capability declaration without this gate. - - On failure, the test prints both the expected and actual snapshot - JSON so the diff is human-readable. Updating the snapshot is the - explicit ack that a template-affecting API change is intentional. - """ - actual = _build_full_snapshot() - if not SNAPSHOT_PATH.exists(): - # First-run convenience: write the snapshot if missing. A reviewer - # of the introducing PR sees the new file in the diff. - SNAPSHOT_PATH.parent.mkdir(parents=True, exist_ok=True) - SNAPSHOT_PATH.write_text(json.dumps(actual, indent=2, sort_keys=True) + "\n") - pytest.skip( - f"snapshot did not exist; wrote {SNAPSHOT_PATH.name} — " - "re-run the test to verify it now passes" - ) - - expected = json.loads(SNAPSHOT_PATH.read_text()) - if actual != expected: - # Pretty-print both for the failure message so reviewer sees what - # changed without rerunning anything. - actual_str = json.dumps(actual, indent=2, sort_keys=True) - expected_str = json.dumps(expected, indent=2, sort_keys=True) - pytest.fail( - "BaseAdapter signature drifted from snapshot.\n\n" - f"To update intentionally:\n cp <(python -c 'from tests.test_adapter_base_signature import _build_full_snapshot; import json; print(json.dumps(_build_full_snapshot(), indent=2, sort_keys=True))') {SNAPSHOT_PATH}\n" - "Or rerun with the snapshot deleted to regenerate.\n\n" - f"=== EXPECTED ({SNAPSHOT_PATH.name}) ===\n{expected_str}\n\n" - f"=== ACTUAL (current adapter_base.py) ===\n{actual_str}\n" - ) + compare_against_snapshot(_build_full_snapshot(), SNAPSHOT_PATH) def test_snapshot_has_required_methods(): diff --git a/workspace/tests/test_skill_loader_signature.py b/workspace/tests/test_skill_loader_signature.py new file mode 100644 index 00000000..5ccaef3d --- /dev/null +++ b/workspace/tests/test_skill_loader_signature.py @@ -0,0 +1,108 @@ +"""skill_loader public-API signature snapshot — drift gate. + +Every workspace template's adapter pulls skills via +``skill_loader.load_skills``. The returned ``LoadedSkill`` objects +expose ``metadata`` (a ``SkillMetadata``) which adapters pattern-match +on for runtime-compat filtering — see ``hermes`` and ``claude-code`` +adapters which inspect ``skill.metadata.runtime`` to decide whether +to load a skill or skip it. + +Renaming a field on ``SkillMetadata`` (e.g. ``runtime`` → ``runtimes``) +would silently break that filtering. The skill loader call still +returns objects, but every adapter's ``if "*" in skill.metadata.runtime`` +check raises AttributeError at workspace boot — too late to be caught +by the introducing PR's CI. + +Same drift class as the BaseAdapter signature snapshot (#2378, #2380), +applied to a different public surface. +""" + +import sys +from pathlib import Path + +import pytest + +WORKSPACE_DIR = Path(__file__).parent.parent +if str(WORKSPACE_DIR) not in sys.path: + sys.path.insert(0, str(WORKSPACE_DIR)) + +from tests._signature_snapshot import ( # noqa: E402 + build_dataclass_record, + compare_against_snapshot, +) + +SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "skill_loader_signature.json" + + +def _build_full_snapshot() -> dict: + """Snapshot the public dataclasses exported from skill_loader. + + SkillMetadata fields are consumed by: + - adapter runtime filtering (``runtime`` field) + - canvas UI display (``name``, ``description``, ``tags``) + - skill discovery / search (``id``, ``examples``) + + LoadedSkill is the return shape from ``load_skills`` and is held + in ``SetupResult.loaded_skills`` — every adapter consumes it. + """ + from skill_loader.loader import LoadedSkill, SkillMetadata + + return { + "module": "skill_loader.loader", + "dataclasses": [ + build_dataclass_record(SkillMetadata), + build_dataclass_record(LoadedSkill), + ], + } + + +def test_skill_loader_signature_matches_snapshot(): + compare_against_snapshot(_build_full_snapshot(), SNAPSHOT_PATH) + + +def test_snapshot_has_required_skill_metadata_fields(): + """Defense-in-depth — adapters pattern-match on these specific + field names. Removing one without a coordinated update breaks + every adapter's skill-filter logic. + """ + if not SNAPSHOT_PATH.exists(): + pytest.skip(f"{SNAPSHOT_PATH.name} not generated yet") + + import json + snapshot = json.loads(SNAPSHOT_PATH.read_text()) + dataclasses = {dc["name"]: dc for dc in snapshot.get("dataclasses", [])} + + expected = { + "SkillMetadata": { + "id", + "name", + "description", + # `runtime` drives per-adapter skill-compat filtering. If + # this field is renamed, every adapter's + # `if "*" in skill.metadata.runtime` check raises + # AttributeError at workspace boot. + "runtime", + }, + "LoadedSkill": { + "metadata", + "instructions", + "tools", + }, + } + + for cls_name, required_fields in expected.items(): + if cls_name not in dataclasses: + pytest.fail( + f"Public dataclass {cls_name} missing from snapshot — " + "either it was removed from skill_loader.loader, OR the " + "snapshot wasn't regenerated after a refactor." + ) + actual_fields = {f["name"] for f in dataclasses[cls_name]["fields"]} + missing = required_fields - actual_fields + if missing: + pytest.fail( + f"{cls_name} is missing required fields: {sorted(missing)}.\n" + "Either restore them on skill_loader/loader.py, OR coordinate " + "adapter + template updates AND remove the entry from " + "`expected` in this test with a justification." + )