test: extract shared signature-snapshot helpers + skill_loader gate

Two changes in one PR (tightly coupled — the second wouldn't make sense without the first): 1. Hoist the inspect-based snapshot helpers out of test_adapter_base_signature.py into tests/_signature_snapshot.py so future surfaces don't copy-paste introspection logic. - build_class_signature_record(cls): walks public methods, unwraps static/class/abstract methods, returns a stable {class, methods: [...]} dict. - build_dataclass_record(cls): walks dataclass fields via dataclasses.fields(), returns {name, frozen, fields: [...]}. - compare_against_snapshot(actual, path): writes-on-first-run + diff-on-drift, with both expected and actual JSON in failure message. test_adapter_base_signature.py is rewritten to use the helpers; the existing snapshot file is byte-identical (no behavior change). 2. New gate: tests/test_skill_loader_signature.py covers the public dataclasses exported from skill_loader/loader.py: - SkillMetadata: every adapter pattern-matches on .runtime for skill-compat filtering. Renaming this field would silently break per-adapter skill loading — the loader still returns objects, but adapters' `if "*" in skill.metadata.runtime` raises AttributeError at workspace boot. - LoadedSkill: returned in SetupResult.loaded_skills. Includes test_snapshot_has_required_skill_metadata_fields defense-in-depth: ensures the runtime / id / name / description fields stay even if both source and snapshot are updated together. Verified: deliberately renaming SkillMetadata.runtime trips the gate with full snapshot diff in the failure message. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 06:27:20 -07:00 · 2026-04-30 06:27:20 -07:00 · e336688278
commit e336688278
parent 7a6ccde7f2
4 changed files with 358 additions and 159 deletions
--- a/workspace/tests/_signature_snapshot.py
+++ b/workspace/tests/_signature_snapshot.py
@ -0,0 +1,150 @@
+"""Shared inspect-based signature-snapshot helpers (#2364 item 2).
+
+Originally lived inline in tests/test_adapter_base_signature.py.
+Extracted here so each public-surface module gets its own
+test_*_signature.py + snapshot file without copy-pasting the
+introspection logic.
+
+Pattern (one snapshot file per module):
+
+    from tests._signature_snapshot import (
+        build_class_signature_record,
+        build_dataclass_record,
+        compare_against_snapshot,
+    )
+
+    SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "<module>_signature.json"
+
+    def _build_full_snapshot() -> dict:
+        from <module> import PublicClass, PublicDataclass
+        return {
+            "module": "<module>",
+            "classes": [build_class_signature_record(PublicClass)],
+            "dataclasses": [build_dataclass_record(PublicDataclass)],
+        }
+
+    def test_<module>_signature_matches_snapshot():
+        compare_against_snapshot(_build_full_snapshot(), SNAPSHOT_PATH)
+
+The snapshot is a stable JSON file — sort_keys + indent=2 — so
+diffs are reviewable in PR. Any drift trips the test with both
+expected and actual JSON in the failure message.
+"""
+
+import inspect
+import json
+from pathlib import Path
+
+import pytest
+
+
+def _annotation_repr(annotation: object) -> str:
+    """Stable string form of a type annotation. ``inspect`` returns the
+    runtime objects which don't compare cleanly — repr is the boring
+    correct answer for snapshotting."""
+    if annotation is inspect.Parameter.empty:
+        return ""
+    if isinstance(annotation, type):
+        return annotation.__name__
+    return str(annotation)
+
+
+def _parameter_record(p: inspect.Parameter) -> dict:
+    return {
+        "name": p.name,
+        "kind": p.kind.name,
+        "annotation": _annotation_repr(p.annotation),
+        "has_default": p.default is not inspect.Parameter.empty,
+    }
+
+
+def _signature_record(name: str, fn: object) -> dict:
+    sig = inspect.signature(fn)
+    return {
+        "name": name,
+        "is_async": inspect.iscoroutinefunction(fn),
+        "is_abstract": getattr(fn, "__isabstractmethod__", False),
+        "parameters": [_parameter_record(p) for p in sig.parameters.values()],
+        "return_annotation": _annotation_repr(sig.return_annotation),
+    }
+
+
+def build_class_signature_record(cls: type) -> dict:
+    """Snapshot a class's public method surface. Public = name doesn't
+    start with underscore. Static/class/abstract methods are unwrapped
+    so the underlying function signature is captured.
+
+    Returns: ``{class: <name>, methods: [<sorted method records>]}``
+    """
+    methods: list[dict] = []
+    for attr_name in sorted(vars(cls)):
+        if attr_name.startswith("_"):
+            continue
+        attr = vars(cls)[attr_name]
+        if isinstance(attr, staticmethod):
+            fn = attr.__func__
+        elif isinstance(attr, classmethod):
+            fn = attr.__func__
+        elif callable(attr):
+            fn = attr
+        else:
+            continue
+        methods.append(_signature_record(attr_name, fn))
+    return {"class": cls.__name__, "methods": methods}
+
+
+def build_dataclass_record(cls: type) -> dict:
+    """Snapshot a dataclass's field shape. Captures field name + type
+    annotation + has_default per field, plus the @dataclass(frozen=...)
+    flag. Default values themselves are NOT recorded (would require
+    brittle value-shape stringifying for non-trivial defaults).
+
+    Returns: ``{name, frozen, fields: [<field records>]}``
+    """
+    import dataclasses as _dc
+
+    fields = []
+    for f in _dc.fields(cls):
+        fields.append({
+            "name": f.name,
+            "annotation": _annotation_repr(f.type) if not isinstance(f.type, str) else f.type,
+            "has_default": f.default is not _dc.MISSING or f.default_factory is not _dc.MISSING,
+        })
+    return {
+        "name": cls.__name__,
+        "frozen": getattr(cls, "__dataclass_params__").frozen,
+        "fields": fields,
+    }
+
+
+def compare_against_snapshot(actual: dict, snapshot_path: Path) -> None:
+    """Compare a built snapshot against a checked-in JSON file.
+
+    On first run (snapshot missing): writes the file and skips. Re-run
+    to verify it now passes — the snapshot file appears in the diff
+    of the PR introducing it.
+
+    On drift: fails the test with both expected and actual JSON in
+    the failure message so the reviewer sees the change without
+    re-running anything.
+    """
+    if not snapshot_path.exists():
+        snapshot_path.parent.mkdir(parents=True, exist_ok=True)
+        snapshot_path.write_text(json.dumps(actual, indent=2, sort_keys=True) + "\n")
+        pytest.skip(
+            f"snapshot did not exist; wrote {snapshot_path.name} — "
+            "re-run the test to verify it now passes"
+        )
+
+    expected = json.loads(snapshot_path.read_text())
+    if actual != expected:
+        actual_str = json.dumps(actual, indent=2, sort_keys=True)
+        expected_str = json.dumps(expected, indent=2, sort_keys=True)
+        pytest.fail(
+            f"Signature drifted from {snapshot_path.name}.\n\n"
+            "Update intentionally by deleting the snapshot file and re-running, "
+            "OR by editing it to match. The PR diff makes the change visible "
+            "to reviewers and to template repos that depend on this surface.\n\n"
+            f"=== EXPECTED ({snapshot_path.name}) ===\n{expected_str}\n\n"
+            f"=== ACTUAL (current source) ===\n{actual_str}\n"
+        )
--- a/workspace/tests/snapshots/skill_loader_signature.json
+++ b/workspace/tests/snapshots/skill_loader_signature.json
@ -0,0 +1,62 @@
+{
+  "dataclasses": [
+    {
+      "fields": [
+        {
+          "annotation": "str",
+          "has_default": false,
+          "name": "id"
+        },
+        {
+          "annotation": "str",
+          "has_default": false,
+          "name": "name"
+        },
+        {
+          "annotation": "str",
+          "has_default": false,
+          "name": "description"
+        },
+        {
+          "annotation": "list[str]",
+          "has_default": true,
+          "name": "tags"
+        },
+        {
+          "annotation": "list[str]",
+          "has_default": true,
+          "name": "examples"
+        },
+        {
+          "annotation": "list[str]",
+          "has_default": true,
+          "name": "runtime"
+        }
+      ],
+      "frozen": false,
+      "name": "SkillMetadata"
+    },
+    {
+      "fields": [
+        {
+          "annotation": "SkillMetadata",
+          "has_default": false,
+          "name": "metadata"
+        },
+        {
+          "annotation": "str",
+          "has_default": false,
+          "name": "instructions"
+        },
+        {
+          "annotation": "list[typing.Any]",
+          "has_default": true,
+          "name": "tools"
+        }
+      ],
+      "frozen": false,
+      "name": "LoadedSkill"
+    }
+  ],
+  "module": "skill_loader.loader"
+}
--- a/workspace/tests/test_adapter_base_signature.py
+++ b/workspace/tests/test_adapter_base_signature.py
@ -1,35 +1,31 @@
 """BaseAdapter public-API signature snapshot — drift gate (#2364 item 2).

 Every workspace template subclasses ``BaseAdapter``. Renaming, removing,
-or re-typing a method on the base class silently breaks templates that
-override it — the override stops being recognized as an override, the
-old method-name's caller silently invokes the default no-op, etc.
-Recent #87 universal-runtime work + #1957 recordResource refactor both
-renamed/added methods; without a frozen snapshot, the next rename ships
-quietly and only surfaces when a template's CI catches the AttributeError
-days later.
+or re-typing a method on the base class — or a field on the public
+dataclasses (SetupResult, AdapterConfig, RuntimeCapabilities) —
+silently breaks templates that rely on the old shape. Without a
+frozen snapshot, the next rename ships quietly and only surfaces when
+a template's CI catches the AttributeError days later.

-This test pins the public surface. It walks ``BaseAdapter`` with
-``inspect`` and compares the result against a checked-in JSON snapshot.
-Any drift = test failure.
+Helpers live in ``tests/_signature_snapshot.py`` so future surfaces
+(skill_loader, etc.) reuse the same introspection logic.

 When the failure is intentional:

  1. Make the API change in ``adapter_base.py``.
  2. Run the test once to see the diff in the failure message.
  3. Update ``tests/snapshots/adapter_base_signature.json`` to match
-     the new shape — that update IS the explicit acknowledgment that
-     templates need follow-up. Reviewer of the PR sees the snapshot
-     diff in their review and decides whether template repos need
-     coordinated updates.
+     the new shape (or delete it and re-run to regenerate). That
+     update IS the explicit acknowledgment that templates need
+     follow-up. Reviewer of the PR sees the snapshot diff in their
+     review and decides whether template repos need coordinated
+     updates.

-Same-shape pattern as PR #2363's A2A protocol-compat replay gate
-(workspace-server/internal/handlers/testdata/a2a_corpus). Both close
-drift classes by snapshotting the structural surface that templates
-or callers depend on.
+Same-shape pattern as PR #2363's A2A protocol-compat replay gate.
+Both close drift classes by snapshotting the structural surface that
+templates or callers depend on.
 """

-import inspect
 import json
 import sys
 from pathlib import Path
@ -41,157 +37,40 @@ WORKSPACE_DIR = Path(__file__).parent.parent
 if str(WORKSPACE_DIR) not in sys.path:
    sys.path.insert(0, str(WORKSPACE_DIR))

+from tests._signature_snapshot import (  # noqa: E402
+    build_class_signature_record,
+    build_dataclass_record,
+    compare_against_snapshot,
+)
+
 SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "adapter_base_signature.json"


-def _annotation_repr(annotation: object) -> str:
-    """Stable string form of a type annotation. ``inspect`` returns the
-    runtime objects which don't compare cleanly — repr is the boring
-    correct answer for snapshotting."""
-    if annotation is inspect.Parameter.empty:
-        return ""
-    if isinstance(annotation, type):
-        return annotation.__name__
-    # types.UnionType / typing.Union / forward refs — repr captures all
-    return str(annotation)
+def _build_full_snapshot() -> dict:
+    """Snapshot of BaseAdapter methods + the three public dataclasses
+    that form the call/return contract between the platform and every
+    adapter:

-
-def _parameter_record(p: inspect.Parameter) -> dict:
-    return {
-        "name": p.name,
-        "kind": p.kind.name,
-        "annotation": _annotation_repr(p.annotation),
-        "has_default": p.default is not inspect.Parameter.empty,
-    }
-
-
-def _signature_record(name: str, fn: object) -> dict:
-    sig = inspect.signature(fn)
-    return {
-        "name": name,
-        "is_async": inspect.iscoroutinefunction(fn),
-        "is_abstract": getattr(fn, "__isabstractmethod__", False),
-        "parameters": [_parameter_record(p) for p in sig.parameters.values()],
-        "return_annotation": _annotation_repr(sig.return_annotation),
-    }
-
-
-def _build_signature_snapshot() -> dict:
-    """Walk BaseAdapter and produce a stable JSON-serializable snapshot."""
-    from adapter_base import BaseAdapter  # imported lazy so test discovery is fast
-
-    methods: list[dict] = []
-    for attr_name in sorted(vars(BaseAdapter)):
-        if attr_name.startswith("_"):
-            continue
-        attr = vars(BaseAdapter)[attr_name]
-        # Only callables — skip data attributes (none today, but
-        # forward-defensive). staticmethod / classmethod are unwrapped
-        # via __func__; abstractmethod wraps the underlying fn.
-        if isinstance(attr, staticmethod):
-            fn = attr.__func__
-        elif isinstance(attr, classmethod):
-            fn = attr.__func__
-        elif callable(attr):
-            fn = attr
-        else:
-            continue
-        methods.append(_signature_record(attr_name, fn))
-    return {"class": "BaseAdapter", "methods": methods}
-
-
-def _dataclass_record(cls: type) -> dict:
-    """Stable JSON shape for a public dataclass exported from
-    adapter_base. Captures field name + type annotation + default
-    presence so renaming, retyping, or making-required-vs-optional
-    drift trips the gate.
-
-    Note on defaults: we record presence-of-default, not the default
-    value. A literal default like ``False`` or ``None`` is part of the
-    contract (templates inherit it), but reproducing it here would
-    require value-shape stringifying that's brittle for non-trivial
-    defaults (lists, dataclasses-as-defaults). Presence is enough to
-    catch the dangerous transitions (required → optional and vice
-    versa).
-    """
-    import dataclasses as _dc
-
-    fields = []
-    for f in _dc.fields(cls):
-        fields.append({
-            "name": f.name,
-            "annotation": _annotation_repr(f.type) if not isinstance(f.type, str) else f.type,
-            "has_default": f.default is not _dc.MISSING or f.default_factory is not _dc.MISSING,
-        })
-    return {
-        "name": cls.__name__,
-        "frozen": getattr(cls, "__dataclass_params__").frozen,
-        "fields": fields,
-    }
-
-
-def _build_dataclass_snapshot() -> list[dict]:
-    """Snapshot the public dataclasses exported from adapter_base.
-
-    These types form the call-and-return shape between the platform
-    and every adapter:
      - SetupResult: returned by adapter._common_setup()
      - AdapterConfig: passed into adapter setup hooks
-      - RuntimeCapabilities: returned by adapter.capabilities() and
-        consumed by platform-side dispatch routing (#117). A field
-        rename here silently disables every native-capability flag
-        every adapter currently declares.
+      - RuntimeCapabilities: returned by adapter.capabilities();
+        drives platform-side dispatch routing (#117). A field rename
+        here silently disables every native-capability flag every
+        adapter currently declares.
    """
-    from adapter_base import AdapterConfig, RuntimeCapabilities, SetupResult
+    from adapter_base import AdapterConfig, BaseAdapter, RuntimeCapabilities, SetupResult

-    classes = [SetupResult, AdapterConfig, RuntimeCapabilities]
-    return [_dataclass_record(cls) for cls in classes]
-
-
-def _build_full_snapshot() -> dict:
-    """Combined snapshot — BaseAdapter methods + public dataclasses."""
-    return {
-        **_build_signature_snapshot(),
-        "dataclasses": _build_dataclass_snapshot(),
-    }
+    snap = build_class_signature_record(BaseAdapter)
+    snap["dataclasses"] = [
+        build_dataclass_record(SetupResult),
+        build_dataclass_record(AdapterConfig),
+        build_dataclass_record(RuntimeCapabilities),
+    ]
+    return snap


 def test_base_adapter_signature_matches_snapshot():
-    """Pin BaseAdapter's public API surface against a frozen snapshot.
-
-    Covers BOTH method signatures AND public dataclass field shapes
-    (SetupResult, AdapterConfig, RuntimeCapabilities). Renaming a
-    RuntimeCapabilities field would silently disable every adapter's
-    capability declaration without this gate.
-
-    On failure, the test prints both the expected and actual snapshot
-    JSON so the diff is human-readable. Updating the snapshot is the
-    explicit ack that a template-affecting API change is intentional.
-    """
-    actual = _build_full_snapshot()
-    if not SNAPSHOT_PATH.exists():
-        # First-run convenience: write the snapshot if missing. A reviewer
-        # of the introducing PR sees the new file in the diff.
-        SNAPSHOT_PATH.parent.mkdir(parents=True, exist_ok=True)
-        SNAPSHOT_PATH.write_text(json.dumps(actual, indent=2, sort_keys=True) + "\n")
-        pytest.skip(
-            f"snapshot did not exist; wrote {SNAPSHOT_PATH.name} — "
-            "re-run the test to verify it now passes"
-        )
-
-    expected = json.loads(SNAPSHOT_PATH.read_text())
-    if actual != expected:
-        # Pretty-print both for the failure message so reviewer sees what
-        # changed without rerunning anything.
-        actual_str = json.dumps(actual, indent=2, sort_keys=True)
-        expected_str = json.dumps(expected, indent=2, sort_keys=True)
-        pytest.fail(
-            "BaseAdapter signature drifted from snapshot.\n\n"
-            f"To update intentionally:\n  cp <(python -c 'from tests.test_adapter_base_signature import _build_full_snapshot; import json; print(json.dumps(_build_full_snapshot(), indent=2, sort_keys=True))') {SNAPSHOT_PATH}\n"
-            "Or rerun with the snapshot deleted to regenerate.\n\n"
-            f"=== EXPECTED ({SNAPSHOT_PATH.name}) ===\n{expected_str}\n\n"
-            f"=== ACTUAL (current adapter_base.py) ===\n{actual_str}\n"
-        )
+    compare_against_snapshot(_build_full_snapshot(), SNAPSHOT_PATH)


 def test_snapshot_has_required_methods():
--- a/workspace/tests/test_skill_loader_signature.py
+++ b/workspace/tests/test_skill_loader_signature.py
@ -0,0 +1,108 @@
+"""skill_loader public-API signature snapshot — drift gate.
+
+Every workspace template's adapter pulls skills via
+``skill_loader.load_skills``. The returned ``LoadedSkill`` objects
+expose ``metadata`` (a ``SkillMetadata``) which adapters pattern-match
+on for runtime-compat filtering — see ``hermes`` and ``claude-code``
+adapters which inspect ``skill.metadata.runtime`` to decide whether
+to load a skill or skip it.
+
+Renaming a field on ``SkillMetadata`` (e.g. ``runtime`` → ``runtimes``)
+would silently break that filtering. The skill loader call still
+returns objects, but every adapter's ``if "*" in skill.metadata.runtime``
+check raises AttributeError at workspace boot — too late to be caught
+by the introducing PR's CI.
+
+Same drift class as the BaseAdapter signature snapshot (#2378, #2380),
+applied to a different public surface.
+"""
+
+import sys
+from pathlib import Path
+
+import pytest
+
+WORKSPACE_DIR = Path(__file__).parent.parent
+if str(WORKSPACE_DIR) not in sys.path:
+    sys.path.insert(0, str(WORKSPACE_DIR))
+
+from tests._signature_snapshot import (  # noqa: E402
+    build_dataclass_record,
+    compare_against_snapshot,
+)
+
+SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "skill_loader_signature.json"
+
+
+def _build_full_snapshot() -> dict:
+    """Snapshot the public dataclasses exported from skill_loader.
+
+    SkillMetadata fields are consumed by:
+      - adapter runtime filtering (``runtime`` field)
+      - canvas UI display (``name``, ``description``, ``tags``)
+      - skill discovery / search (``id``, ``examples``)
+
+    LoadedSkill is the return shape from ``load_skills`` and is held
+    in ``SetupResult.loaded_skills`` — every adapter consumes it.
+    """
+    from skill_loader.loader import LoadedSkill, SkillMetadata
+
+    return {
+        "module": "skill_loader.loader",
+        "dataclasses": [
+            build_dataclass_record(SkillMetadata),
+            build_dataclass_record(LoadedSkill),
+        ],
+    }
+
+
+def test_skill_loader_signature_matches_snapshot():
+    compare_against_snapshot(_build_full_snapshot(), SNAPSHOT_PATH)
+
+
+def test_snapshot_has_required_skill_metadata_fields():
+    """Defense-in-depth — adapters pattern-match on these specific
+    field names. Removing one without a coordinated update breaks
+    every adapter's skill-filter logic.
+    """
+    if not SNAPSHOT_PATH.exists():
+        pytest.skip(f"{SNAPSHOT_PATH.name} not generated yet")
+
+    import json
+    snapshot = json.loads(SNAPSHOT_PATH.read_text())
+    dataclasses = {dc["name"]: dc for dc in snapshot.get("dataclasses", [])}
+
+    expected = {
+        "SkillMetadata": {
+            "id",
+            "name",
+            "description",
+            # `runtime` drives per-adapter skill-compat filtering. If
+            # this field is renamed, every adapter's
+            # `if "*" in skill.metadata.runtime` check raises
+            # AttributeError at workspace boot.
+            "runtime",
+        },
+        "LoadedSkill": {
+            "metadata",
+            "instructions",
+            "tools",
+        },
+    }
+
+    for cls_name, required_fields in expected.items():
+        if cls_name not in dataclasses:
+            pytest.fail(
+                f"Public dataclass {cls_name} missing from snapshot — "
+                "either it was removed from skill_loader.loader, OR the "
+                "snapshot wasn't regenerated after a refactor."
+            )
+        actual_fields = {f["name"] for f in dataclasses[cls_name]["fields"]}
+        missing = required_fields - actual_fields
+        if missing:
+            pytest.fail(
+                f"{cls_name} is missing required fields: {sorted(missing)}.\n"
+                "Either restore them on skill_loader/loader.py, OR coordinate "
+                "adapter + template updates AND remove the entry from "
+                "`expected` in this test with a justification."
+            )