test: extract shared signature-snapshot helpers + skill_loader gate

Two changes in one PR (tightly coupled — the second wouldn't make
sense without the first):

1. Hoist the inspect-based snapshot helpers out of
   test_adapter_base_signature.py into tests/_signature_snapshot.py
   so future surfaces don't copy-paste introspection logic.

   - build_class_signature_record(cls): walks public methods,
     unwraps static/class/abstract methods, returns a stable
     {class, methods: [...]} dict.
   - build_dataclass_record(cls): walks dataclass fields via
     dataclasses.fields(), returns {name, frozen, fields: [...]}.
   - compare_against_snapshot(actual, path): writes-on-first-run +
     diff-on-drift, with both expected and actual JSON in failure
     message.

   test_adapter_base_signature.py is rewritten to use the helpers;
   the existing snapshot file is byte-identical (no behavior change).

2. New gate: tests/test_skill_loader_signature.py covers the
   public dataclasses exported from skill_loader/loader.py:

   - SkillMetadata: every adapter pattern-matches on .runtime for
     skill-compat filtering. Renaming this field would silently
     break per-adapter skill loading — the loader still returns
     objects, but adapters' `if "*" in skill.metadata.runtime`
     raises AttributeError at workspace boot.
   - LoadedSkill: returned in SetupResult.loaded_skills.

   Includes test_snapshot_has_required_skill_metadata_fields
   defense-in-depth: ensures the runtime / id / name / description
   fields stay even if both source and snapshot are updated together.

Verified: deliberately renaming SkillMetadata.runtime trips the
gate with full snapshot diff in the failure message.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-04-30 06:27:20 -07:00
parent 7a6ccde7f2
commit e336688278
4 changed files with 358 additions and 159 deletions

View File

@ -0,0 +1,150 @@
"""Shared inspect-based signature-snapshot helpers (#2364 item 2).
Originally lived inline in tests/test_adapter_base_signature.py.
Extracted here so each public-surface module gets its own
test_*_signature.py + snapshot file without copy-pasting the
introspection logic.
Pattern (one snapshot file per module):
from tests._signature_snapshot import (
build_class_signature_record,
build_dataclass_record,
compare_against_snapshot,
)
SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "<module>_signature.json"
def _build_full_snapshot() -> dict:
from <module> import PublicClass, PublicDataclass
return {
"module": "<module>",
"classes": [build_class_signature_record(PublicClass)],
"dataclasses": [build_dataclass_record(PublicDataclass)],
}
def test_<module>_signature_matches_snapshot():
compare_against_snapshot(_build_full_snapshot(), SNAPSHOT_PATH)
The snapshot is a stable JSON file sort_keys + indent=2 so
diffs are reviewable in PR. Any drift trips the test with both
expected and actual JSON in the failure message.
"""
import inspect
import json
from pathlib import Path
import pytest
def _annotation_repr(annotation: object) -> str:
"""Stable string form of a type annotation. ``inspect`` returns the
runtime objects which don't compare cleanly — repr is the boring
correct answer for snapshotting."""
if annotation is inspect.Parameter.empty:
return ""
if isinstance(annotation, type):
return annotation.__name__
return str(annotation)
def _parameter_record(p: inspect.Parameter) -> dict:
return {
"name": p.name,
"kind": p.kind.name,
"annotation": _annotation_repr(p.annotation),
"has_default": p.default is not inspect.Parameter.empty,
}
def _signature_record(name: str, fn: object) -> dict:
sig = inspect.signature(fn)
return {
"name": name,
"is_async": inspect.iscoroutinefunction(fn),
"is_abstract": getattr(fn, "__isabstractmethod__", False),
"parameters": [_parameter_record(p) for p in sig.parameters.values()],
"return_annotation": _annotation_repr(sig.return_annotation),
}
def build_class_signature_record(cls: type) -> dict:
"""Snapshot a class's public method surface. Public = name doesn't
start with underscore. Static/class/abstract methods are unwrapped
so the underlying function signature is captured.
Returns: ``{class: <name>, methods: [<sorted method records>]}``
"""
methods: list[dict] = []
for attr_name in sorted(vars(cls)):
if attr_name.startswith("_"):
continue
attr = vars(cls)[attr_name]
if isinstance(attr, staticmethod):
fn = attr.__func__
elif isinstance(attr, classmethod):
fn = attr.__func__
elif callable(attr):
fn = attr
else:
continue
methods.append(_signature_record(attr_name, fn))
return {"class": cls.__name__, "methods": methods}
def build_dataclass_record(cls: type) -> dict:
"""Snapshot a dataclass's field shape. Captures field name + type
annotation + has_default per field, plus the @dataclass(frozen=...)
flag. Default values themselves are NOT recorded (would require
brittle value-shape stringifying for non-trivial defaults).
Returns: ``{name, frozen, fields: [<field records>]}``
"""
import dataclasses as _dc
fields = []
for f in _dc.fields(cls):
fields.append({
"name": f.name,
"annotation": _annotation_repr(f.type) if not isinstance(f.type, str) else f.type,
"has_default": f.default is not _dc.MISSING or f.default_factory is not _dc.MISSING,
})
return {
"name": cls.__name__,
"frozen": getattr(cls, "__dataclass_params__").frozen,
"fields": fields,
}
def compare_against_snapshot(actual: dict, snapshot_path: Path) -> None:
"""Compare a built snapshot against a checked-in JSON file.
On first run (snapshot missing): writes the file and skips. Re-run
to verify it now passes the snapshot file appears in the diff
of the PR introducing it.
On drift: fails the test with both expected and actual JSON in
the failure message so the reviewer sees the change without
re-running anything.
"""
if not snapshot_path.exists():
snapshot_path.parent.mkdir(parents=True, exist_ok=True)
snapshot_path.write_text(json.dumps(actual, indent=2, sort_keys=True) + "\n")
pytest.skip(
f"snapshot did not exist; wrote {snapshot_path.name}"
"re-run the test to verify it now passes"
)
expected = json.loads(snapshot_path.read_text())
if actual != expected:
actual_str = json.dumps(actual, indent=2, sort_keys=True)
expected_str = json.dumps(expected, indent=2, sort_keys=True)
pytest.fail(
f"Signature drifted from {snapshot_path.name}.\n\n"
"Update intentionally by deleting the snapshot file and re-running, "
"OR by editing it to match. The PR diff makes the change visible "
"to reviewers and to template repos that depend on this surface.\n\n"
f"=== EXPECTED ({snapshot_path.name}) ===\n{expected_str}\n\n"
f"=== ACTUAL (current source) ===\n{actual_str}\n"
)

View File

@ -0,0 +1,62 @@
{
"dataclasses": [
{
"fields": [
{
"annotation": "str",
"has_default": false,
"name": "id"
},
{
"annotation": "str",
"has_default": false,
"name": "name"
},
{
"annotation": "str",
"has_default": false,
"name": "description"
},
{
"annotation": "list[str]",
"has_default": true,
"name": "tags"
},
{
"annotation": "list[str]",
"has_default": true,
"name": "examples"
},
{
"annotation": "list[str]",
"has_default": true,
"name": "runtime"
}
],
"frozen": false,
"name": "SkillMetadata"
},
{
"fields": [
{
"annotation": "SkillMetadata",
"has_default": false,
"name": "metadata"
},
{
"annotation": "str",
"has_default": false,
"name": "instructions"
},
{
"annotation": "list[typing.Any]",
"has_default": true,
"name": "tools"
}
],
"frozen": false,
"name": "LoadedSkill"
}
],
"module": "skill_loader.loader"
}

View File

@ -1,35 +1,31 @@
"""BaseAdapter public-API signature snapshot — drift gate (#2364 item 2).
Every workspace template subclasses ``BaseAdapter``. Renaming, removing,
or re-typing a method on the base class silently breaks templates that
override it the override stops being recognized as an override, the
old method-name's caller silently invokes the default no-op, etc.
Recent #87 universal-runtime work + #1957 recordResource refactor both
renamed/added methods; without a frozen snapshot, the next rename ships
quietly and only surfaces when a template's CI catches the AttributeError
days later.
or re-typing a method on the base class or a field on the public
dataclasses (SetupResult, AdapterConfig, RuntimeCapabilities)
silently breaks templates that rely on the old shape. Without a
frozen snapshot, the next rename ships quietly and only surfaces when
a template's CI catches the AttributeError days later.
This test pins the public surface. It walks ``BaseAdapter`` with
``inspect`` and compares the result against a checked-in JSON snapshot.
Any drift = test failure.
Helpers live in ``tests/_signature_snapshot.py`` so future surfaces
(skill_loader, etc.) reuse the same introspection logic.
When the failure is intentional:
1. Make the API change in ``adapter_base.py``.
2. Run the test once to see the diff in the failure message.
3. Update ``tests/snapshots/adapter_base_signature.json`` to match
the new shape that update IS the explicit acknowledgment that
templates need follow-up. Reviewer of the PR sees the snapshot
diff in their review and decides whether template repos need
coordinated updates.
the new shape (or delete it and re-run to regenerate). That
update IS the explicit acknowledgment that templates need
follow-up. Reviewer of the PR sees the snapshot diff in their
review and decides whether template repos need coordinated
updates.
Same-shape pattern as PR #2363's A2A protocol-compat replay gate
(workspace-server/internal/handlers/testdata/a2a_corpus). Both close
drift classes by snapshotting the structural surface that templates
or callers depend on.
Same-shape pattern as PR #2363's A2A protocol-compat replay gate.
Both close drift classes by snapshotting the structural surface that
templates or callers depend on.
"""
import inspect
import json
import sys
from pathlib import Path
@ -41,157 +37,40 @@ WORKSPACE_DIR = Path(__file__).parent.parent
if str(WORKSPACE_DIR) not in sys.path:
sys.path.insert(0, str(WORKSPACE_DIR))
from tests._signature_snapshot import ( # noqa: E402
build_class_signature_record,
build_dataclass_record,
compare_against_snapshot,
)
SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "adapter_base_signature.json"
def _annotation_repr(annotation: object) -> str:
"""Stable string form of a type annotation. ``inspect`` returns the
runtime objects which don't compare cleanly — repr is the boring
correct answer for snapshotting."""
if annotation is inspect.Parameter.empty:
return ""
if isinstance(annotation, type):
return annotation.__name__
# types.UnionType / typing.Union / forward refs — repr captures all
return str(annotation)
def _build_full_snapshot() -> dict:
"""Snapshot of BaseAdapter methods + the three public dataclasses
that form the call/return contract between the platform and every
adapter:
def _parameter_record(p: inspect.Parameter) -> dict:
return {
"name": p.name,
"kind": p.kind.name,
"annotation": _annotation_repr(p.annotation),
"has_default": p.default is not inspect.Parameter.empty,
}
def _signature_record(name: str, fn: object) -> dict:
sig = inspect.signature(fn)
return {
"name": name,
"is_async": inspect.iscoroutinefunction(fn),
"is_abstract": getattr(fn, "__isabstractmethod__", False),
"parameters": [_parameter_record(p) for p in sig.parameters.values()],
"return_annotation": _annotation_repr(sig.return_annotation),
}
def _build_signature_snapshot() -> dict:
"""Walk BaseAdapter and produce a stable JSON-serializable snapshot."""
from adapter_base import BaseAdapter # imported lazy so test discovery is fast
methods: list[dict] = []
for attr_name in sorted(vars(BaseAdapter)):
if attr_name.startswith("_"):
continue
attr = vars(BaseAdapter)[attr_name]
# Only callables — skip data attributes (none today, but
# forward-defensive). staticmethod / classmethod are unwrapped
# via __func__; abstractmethod wraps the underlying fn.
if isinstance(attr, staticmethod):
fn = attr.__func__
elif isinstance(attr, classmethod):
fn = attr.__func__
elif callable(attr):
fn = attr
else:
continue
methods.append(_signature_record(attr_name, fn))
return {"class": "BaseAdapter", "methods": methods}
def _dataclass_record(cls: type) -> dict:
"""Stable JSON shape for a public dataclass exported from
adapter_base. Captures field name + type annotation + default
presence so renaming, retyping, or making-required-vs-optional
drift trips the gate.
Note on defaults: we record presence-of-default, not the default
value. A literal default like ``False`` or ``None`` is part of the
contract (templates inherit it), but reproducing it here would
require value-shape stringifying that's brittle for non-trivial
defaults (lists, dataclasses-as-defaults). Presence is enough to
catch the dangerous transitions (required optional and vice
versa).
"""
import dataclasses as _dc
fields = []
for f in _dc.fields(cls):
fields.append({
"name": f.name,
"annotation": _annotation_repr(f.type) if not isinstance(f.type, str) else f.type,
"has_default": f.default is not _dc.MISSING or f.default_factory is not _dc.MISSING,
})
return {
"name": cls.__name__,
"frozen": getattr(cls, "__dataclass_params__").frozen,
"fields": fields,
}
def _build_dataclass_snapshot() -> list[dict]:
"""Snapshot the public dataclasses exported from adapter_base.
These types form the call-and-return shape between the platform
and every adapter:
- SetupResult: returned by adapter._common_setup()
- AdapterConfig: passed into adapter setup hooks
- RuntimeCapabilities: returned by adapter.capabilities() and
consumed by platform-side dispatch routing (#117). A field
rename here silently disables every native-capability flag
every adapter currently declares.
- RuntimeCapabilities: returned by adapter.capabilities();
drives platform-side dispatch routing (#117). A field rename
here silently disables every native-capability flag every
adapter currently declares.
"""
from adapter_base import AdapterConfig, RuntimeCapabilities, SetupResult
from adapter_base import AdapterConfig, BaseAdapter, RuntimeCapabilities, SetupResult
classes = [SetupResult, AdapterConfig, RuntimeCapabilities]
return [_dataclass_record(cls) for cls in classes]
def _build_full_snapshot() -> dict:
"""Combined snapshot — BaseAdapter methods + public dataclasses."""
return {
**_build_signature_snapshot(),
"dataclasses": _build_dataclass_snapshot(),
}
snap = build_class_signature_record(BaseAdapter)
snap["dataclasses"] = [
build_dataclass_record(SetupResult),
build_dataclass_record(AdapterConfig),
build_dataclass_record(RuntimeCapabilities),
]
return snap
def test_base_adapter_signature_matches_snapshot():
"""Pin BaseAdapter's public API surface against a frozen snapshot.
Covers BOTH method signatures AND public dataclass field shapes
(SetupResult, AdapterConfig, RuntimeCapabilities). Renaming a
RuntimeCapabilities field would silently disable every adapter's
capability declaration without this gate.
On failure, the test prints both the expected and actual snapshot
JSON so the diff is human-readable. Updating the snapshot is the
explicit ack that a template-affecting API change is intentional.
"""
actual = _build_full_snapshot()
if not SNAPSHOT_PATH.exists():
# First-run convenience: write the snapshot if missing. A reviewer
# of the introducing PR sees the new file in the diff.
SNAPSHOT_PATH.parent.mkdir(parents=True, exist_ok=True)
SNAPSHOT_PATH.write_text(json.dumps(actual, indent=2, sort_keys=True) + "\n")
pytest.skip(
f"snapshot did not exist; wrote {SNAPSHOT_PATH.name}"
"re-run the test to verify it now passes"
)
expected = json.loads(SNAPSHOT_PATH.read_text())
if actual != expected:
# Pretty-print both for the failure message so reviewer sees what
# changed without rerunning anything.
actual_str = json.dumps(actual, indent=2, sort_keys=True)
expected_str = json.dumps(expected, indent=2, sort_keys=True)
pytest.fail(
"BaseAdapter signature drifted from snapshot.\n\n"
f"To update intentionally:\n cp <(python -c 'from tests.test_adapter_base_signature import _build_full_snapshot; import json; print(json.dumps(_build_full_snapshot(), indent=2, sort_keys=True))') {SNAPSHOT_PATH}\n"
"Or rerun with the snapshot deleted to regenerate.\n\n"
f"=== EXPECTED ({SNAPSHOT_PATH.name}) ===\n{expected_str}\n\n"
f"=== ACTUAL (current adapter_base.py) ===\n{actual_str}\n"
)
compare_against_snapshot(_build_full_snapshot(), SNAPSHOT_PATH)
def test_snapshot_has_required_methods():

View File

@ -0,0 +1,108 @@
"""skill_loader public-API signature snapshot — drift gate.
Every workspace template's adapter pulls skills via
``skill_loader.load_skills``. The returned ``LoadedSkill`` objects
expose ``metadata`` (a ``SkillMetadata``) which adapters pattern-match
on for runtime-compat filtering see ``hermes`` and ``claude-code``
adapters which inspect ``skill.metadata.runtime`` to decide whether
to load a skill or skip it.
Renaming a field on ``SkillMetadata`` (e.g. ``runtime`` ``runtimes``)
would silently break that filtering. The skill loader call still
returns objects, but every adapter's ``if "*" in skill.metadata.runtime``
check raises AttributeError at workspace boot too late to be caught
by the introducing PR's CI.
Same drift class as the BaseAdapter signature snapshot (#2378, #2380),
applied to a different public surface.
"""
import sys
from pathlib import Path
import pytest
WORKSPACE_DIR = Path(__file__).parent.parent
if str(WORKSPACE_DIR) not in sys.path:
sys.path.insert(0, str(WORKSPACE_DIR))
from tests._signature_snapshot import ( # noqa: E402
build_dataclass_record,
compare_against_snapshot,
)
SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "skill_loader_signature.json"
def _build_full_snapshot() -> dict:
"""Snapshot the public dataclasses exported from skill_loader.
SkillMetadata fields are consumed by:
- adapter runtime filtering (``runtime`` field)
- canvas UI display (``name``, ``description``, ``tags``)
- skill discovery / search (``id``, ``examples``)
LoadedSkill is the return shape from ``load_skills`` and is held
in ``SetupResult.loaded_skills`` every adapter consumes it.
"""
from skill_loader.loader import LoadedSkill, SkillMetadata
return {
"module": "skill_loader.loader",
"dataclasses": [
build_dataclass_record(SkillMetadata),
build_dataclass_record(LoadedSkill),
],
}
def test_skill_loader_signature_matches_snapshot():
compare_against_snapshot(_build_full_snapshot(), SNAPSHOT_PATH)
def test_snapshot_has_required_skill_metadata_fields():
"""Defense-in-depth — adapters pattern-match on these specific
field names. Removing one without a coordinated update breaks
every adapter's skill-filter logic.
"""
if not SNAPSHOT_PATH.exists():
pytest.skip(f"{SNAPSHOT_PATH.name} not generated yet")
import json
snapshot = json.loads(SNAPSHOT_PATH.read_text())
dataclasses = {dc["name"]: dc for dc in snapshot.get("dataclasses", [])}
expected = {
"SkillMetadata": {
"id",
"name",
"description",
# `runtime` drives per-adapter skill-compat filtering. If
# this field is renamed, every adapter's
# `if "*" in skill.metadata.runtime` check raises
# AttributeError at workspace boot.
"runtime",
},
"LoadedSkill": {
"metadata",
"instructions",
"tools",
},
}
for cls_name, required_fields in expected.items():
if cls_name not in dataclasses:
pytest.fail(
f"Public dataclass {cls_name} missing from snapshot — "
"either it was removed from skill_loader.loader, OR the "
"snapshot wasn't regenerated after a refactor."
)
actual_fields = {f["name"] for f in dataclasses[cls_name]["fields"]}
missing = required_fields - actual_fields
if missing:
pytest.fail(
f"{cls_name} is missing required fields: {sorted(missing)}.\n"
"Either restore them on skill_loader/loader.py, OR coordinate "
"adapter + template updates AND remove the entry from "
"`expected` in this test with a justification."
)