fix(runtime): isolate card-skill enrichment + transcript handler from adapter shape mismatch

PR #2756 added a try/except around adapter.setup() so a missing LLM key doesn't crash the workspace boot. Two paths that now run AFTER setup succeeds were not similarly isolated, leaving small but real coupling risks for future adapter authors. 1. **Skill metadata enrichment swap (main.py:248-259).** When adapter.setup() returns, main.py reads adapter.loaded_skills and replaces the static stubs in agent_card.skills with rich metadata (description, tags, examples). The list comprehension assumes each element exposes .metadata.{id,name,description,tags,examples}. A future adapter that returns a non-canonical shape would raise AttributeError, propagate to the outer except, capture as adapter_error, and silently degrade an OK boot to the not-configured state — even though setup() actually succeeded. Extract to card_helpers.enrich_card_skills(card, loaded_skills) → bool. Helper swallows enrichment failures, logs the cause, returns False, leaves the static stubs in place. setup() success path continues unchanged. 6 unit tests cover: None input, empty list, canonical happy path, missing .metadata attr, partial .metadata (missing one canonical field), atomic-failure-no-partial-swap. 2. **/transcript handler (main.py:513).** Calls await adapter.transcript_lines(...) without try/except. BaseAdapter's default returns {"supported": false} so today's 4 adapters never trigger this — but a future adapter override that assumes setup() ran would surface as a 500 from Starlette's default error handler instead of a useful 503 with the exception class + message. Inline try/except returns 503 with the reason, matching the not-configured JSON-RPC handler's pattern. Both changes match the architectural principle the PR #2756 chain established: availability (workspace reachable) is decoupled from configuration / adapter behavior. Operators see useful errors instead of silent degradation; future adapter authors can't accidentally break tenant readiness with a shape mismatch. Adds: - workspace/card_helpers.py (~50 lines, 100% covered) - workspace/tests/test_card_helpers.py (6 tests) - AgentCard/AgentSkill/AgentCapabilities/AgentInterface stubs to workspace/tests/conftest.py so future card-related tests work under the existing a2a-mock infrastructure - card_helpers in TOP_LEVEL_MODULES (drift gate would have caught it) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 14:15:27 -07:00 · 2026-05-04 14:15:27 -07:00 · 63ac99788b
commit 63ac99788b
parent 28472f0d2d
5 changed files with 282 additions and 13 deletions
--- a/scripts/build_runtime_package.py
+++ b/scripts/build_runtime_package.py
@ -58,6 +58,7 @@ TOP_LEVEL_MODULES = {
    "adapter_base",
    "agent",
    "agents_md",
    "card_helpers",
    "config",
    "configs_dir",
    "consolidation",
--- a/workspace/card_helpers.py
+++ b/workspace/card_helpers.py
@ -0,0 +1,57 @@
 """Helpers for building / mutating the workspace ``AgentCard``.
 Kept as their own module so the behavior is unit-testable without booting
 the whole runtime (``main.py`` is ``# pragma: no cover``).
 """
 from __future__ import annotations
 from typing import Iterable
 from a2a.types import AgentCard, AgentSkill
 def enrich_card_skills(card: AgentCard, loaded_skills: Iterable | None) -> bool:
    """Replace ``card.skills`` with rich metadata from the adapter's loaded
    skills, in place. Pairs with PR #2756: the card was built up front from
    static ``config.skills`` names so /.well-known/agent-card.json could
    serve before ``adapter.setup()`` finishes; this swaps in the richer
    descriptions/tags/examples that ``setup()``'s skill loader produces.
    Returns ``True`` on swap, ``False`` when the swap was skipped or
    failed. Failure cases:
    * ``loaded_skills`` is None / empty — caller didn't load any.
    * Any element doesn't expose ``.metadata.{id,name,description,tags,examples}``
      (a future adapter that doesn't follow the canonical shape).
    Failures DO NOT raise — a malformed ``loaded_skills`` shape would
    otherwise propagate to ``main.py``'s outer ``except Exception``,
    silently degrading an OK boot to the not-configured state. Static
    stubs from ``config.skills`` stay in place; setup() already
    succeeded, the agent works, only the card's skill enrichment is
    degraded. Operator sees a clear log line; tests assert this
    distinction.
    """
    if not loaded_skills:
        return False
    try:
        rich = [
            AgentSkill(
                id=skill.metadata.id,
                name=skill.metadata.name,
                description=skill.metadata.description,
                tags=skill.metadata.tags,
                examples=skill.metadata.examples,
            )
            for skill in loaded_skills
        ]
    except Exception as enrich_err:  # noqa: BLE001
        print(
            f"Warning: skill metadata enrichment failed (keeping static "
            f"stubs from config.skills): {type(enrich_err).__name__}: {enrich_err}",
            flush=True,
        )
        return False
    card.skills = rich
    return True
--- a/workspace/main.py
+++ b/workspace/main.py
@ -245,18 +245,13 @@ async def main():  # pragma: no cover
        # 6c. Swap rich skill metadata into the card now that setup() loaded
        # them. In-place mutation: a2a-sdk's create_agent_card_routes serialises
        # the card on each request, so the route mounted below sees the update.
-        loaded_skills = getattr(adapter, "loaded_skills", None)
+        # Isolated via card_helpers.enrich_card_skills — a malformed
-        if loaded_skills:
+        # loaded_skills shape (e.g., a future adapter that doesn't follow
-            agent_card.skills = [
+        # the .metadata convention) is logged + swallowed instead of
-                AgentSkill(
+        # propagating up to the outer except, where it would silently
-                    id=skill.metadata.id,
+        # degrade an OK boot to the not-configured state.
-                    name=skill.metadata.name,
+        from card_helpers import enrich_card_skills
-                    description=skill.metadata.description,
+        enrich_card_skills(agent_card, getattr(adapter, "loaded_skills", None))
                    tags=skill.metadata.tags,
                    examples=skill.metadata.examples,
                )
                for skill in loaded_skills
            ]
        adapter_ready = True
    except SystemExit:
        # Smoke-mode exit signal — propagate untouched.
@ -497,7 +492,24 @@ async def main():  # pragma: no cover
            limit = int(request.query_params.get("limit", "100"))
        except (TypeError, ValueError):
            return JSONResponse({"error": "since and limit must be integers"}, status_code=400)
-        result = await adapter.transcript_lines(since=since, limit=limit)
+        # Isolate adapter call: misconfigured boots leave the adapter
        # partially-initialised, and a future adapter override of
        # transcript_lines might assume setup() ran. Surface a 503 with
        # a clear reason instead of letting the exception propagate to
        # Starlette's 500 handler — same pattern as the not-configured
        # JSON-RPC route (PR #2756). BaseAdapter.transcript_lines's
        # default returns {"supported": false} so today's 4 adapters
        # never trigger this branch; this is the safety net.
        try:
            result = await adapter.transcript_lines(since=since, limit=limit)
        except Exception as transcript_err:  # noqa: BLE001
            return JSONResponse(
                {
                    "error": "transcript unavailable",
                    "detail": f"{type(transcript_err).__name__}: {transcript_err}",
                },
                status_code=503,
            )
        return JSONResponse(result)
    starlette_app.add_route("/transcript", _transcript_handler, methods=["GET"])
--- a/workspace/tests/conftest.py
+++ b/workspace/tests/conftest.py
@ -145,6 +145,42 @@ def _make_a2a_mocks():
    types_mod.TaskStatus = TaskStatus
    types_mod.TaskState = _TaskStateEnum
    # v1 AgentCard / AgentSkill / AgentCapabilities / AgentInterface — used
    # by main.py's static-card construction (PR #2756) and by
    # card_helpers.enrich_card_skills's swap path. Stubs preserve kwargs so
    # tests can assert on card.skills[i].name etc., and let card.skills be
    # reassigned in place (the production code's enrichment pattern).
    class AgentSkill:
        def __init__(self, id="", name="", description="", tags=None, examples=None, **kwargs):
            self.id = id
            self.name = name
            self.description = description
            self.tags = list(tags) if tags is not None else []
            self.examples = list(examples) if examples is not None else []
            for k, v in kwargs.items():
                setattr(self, k, v)
    class AgentCapabilities:
        def __init__(self, **kwargs):
            for k, v in kwargs.items():
                setattr(self, k, v)
    class AgentInterface:
        def __init__(self, **kwargs):
            for k, v in kwargs.items():
                setattr(self, k, v)
    class AgentCard:
        def __init__(self, **kwargs):
            self.skills = []
            for k, v in kwargs.items():
                setattr(self, k, v)
    types_mod.AgentSkill = AgentSkill
    types_mod.AgentCapabilities = AgentCapabilities
    types_mod.AgentInterface = AgentInterface
    types_mod.AgentCard = AgentCard
    # a2a.helpers (v1: moved from a2a.utils, renamed new_agent_text_message
    # → new_text_message). Mock both names — production code only calls
    # new_text_message, but if any test still references the old name it
--- a/workspace/tests/test_card_helpers.py
+++ b/workspace/tests/test_card_helpers.py
@ -0,0 +1,163 @@
 """Tests for ``card_helpers.enrich_card_skills`` — the defensive swap that
 replaces ``AgentCard.skills`` with rich metadata from the adapter's
 loaded skills, falling back to the static stubs on shape mismatch.
 The whole point of the helper (vs inline in main.py) is that a future
 adapter author who returns a non-standard ``loaded_skills`` shape
 should NOT silently downgrade their workspace boot to not-configured —
 ``setup()`` succeeded, the agent works, only the card's skill metadata
 enrichment is degraded.
 """
 from __future__ import annotations
 import sys
 from pathlib import Path
 WORKSPACE_DIR = Path(__file__).resolve().parents[1]
 if str(WORKSPACE_DIR) not in sys.path:
    sys.path.insert(0, str(WORKSPACE_DIR))
 from a2a.types import AgentCard, AgentCapabilities, AgentInterface, AgentSkill
 from card_helpers import enrich_card_skills
 def _make_card(static_skill_names):
    return AgentCard(
        name="test-agent",
        description="test",
        version="0.0.0",
        supported_interfaces=[
            AgentInterface(protocol_binding="https://a2a.g/v1", url="http://x:8000")
        ],
        capabilities=AgentCapabilities(streaming=True, push_notifications=False),
        skills=[
            AgentSkill(id=n, name=n, description=n, tags=[], examples=[])
            for n in static_skill_names
        ],
        default_input_modes=["text/plain"],
        default_output_modes=["text/plain"],
    )
 class _SkillMetadata:
    """Mimics the adapter-side Skill.metadata shape."""
    def __init__(self, id, name, description, tags, examples):
        self.id = id
        self.name = name
        self.description = description
        self.tags = tags
        self.examples = examples
 class _Skill:
    def __init__(self, **kwargs):
        self.metadata = _SkillMetadata(**kwargs)
 def test_returns_false_on_none():
    """No loaded_skills → caller didn't load any → no swap, no log spam."""
    card = _make_card(["a", "b"])
    assert enrich_card_skills(card, None) is False
    # Static stubs preserved.
    assert [s.id for s in card.skills] == ["a", "b"]
 def test_returns_false_on_empty_list():
    """Empty list → same treatment as None: nothing to enrich."""
    card = _make_card(["a"])
    assert enrich_card_skills(card, []) is False
    assert [s.id for s in card.skills] == ["a"]
 def test_swaps_in_rich_metadata_on_canonical_shape():
    """The happy path: adapter returns Skill objects with the canonical
    .metadata shape, card gets the richer descriptions/tags/examples."""
    card = _make_card(["search"])  # static stub
    rich = [
        _Skill(
            id="search",
            name="Web Search",
            description="Search the web for the user's question",
            tags=["web", "io"],
            examples=["who won the world cup in 2022?"],
        ),
    ]
    assert enrich_card_skills(card, rich) is True
    assert len(card.skills) == 1
    assert card.skills[0].id == "search"
    assert card.skills[0].name == "Web Search"
    assert "web" in card.skills[0].tags
    assert card.skills[0].examples == ["who won the world cup in 2022?"]
 def test_returns_false_and_keeps_stubs_when_metadata_attr_missing(capsys):
    """Defensive: a future adapter that returns objects without
    ``.metadata`` would otherwise raise AttributeError and propagate to
    main.py's outer except — silently degrading an OK boot to
    not-configured. Helper logs + returns False instead, static stubs
    stay in place.
    This is the reason the helper exists at all; without it the
    inline swap in main.py at PR #2756 was a coupling between adapter
    discipline and tenant-facing readiness."""
    card = _make_card(["a"])
    class NoMetadata:
        id = "x"  # has id but no .metadata.id (the canonical path)
    assert enrich_card_skills(card, [NoMetadata()]) is False
    # Static stub preserved.
    assert [s.id for s in card.skills] == ["a"]
    # Operator gets a log line.
    captured = capsys.readouterr()
    assert "skill metadata enrichment failed" in captured.out
 def test_returns_false_when_metadata_is_partial(capsys):
    """Partial shape — has .metadata but the .metadata object lacks one
    of the canonical attrs (here: ``examples``). The list comprehension
    raises AttributeError on ``skill.metadata.examples`` access, which
    the helper swallows. (In production, a2a.types.AgentSkill is a
    Pydantic model that ALSO raises on missing required fields — both
    failure modes route through the same except branch.)"""
    card = _make_card(["a"])
    class PartialMeta:
        def __init__(self):
            self.id = "x"
            self.name = "x"
            self.description = "x"
            self.tags = []
            # examples missing
    class PartialSkill:
        def __init__(self):
            self.metadata = PartialMeta()
    result = enrich_card_skills(card, [PartialSkill()])
    assert result is False
    assert [s.id for s in card.skills] == ["a"]
    captured = capsys.readouterr()
    assert "skill metadata enrichment failed" in captured.out
 def test_failure_is_atomic_no_partial_swap(capsys):
    """If the second skill is malformed, the FIRST skill's swap must NOT
    leak into card.skills. We use a list-comprehension which builds the
    full list before assignment; verify that property holds.
    Without this property, a misbehaving adapter could half-corrupt the
    card — operators would see "1 skill listed" when 3 were declared,
    no log line if the inline swap was partial."""
    card = _make_card(["a", "b"])
    valid = _Skill(id="x", name="x", description="x", tags=[], examples=[])
    class BadSkill:
        # No .metadata at all.
        pass
    assert enrich_card_skills(card, [valid, BadSkill()]) is False
    # Original two static stubs intact — card.skills was never reassigned.
    assert [s.id for s in card.skills] == ["a", "b"]