fix(runtime): isolate card-skill enrichment + transcript handler from adapter shape mismatch

PR #2756 added a try/except around adapter.setup() so a missing LLM key doesn't crash the workspace boot. Two paths that now run AFTER setup succeeds were not similarly isolated, leaving small but real coupling risks for future adapter authors.

1. **Skill metadata enrichment swap (main.py:248-259).** When adapter.setup() returns, main.py reads adapter.loaded_skills and replaces the static stubs in agent_card.skills with rich metadata (description, tags, examples). The list comprehension assumes each element exposes .metadata.{id,name,description,tags,examples}. A future adapter that returns a non-canonical shape would raise AttributeError, which would propagate to the outer except, be captured as adapter_error, and silently degrade an OK boot to the not-configured state — even though setup() actually succeeded.

   Extract to card_helpers.enrich_card_skills(card, loaded_skills) → bool. The helper swallows enrichment failures, logs the cause, returns False, and leaves the static stubs in place. The setup() success path continues unchanged. 6 unit tests cover: None input, empty list, canonical happy path, missing .metadata attr, partial .metadata (missing one canonical field), atomic-failure-no-partial-swap.

2. **/transcript handler (main.py:513).** Calls await adapter.transcript_lines(...) without try/except. BaseAdapter's default returns {"supported": false} so today's 4 adapters never trigger this — but an exception from a future adapter override that assumes setup() ran would surface as a 500 from Starlette's default error handler instead of a useful 503 with the exception class + message. An inline try/except returns 503 with the reason, matching the not-configured JSON-RPC handler's pattern.

Both changes match the architectural principle the PR #2756 chain established: availability (workspace reachable) is decoupled from configuration / adapter behavior. Operators see useful errors instead of silent degradation; future adapter authors can't accidentally break tenant readiness with a shape mismatch.
Adds:
- workspace/card_helpers.py (~50 lines, 100% covered)
- workspace/tests/test_card_helpers.py (6 tests)
- AgentCard/AgentSkill/AgentCapabilities/AgentInterface stubs to workspace/tests/conftest.py so future card-related tests work under the existing a2a-mock infrastructure
- card_helpers in TOP_LEVEL_MODULES (drift gate would have caught it)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 14:15:27 -07:00 · 2026-05-04 14:15:27 -07:00 · 63ac99788b
commit 63ac99788b
parent 28472f0d2d
5 changed files with 282 additions and 13 deletions
--- a/scripts/build_runtime_package.py
+++ b/scripts/build_runtime_package.py
@ -58,6 +58,7 @@ TOP_LEVEL_MODULES = {
    "adapter_base",
    "agent",
    "agents_md",
+    "card_helpers",
    "config",
    "configs_dir",
    "consolidation",
--- a/workspace/card_helpers.py
+++ b/workspace/card_helpers.py
@ -0,0 +1,57 @@
+"""Helpers for building / mutating the workspace ``AgentCard``.
+
+Kept as their own module so the behavior is unit-testable without booting
+the whole runtime (``main.py`` is ``# pragma: no cover``).
+"""
+from __future__ import annotations
+
+from typing import Iterable
+
+from a2a.types import AgentCard, AgentSkill
+
+
+def enrich_card_skills(card: AgentCard, loaded_skills: Iterable | None) -> bool:
+    """Replace ``card.skills`` with rich metadata from the adapter's loaded
+    skills, in place. Pairs with PR #2756: the card was built up front from
+    static ``config.skills`` names so /.well-known/agent-card.json could
+    serve before ``adapter.setup()`` finishes; this swaps in the richer
+    descriptions/tags/examples that ``setup()``'s skill loader produces.
+
+    Args:
+        card: The card whose ``skills`` attribute is reassigned on success.
+        loaded_skills: Iterable of objects exposing
+            ``.metadata.{id,name,description,tags,examples}``, or ``None``.
+            May be a one-shot iterator/generator; it is consumed once.
+
+    Returns:
+        ``True`` when ``card.skills`` was replaced, ``False`` when the swap
+        was skipped or failed. ``False`` cases:
+
+        * ``loaded_skills`` is None / falsy / yields nothing — there is
+          nothing to enrich, so the static stubs stay (no log line).
+        * Any element doesn't expose the canonical ``.metadata`` shape, or
+          ``AgentSkill`` rejects the values (logged).
+        * Assigning the new list onto ``card`` raises (logged).
+
+    Failures DO NOT raise — an exception here would otherwise propagate to
+    ``main.py``'s outer ``except Exception``, silently degrading an OK boot
+    to the not-configured state. Static stubs from ``config.skills`` stay
+    in place; setup() already succeeded, the agent works, only the card's
+    skill enrichment is degraded. Operator sees a clear log line.
+    """
+    if not loaded_skills:
+        return False
+
+    try:
+        # Build the complete replacement list before touching the card so a
+        # malformed element part-way through can never leave a partial swap.
+        rich = [
+            AgentSkill(
+                id=skill.metadata.id,
+                name=skill.metadata.name,
+                description=skill.metadata.description,
+                tags=skill.metadata.tags,
+                examples=skill.metadata.examples,
+            )
+            for skill in loaded_skills
+        ]
+        # An empty generator/iterator is truthy, so it gets past the guard
+        # above; without this check it would wipe the static stubs.
+        if not rich:
+            return False
+        # The assignment stays inside the isolation boundary: if the card
+        # refuses the new value, that must not escape to the caller either.
+        card.skills = rich
+    except Exception as enrich_err:  # noqa: BLE001
+        print(
+            f"Warning: skill metadata enrichment failed (keeping static "
+            f"stubs from config.skills): {type(enrich_err).__name__}: {enrich_err}",
+            flush=True,
+        )
+        return False
+
+    return True
--- a/workspace/main.py
+++ b/workspace/main.py
@ -245,18 +245,13 @@ async def main():  # pragma: no cover
        # 6c. Swap rich skill metadata into the card now that setup() loaded
        # them. In-place mutation: a2a-sdk's create_agent_card_routes serialises
        # the card on each request, so the route mounted below sees the update.
-        loaded_skills = getattr(adapter, "loaded_skills", None)
-        if loaded_skills:
-            agent_card.skills = [
-                AgentSkill(
-                    id=skill.metadata.id,
-                    name=skill.metadata.name,
-                    description=skill.metadata.description,
-                    tags=skill.metadata.tags,
-                    examples=skill.metadata.examples,
-                )
-                for skill in loaded_skills
-            ]
+        # Isolated via card_helpers.enrich_card_skills — a malformed
+        # loaded_skills shape (e.g., a future adapter that doesn't follow
+        # the .metadata convention) is logged + swallowed instead of
+        # propagating up to the outer except, where it would silently
+        # degrade an OK boot to the not-configured state.
+        from card_helpers import enrich_card_skills
+        enrich_card_skills(agent_card, getattr(adapter, "loaded_skills", None))
        adapter_ready = True
    except SystemExit:
        # Smoke-mode exit signal — propagate untouched.
@ -497,7 +492,24 @@ async def main():  # pragma: no cover
            limit = int(request.query_params.get("limit", "100"))
        except (TypeError, ValueError):
            return JSONResponse({"error": "since and limit must be integers"}, status_code=400)
+        # Isolate adapter call: misconfigured boots leave the adapter
+        # partially-initialised, and a future adapter override of
+        # transcript_lines might assume setup() ran. Surface a 503 with
+        # a clear reason instead of letting the exception propagate to
+        # Starlette's 500 handler — same pattern as the not-configured
+        # JSON-RPC route (PR #2756). BaseAdapter.transcript_lines's
+        # default returns {"supported": false} so today's 4 adapters
+        # never trigger this branch; this is the safety net.
+        try:
            result = await adapter.transcript_lines(since=since, limit=limit)
+        except Exception as transcript_err:  # noqa: BLE001
+            return JSONResponse(
+                {
+                    "error": "transcript unavailable",
+                    "detail": f"{type(transcript_err).__name__}: {transcript_err}",
+                },
+                status_code=503,
+            )
        return JSONResponse(result)

    starlette_app.add_route("/transcript", _transcript_handler, methods=["GET"])
--- a/workspace/tests/conftest.py
+++ b/workspace/tests/conftest.py
@ -145,6 +145,42 @@ def _make_a2a_mocks():
    types_mod.TaskStatus = TaskStatus
    types_mod.TaskState = _TaskStateEnum

+    # v1 AgentCard / AgentSkill / AgentCapabilities / AgentInterface — used
+    # by main.py's static-card construction (PR #2756) and by
+    # card_helpers.enrich_card_skills's swap path. Stubs preserve kwargs so
+    # tests can assert on card.skills[i].name etc., and let card.skills be
+    # reassigned in place (the production code's enrichment pattern).
+    class AgentSkill:
+        """Test stand-in for the a2a skill type: keeps constructor kwargs as
+        plain attributes so tests can read them back."""
+
+        def __init__(self, id="", name="", description="", tags=None, examples=None, **kwargs):
+            self.id, self.name, self.description = id, name, description
+            # Copy the sequences so the stub never aliases the caller's lists.
+            self.tags = [] if tags is None else list(tags)
+            self.examples = [] if examples is None else list(examples)
+            # Any additional keyword becomes an attribute of the same name.
+            vars(self).update(kwargs)
+
+    class AgentCapabilities:
+        """Test stand-in: every constructor keyword becomes an attribute."""
+
+        def __init__(self, **kwargs):
+            vars(self).update(kwargs)
+
+    class AgentInterface:
+        """Test stand-in: every constructor keyword becomes an attribute."""
+
+        def __init__(self, **kwargs):
+            vars(self).update(kwargs)
+
+    class AgentCard:
+        """Test stand-in: ``skills`` defaults to an empty list and every
+        constructor keyword (including ``skills``) becomes an attribute."""
+
+        def __init__(self, **kwargs):
+            attrs = {"skills": []}
+            attrs.update(kwargs)
+            vars(self).update(attrs)
+
+    types_mod.AgentSkill = AgentSkill
+    types_mod.AgentCapabilities = AgentCapabilities
+    types_mod.AgentInterface = AgentInterface
+    types_mod.AgentCard = AgentCard
+
    # a2a.helpers (v1: moved from a2a.utils, renamed new_agent_text_message
    # → new_text_message). Mock both names — production code only calls
    # new_text_message, but if any test still references the old name it
--- a/workspace/tests/test_card_helpers.py
+++ b/workspace/tests/test_card_helpers.py
@ -0,0 +1,163 @@
+"""Tests for ``card_helpers.enrich_card_skills`` — the defensive swap that
+replaces ``AgentCard.skills`` with rich metadata from the adapter's
+loaded skills, falling back to the static stubs on shape mismatch.
+
+The whole point of the helper (vs inline in main.py) is that a future
+adapter author who returns a non-standard ``loaded_skills`` shape
+should NOT silently downgrade their workspace boot to not-configured —
+``setup()`` succeeded, the agent works, only the card's skill metadata
+enrichment is degraded.
+"""
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+WORKSPACE_DIR = Path(__file__).resolve().parents[1]
+if str(WORKSPACE_DIR) not in sys.path:
+    sys.path.insert(0, str(WORKSPACE_DIR))
+
+from a2a.types import AgentCard, AgentCapabilities, AgentInterface, AgentSkill
+
+from card_helpers import enrich_card_skills
+
+
+def _make_card(static_skill_names):
+    """Build a minimal ``AgentCard`` whose skills are name-only stubs, one
+    per entry in ``static_skill_names`` (id, name and description all set
+    to the given name; empty tags and examples)."""
+    interface = AgentInterface(protocol_binding="https://a2a.g/v1", url="http://x:8000")
+    capabilities = AgentCapabilities(streaming=True, push_notifications=False)
+
+    stub_skills = []
+    for skill_name in static_skill_names:
+        stub_skills.append(
+            AgentSkill(
+                id=skill_name,
+                name=skill_name,
+                description=skill_name,
+                tags=[],
+                examples=[],
+            )
+        )
+
+    return AgentCard(
+        name="test-agent",
+        description="test",
+        version="0.0.0",
+        supported_interfaces=[interface],
+        capabilities=capabilities,
+        skills=stub_skills,
+        default_input_modes=["text/plain"],
+        default_output_modes=["text/plain"],
+    )
+
+
+class _SkillMetadata:
+    """Stand-in for the adapter-side ``Skill.metadata`` object: a plain
+    holder for the five canonical fields the helper reads."""
+
+    def __init__(self, id, name, description, tags, examples):
+        self.id, self.name, self.description = id, name, description
+        self.tags, self.examples = tags, examples
+
+
+class _Skill:
+    """Stand-in for an adapter-loaded skill: wraps its keyword arguments in
+    a ``_SkillMetadata`` exposed as ``.metadata``."""
+
+    def __init__(self, **kwargs):
+        metadata = _SkillMetadata(**kwargs)
+        self.metadata = metadata
+
+
+def test_returns_false_on_none():
+    """``None`` means the caller loaded no skills: the helper reports no
+    swap and the card keeps its static stubs."""
+    card = _make_card(["a", "b"])
+
+    swapped = enrich_card_skills(card, None)
+
+    assert swapped is False
+    # Static stubs preserved.
+    remaining_ids = [stub.id for stub in card.skills]
+    assert remaining_ids == ["a", "b"]
+
+
+def test_returns_false_on_empty_list():
+    """An empty list is handled like ``None``: nothing to enrich, so no
+    swap happens and the static stub stays."""
+    card = _make_card(["a"])
+
+    swapped = enrich_card_skills(card, [])
+
+    assert swapped is False
+    remaining_ids = [stub.id for stub in card.skills]
+    assert remaining_ids == ["a"]
+
+
+def test_swaps_in_rich_metadata_on_canonical_shape():
+    """Happy path: skills with the canonical ``.metadata`` shape replace the
+    static stub, and the card exposes their richer name/tags/examples."""
+    card = _make_card(["search"])  # static stub
+    loaded = _Skill(
+        id="search",
+        name="Web Search",
+        description="Search the web for the user's question",
+        tags=["web", "io"],
+        examples=["who won the world cup in 2022?"],
+    )
+
+    assert enrich_card_skills(card, [loaded]) is True
+
+    assert len(card.skills) == 1
+    enriched = card.skills[0]
+    assert enriched.id == "search"
+    assert enriched.name == "Web Search"
+    assert "web" in enriched.tags
+    assert enriched.examples == ["who won the world cup in 2022?"]
+
+
+def test_returns_false_and_keeps_stubs_when_metadata_attr_missing(capsys):
+    """An element without ``.metadata`` must not raise out of the helper.
+
+    Unhandled, the AttributeError would reach main.py's outer except and
+    silently degrade an OK boot to not-configured. The helper instead logs,
+    returns False, and leaves the static stubs in place.
+
+    This is the reason the helper exists at all; the inline swap that
+    PR #2756 left in main.py coupled adapter discipline to tenant-facing
+    readiness."""
+
+    class NoMetadata:
+        id = "x"  # has id but no .metadata.id (the canonical path)
+
+    card = _make_card(["a"])
+
+    swapped = enrich_card_skills(card, [NoMetadata()])
+
+    assert swapped is False
+    # Static stub preserved.
+    assert [stub.id for stub in card.skills] == ["a"]
+    # Operator gets a log line.
+    stdout_text = capsys.readouterr().out
+    assert "skill metadata enrichment failed" in stdout_text
+
+
+def test_returns_false_when_metadata_is_partial(capsys):
+    """Partial shape: ``.metadata`` exists but lacks one canonical attr
+    (here ``examples``). Reading ``skill.metadata.examples`` raises
+    AttributeError inside the helper's list build, which the helper
+    swallows. (Per the original author's note, the production
+    a2a.types.AgentSkill also raises on missing required fields, and both
+    failure modes route through the same except branch.)"""
+
+    class PartialMeta:
+        def __init__(self):
+            self.id = self.name = self.description = "x"
+            self.tags = []
+            # examples missing
+
+    class PartialSkill:
+        def __init__(self):
+            self.metadata = PartialMeta()
+
+    card = _make_card(["a"])
+
+    swapped = enrich_card_skills(card, [PartialSkill()])
+
+    assert swapped is False
+    assert [stub.id for stub in card.skills] == ["a"]
+    stdout_text = capsys.readouterr().out
+    assert "skill metadata enrichment failed" in stdout_text
+
+
+def test_failure_is_atomic_no_partial_swap(capsys):
+    """If the second skill is malformed, the FIRST skill's swap must NOT
+    leak into card.skills. The helper builds the full replacement list
+    before assigning it; verify that property holds.
+
+    Without this property, a misbehaving adapter could half-corrupt the
+    card — operators would see "1 skill listed" when 3 were declared.
+    The failure must also be reported, so the log line is asserted too
+    (the ``capsys`` fixture was previously requested but never read)."""
+    card = _make_card(["a", "b"])
+
+    valid = _Skill(id="x", name="x", description="x", tags=[], examples=[])
+
+    class BadSkill:
+        # No .metadata at all.
+        pass
+
+    assert enrich_card_skills(card, [valid, BadSkill()]) is False
+    # Original two static stubs intact — card.skills was never reassigned.
+    assert [s.id for s in card.skills] == ["a", "b"]
+    # The mixed-validity failure is still surfaced to the operator.
+    captured = capsys.readouterr()
+    assert "skill metadata enrichment failed" in captured.out