Adds lastmile-ai/mcp-eval configuration and 4 test suites:

- .mcp-eval/mcpeval.yaml — stdio config, 98% success-rate + 1s P95 thresholds
- test_list_tools.yaml — core workspace + peer tools reachable, latency < 500ms
- test_memory_tools.yaml — memory_set → memory_get round-trip + HMA commit/search
- test_a2a_tools.yaml — list_peers, async_delegate (task_id), check_delegations
- test_approval_tool.yaml — approval CRUD tools schema + latency

NOTE: .github/workflows/mcp-eval.yml requires the 'workflows' scope, so it must be committed by a human with that permission. The workflow content is in the PR description.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
49 lines
1.2 KiB
YAML
49 lines
1.2 KiB
YAML
---
# Gate: A2A delegation and peer-discovery tools
# list_peers must return a list structure; async_delegate must return a task_id.
#
# Layout: each entry under `steps` names one tool, the input passed to it, and
# the list of assertions attached to that call.

name: a2a_tools

# Folded scalar: the three lines below are joined into one paragraph.
description: >
  Verifies the core A2A communication tools: peer discovery (list_peers),
  async delegation (async_delegate → task_id), delegation status check
  (check_delegations), and access-check enforcement (check_access).

steps:
  # Peer discovery, called with an empty argument object.
  - name: list_peers_returns_list
    tool: list_peers
    input: {}
    assertions:
      - type: no_error
      # `list_or_empty`: presumably an empty peer list also passes, so the
      # gate does not depend on peers being registered — confirm with the
      # assertion's implementation.
      - type: response_type
        expected: list_or_empty
      - type: latency_ms
        max: 500

  # Async delegation: the response must expose a `task_id` key.
  # NOTE(review): only `task` is supplied, with no target workspace/peer —
  # confirm async_delegate accepts a task-only payload.
  - name: async_delegate_returns_task_id
    tool: async_delegate
    input:
      task: "mcp-eval smoke test — no-op"
    assertions:
      - type: no_error
      - type: contains_key
        key: "task_id"
      # Latency ceiling here is 1000, twice the 500 used by the other steps.
      - type: latency_ms
        max: 1000

  # Delegation status check: reachability and latency only.
  - name: check_delegations_reachable
    tool: check_delegations
    input: {}
    assertions:
      - type: no_error
      - type: latency_ms
        max: 500

  # Access check with the same workspace id as both source and target.
  # NOTE(review): the assertions only cover "no error" and latency; nothing
  # here verifies an allow/deny decision, although the description mentions
  # "access-check enforcement". Confirm whether a denial case is needed.
  - name: check_access_reachable
    tool: check_access
    input:
      source_workspace_id: "test:mcp-eval"
      target_workspace_id: "test:mcp-eval"
    assertions:
      - type: no_error
      - type: latency_ms
        max: 500