Adds lastmile-ai/mcp-eval configuration and 4 test suites:

- .mcp-eval/mcpeval.yaml — stdio config, 98% success-rate + 1s P95 thresholds
- test_list_tools.yaml — core workspace + peer tools reachable, latency < 500ms
- test_memory_tools.yaml — memory_set → memory_get round-trip + HMA commit/search
- test_a2a_tools.yaml — list_peers, async_delegate (task_id), check_delegations
- test_approval_tool.yaml — approval CRUD tools schema + latency

NOTE: .github/workflows/mcp-eval.yml requires 'workflows' scope — it must be committed by a human with that permission. Workflow content is in the PR description.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
24 lines
625 B
YAML
24 lines
625 B
YAML
---
# mcp-eval configuration for @molecule-ai/mcp-server
# Run: mcp-eval run .mcp-eval/tests/ --json mcp-eval-results.json
# Docs: https://github.com/lastmile-ai/mcp-eval

# LLM provider and model selected for the eval runs.
provider: anthropic
model: claude-opus-4-7

# MCP server under test, started as a child process with `command` + `args`.
mcp:
  servers:
    molecule_mcp:
      command: "npx"
      # `-y` lets npx install the package without an interactive prompt.
      args: ["-y", "@molecule-ai/mcp-server"]
      env:
        # NOTE(review): `${VAR:-default}` is shell-style default expansion;
        # confirm the config loader expands it — otherwise this literal
        # string is what the server receives as MOLECULE_URL.
        MOLECULE_URL: "${MOLECULE_URL:-http://localhost:8080}"

# Success-rate floor and latency ceilings for the run.
thresholds:
  success_rate_min: 0.98  # ≥ 98% tool calls must succeed
  latency_p95_max_ms: 1000  # P95 latency < 1 s
  latency_p50_max_ms: 300  # P50 latency < 300 ms

# Timeout (in seconds) and concurrency cap.
execution:
  timeout_seconds: 60
  max_concurrency: 3