molecule-core/workspace/tests/test_delegation.py
Hongming Wang 057876cb0c fix(delegation): runtime handles 202+queued; canvas surfaces delegation rows
Two bugs that compounded into the "Director does the work itself" UX:

1. workspace/builtin_tools/delegation.py: _execute_delegation only
   handled HTTP 200 in the response branch. When the peer's a2a-proxy
   returned HTTP 202 + {queued: true} (single-SDK-session bottleneck
   on the peer), the loop fell through. Two iterations later the
   `if "error" in result` check tried to access an unbound `result`,
   the goroutine ended quietly, and the delegation stayed at FAILED
   with error="None". The LLM checking status saw "failed" + the
   platform's "Delegation queued — target at capacity" log line in
   chat context, concluded the peer was permanently unavailable, and
   bypassed delegation to do the work itself.

   Fix: explicit 202+queued branch. Adds DelegationStatus.QUEUED,
   marks the local delegation as QUEUED, mirrors to the platform,
   and returns cleanly without retrying. The retry loop is for
   transient transport errors — queueing is a real ack, not a failure
   to retry against (retrying would just re-queue the same task).

   check_delegation_status docstring extended with explicit per-status
   guidance: pending/in_progress → wait, queued → wait (peer busy on
   prior task, reply WILL arrive), completed → use result, failed →
   real error in error field; only fall back on failed, never queued.

2. canvas/src/components/tabs/chat/AgentCommsPanel.tsx: filter dropped
   every delegation row because it whitelisted only a2a_send /
   a2a_receive. activity_type='delegation' rows (written by the
   platform's /delegate handler with method='delegate' or
   'delegate_result') never reached toCommMessage. User saw "No
   agent-to-agent communications yet" while 6+ delegations existed
   in the DB.

   Fix: include "delegation" in the both the initial filter and the
   WS push filter, plus a delegation branch in toCommMessage that
   maps the row as outbound (always — platform proxies on our behalf)
   and uses summary as the primary text source.

Tests:
  - 3 new Python tests cover the 202+queued path: status becomes
    QUEUED not FAILED; no retry on queued (counted by URL match
    against the A2A target since the mock is shared across all
    AsyncClient calls); bare 202 without {queued:true} still
    falls through to the existing retry-then-FAILED path.
  - 3 new TS tests cover the delegation mapper: 'delegate' row
    maps as outbound to target with summary text; queued
    'delegate_result' preserves status='queued' (load-bearing for
    the LLM's wait-vs-bypass decision); missing target_id returns
    null instead of rendering a ghost.

Does NOT solve: the underlying single-SDK-session bottleneck that
causes peers to queue in the first place. Tracked as task #102
(parallel SDK sessions per workspace) — real architectural work.
This PR makes the runtime handle the queueing correctly so the LLM
doesn't bail out, and makes the delegations visible in Agent Comms
so operators can see what's happening.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 15:01:50 -07:00

489 lines
17 KiB
Python

"""Tests for tools/delegation.py (async delegation model).
The delegation tool now returns immediately with a task_id and runs the
A2A request in the background. Tests verify:
1. Immediate return with task_id
2. Background task completion
3. check_delegation_status retrieval
4. Error handling (RBAC, discovery, network)
"""
import asyncio
import importlib.util
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_mock_client(
discover_status=200,
discover_payload=None,
discover_exc=None,
a2a_status=200,
a2a_payload=None,
):
"""Return (mock_client, mock_client_class) for patching httpx.AsyncClient."""
if discover_payload is None:
discover_payload = {"url": "http://peer:8000"}
if a2a_payload is None:
a2a_payload = {
"result": {
"parts": [{"kind": "text", "text": "done"}],
"artifacts": [],
}
}
mock_resp_discover = MagicMock()
mock_resp_discover.status_code = discover_status
mock_resp_discover.json.return_value = discover_payload
mock_resp_a2a = MagicMock()
mock_resp_a2a.status_code = a2a_status
mock_resp_a2a.json.return_value = a2a_payload
mock_client = AsyncMock()
if discover_exc:
mock_client.get = AsyncMock(side_effect=discover_exc)
else:
mock_client.get = AsyncMock(return_value=mock_resp_discover)
mock_client.post = AsyncMock(return_value=mock_resp_a2a)
mock_cls = MagicMock()
mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)
return mock_client, mock_cls
@pytest.fixture
def delegation_mocks(monkeypatch):
"""Load the real delegation module with mocked dependencies."""
mock_audit = MagicMock()
mock_audit.check_permission = MagicMock(return_value=True)
mock_audit.get_workspace_roles = MagicMock(return_value=(["operator"], {}))
mock_audit.log_event = MagicMock()
mock_span = MagicMock()
mock_span.set_attribute = MagicMock()
mock_span.record_exception = MagicMock()
mock_span.__enter__ = MagicMock(return_value=mock_span)
mock_span.__exit__ = MagicMock(return_value=False)
mock_tracer = MagicMock()
mock_tracer.start_as_current_span = MagicMock(return_value=mock_span)
mock_telemetry = MagicMock()
mock_telemetry.get_tracer = MagicMock(return_value=mock_tracer)
mock_telemetry.inject_trace_headers = MagicMock(side_effect=lambda h: h)
mock_telemetry.get_current_traceparent = MagicMock(return_value="")
for attr in ["A2A_SOURCE_WORKSPACE", "A2A_TARGET_WORKSPACE", "A2A_TASK_ID", "WORKSPACE_ID_ATTR"]:
setattr(mock_telemetry, attr, attr)
monkeypatch.setitem(sys.modules, "builtin_tools.audit", mock_audit)
monkeypatch.setitem(sys.modules, "builtin_tools.telemetry", mock_telemetry)
monkeypatch.setenv("WORKSPACE_ID", "ws-self")
monkeypatch.setenv("PLATFORM_URL", "http://test:8080")
spec = importlib.util.spec_from_file_location(
"builtin_tools.delegation",
os.path.join(os.path.dirname(__file__), "..", "builtin_tools", "delegation.py"),
)
mod = importlib.util.module_from_spec(spec)
monkeypatch.setitem(sys.modules, "builtin_tools.delegation", mod)
spec.loader.exec_module(mod)
mod.DELEGATION_RETRY_ATTEMPTS = 2
mod.DELEGATION_RETRY_DELAY = 0.0
# Clear state between tests
mod._delegations.clear()
mod._background_tasks.clear()
return mod, mock_audit, mock_telemetry, mock_span
async def _invoke(mod, workspace_id="target", task="do stuff"):
"""Call delegate_to_workspace and return the immediate result."""
fn = mod.delegate_to_workspace
if hasattr(fn, "ainvoke"):
return await fn.ainvoke({"workspace_id": workspace_id, "task": task})
return await fn(workspace_id=workspace_id, task=task)
async def _invoke_and_wait(mod, workspace_id="target", task="do stuff"):
"""Call delegate_to_workspace, wait for background task, return status."""
result = await _invoke(mod, workspace_id, task)
# Wait for all background tasks to complete
if mod._background_tasks:
await asyncio.gather(*mod._background_tasks, return_exceptions=True)
# Get final status
if "task_id" in result:
fn = mod.check_delegation_status
if hasattr(fn, "ainvoke"):
return await fn.ainvoke({"task_id": result["task_id"]})
return await fn(task_id=result["task_id"])
return result
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestRBAC:
@pytest.mark.asyncio
async def test_rbac_deny(self, delegation_mocks):
mod, mock_audit, *_ = delegation_mocks
mock_audit.check_permission.return_value = False
result = await _invoke(mod)
assert result["success"] is False
assert "RBAC" in result["error"]
class TestAsyncDelegation:
@pytest.mark.asyncio
async def test_returns_immediately_with_task_id(self, delegation_mocks):
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client()
with patch("httpx.AsyncClient", mock_cls):
result = await _invoke(mod)
assert result["success"] is True
assert "task_id" in result
assert result["status"] == "delegated"
@pytest.mark.asyncio
async def test_background_task_completes(self, delegation_mocks):
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client()
with patch("httpx.AsyncClient", mock_cls):
status = await _invoke_and_wait(mod)
assert status["status"] == "completed"
assert "done" in status["result"]
@pytest.mark.asyncio
async def test_check_delegation_list_all(self, delegation_mocks):
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client()
with patch("httpx.AsyncClient", mock_cls):
await _invoke(mod, workspace_id="ws-a", task="task A")
await _invoke(mod, workspace_id="ws-b", task="task B")
fn = mod.check_delegation_status
if hasattr(fn, "ainvoke"):
result = await fn.ainvoke({"task_id": ""})
else:
result = await fn(task_id="")
assert result["count"] == 2
@pytest.mark.asyncio
async def test_check_delegation_not_found(self, delegation_mocks):
mod, *_ = delegation_mocks
fn = mod.check_delegation_status
if hasattr(fn, "ainvoke"):
result = await fn.ainvoke({"task_id": "nonexistent"})
else:
result = await fn(task_id="nonexistent")
assert "error" in result
class TestDiscovery:
@pytest.mark.asyncio
async def test_discovery_403(self, delegation_mocks):
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client(discover_status=403)
with patch("httpx.AsyncClient", mock_cls):
status = await _invoke_and_wait(mod)
assert status["status"] == "failed"
assert "Discovery failed" in status.get("error", "")
@pytest.mark.asyncio
async def test_discovery_404(self, delegation_mocks):
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client(discover_status=404)
with patch("httpx.AsyncClient", mock_cls):
status = await _invoke_and_wait(mod)
assert status["status"] == "failed"
@pytest.mark.asyncio
async def test_discovery_no_url(self, delegation_mocks):
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client(discover_payload={"url": ""})
with patch("httpx.AsyncClient", mock_cls):
status = await _invoke_and_wait(mod)
assert status["status"] == "failed"
assert "No URL" in status.get("error", "")
@pytest.mark.asyncio
async def test_discovery_exception(self, delegation_mocks):
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client(discover_exc=Exception("dns fail"))
with patch("httpx.AsyncClient", mock_cls):
status = await _invoke_and_wait(mod)
assert status["status"] == "failed"
assert "dns fail" in status.get("error", "")
class TestA2ASuccess:
@pytest.mark.asyncio
async def test_success_with_parts(self, delegation_mocks):
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client(
a2a_payload={"result": {"parts": [{"kind": "text", "text": "hello world"}]}}
)
with patch("httpx.AsyncClient", mock_cls):
status = await _invoke_and_wait(mod)
assert status["status"] == "completed"
assert "hello world" in status["result"]
@pytest.mark.asyncio
async def test_success_with_artifacts(self, delegation_mocks):
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client(
a2a_payload={
"result": {
"artifacts": [{"parts": [{"kind": "text", "text": "artifact text"}]}],
"parts": [],
}
}
)
with patch("httpx.AsyncClient", mock_cls):
status = await _invoke_and_wait(mod)
assert status["status"] == "completed"
assert "artifact text" in status["result"]
class TestA2AQueued:
"""HTTP 202 + {queued: true} comes back when the peer's a2a-proxy
accepted the request but the peer is mid-task. Pre-fix the runtime
treated this as 'no 200 → fall through to FAILED', which led the
LLM to conclude the peer was permanently unavailable and bypass
delegation entirely. Post-fix the status is QUEUED and the LLM
sees explicit guidance to wait."""
@pytest.mark.asyncio
async def test_queued_marks_status_queued_not_failed(self, delegation_mocks):
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client(
a2a_status=202,
a2a_payload={"queued": True, "summary": "Delegation queued — target at capacity"},
)
with patch("httpx.AsyncClient", mock_cls):
status = await _invoke_and_wait(mod)
assert status["status"] == "queued", f"expected queued, got {status}"
# No 'error' field on queued (it's not a failure)
assert "error" not in status or not status.get("error")
@pytest.mark.asyncio
async def test_queued_does_not_retry(self, delegation_mocks):
# The retry loop is for transient transport errors. A 202+queued
# is NOT a failure to retry against — the platform's drain will
# deliver the eventual reply. Retrying would just re-queue the
# same task and double-count it.
mod, *_ = delegation_mocks
client, mock_cls = _make_mock_client(
a2a_status=202,
a2a_payload={"queued": True},
)
with patch("httpx.AsyncClient", mock_cls):
await _invoke_and_wait(mod)
# The mock is shared across all AsyncClient calls (record, A2A,
# notify, update), so total post count includes platform-sync
# bookkeeping POSTs too. Only count the A2A POST itself —
# identified by URL matching the target's /a2a endpoint.
a2a_calls = [
c for c in client.post.await_args_list
if c.args and c.args[0] == "http://peer:8000"
]
assert len(a2a_calls) == 1, (
f"queued should not retry the A2A POST; got {len(a2a_calls)} A2A calls"
)
@pytest.mark.asyncio
async def test_202_without_queued_flag_falls_through(self, delegation_mocks):
# A bare 202 with no {queued: true} marker is NOT the platform's
# queue signal — could be a misbehaving proxy or a future protocol
# revision. Don't treat it as queued. Falls through to the existing
# retry-then-FAILED path.
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client(
a2a_status=202,
a2a_payload={"some_other_field": "value"},
)
with patch("httpx.AsyncClient", mock_cls):
status = await _invoke_and_wait(mod)
assert status["status"] == "failed", (
f"bare 202 should not be treated as queued, expected failed, got {status}"
)
class TestA2AErrors:
@pytest.mark.asyncio
async def test_rpc_error(self, delegation_mocks):
mod, *_ = delegation_mocks
_, mock_cls = _make_mock_client(
a2a_payload={"error": {"message": "internal error"}}
)
with patch("httpx.AsyncClient", mock_cls):
status = await _invoke_and_wait(mod)
assert status["status"] == "failed"
@pytest.mark.asyncio
async def test_network_error(self, delegation_mocks):
mod, *_ = delegation_mocks
mock_client, mock_cls = _make_mock_client()
mock_client.post = AsyncMock(side_effect=httpx.ConnectError("refused"))
with patch("httpx.AsyncClient", mock_cls):
status = await _invoke_and_wait(mod)
assert status["status"] == "failed"
assert "refused" in status.get("error", "")
# ---------- #64: platform-mirroring helpers ----------
import asyncio as _asyncio_64
from unittest.mock import AsyncMock as _AsyncMock_64, patch as _patch_64
def test_record_delegation_on_platform_fires_http_post(delegation_mocks):
"""Agent registers the delegation on the platform so GET /delegations sees it."""
mod, _, _, _ = delegation_mocks
calls = []
class FakeClient:
def __init__(self, *a, **kw): pass
async def __aenter__(self): return self
async def __aexit__(self, *a): return False
async def post(self, url, json=None):
calls.append({"url": url, "json": json})
class R:
status_code = 202
return R()
with _patch_64.object(mod.httpx, "AsyncClient", FakeClient):
with _patch_64.object(mod, "WORKSPACE_ID", "src-ws"), \
_patch_64.object(mod, "PLATFORM_URL", "http://platform"):
_asyncio_64.run(
mod._record_delegation_on_platform("task-1", "target-ws", "hello")
)
assert len(calls) == 1
assert calls[0]["url"] == "http://platform/workspaces/src-ws/delegations/record"
body = calls[0]["json"]
assert body == {"target_id": "target-ws", "task": "hello", "delegation_id": "task-1"}
def test_record_delegation_on_platform_best_effort_on_error(delegation_mocks):
"""Platform unreachable must NOT block the A2A delegation path."""
mod, _, _, _ = delegation_mocks
class FailingClient:
def __init__(self, *a, **kw): pass
async def __aenter__(self): return self
async def __aexit__(self, *a): return False
async def post(self, *a, **kw):
raise RuntimeError("platform unreachable")
with _patch_64.object(mod.httpx, "AsyncClient", FailingClient):
with _patch_64.object(mod, "WORKSPACE_ID", "src-ws"), \
_patch_64.object(mod, "PLATFORM_URL", "http://platform"):
# Must not raise
_asyncio_64.run(
mod._record_delegation_on_platform("task-1", "target-ws", "hello")
)
def test_update_delegation_on_platform_completed(delegation_mocks):
mod, _, _, _ = delegation_mocks
calls = []
class FakeClient:
def __init__(self, *a, **kw): pass
async def __aenter__(self): return self
async def __aexit__(self, *a): return False
async def post(self, url, json=None):
calls.append({"url": url, "json": json})
class R:
status_code = 200
return R()
with _patch_64.object(mod.httpx, "AsyncClient", FakeClient):
with _patch_64.object(mod, "WORKSPACE_ID", "src-ws"), \
_patch_64.object(mod, "PLATFORM_URL", "http://platform"):
_asyncio_64.run(
mod._update_delegation_on_platform(
"task-1", "completed", "", "the result text"
)
)
assert calls[0]["url"] == "http://platform/workspaces/src-ws/delegations/task-1/update"
assert calls[0]["json"]["status"] == "completed"
assert calls[0]["json"]["response_preview"] == "the result text"
def test_update_delegation_on_platform_truncates_large_preview(delegation_mocks):
"""500-char cap protects log volume + mirrors the platform's 300-char truncate."""
mod, _, _, _ = delegation_mocks
calls = []
class FakeClient:
def __init__(self, *a, **kw): pass
async def __aenter__(self): return self
async def __aexit__(self, *a): return False
async def post(self, url, json=None):
calls.append({"url": url, "json": json})
class R:
status_code = 200
return R()
huge = "X" * 10000
with _patch_64.object(mod.httpx, "AsyncClient", FakeClient):
with _patch_64.object(mod, "WORKSPACE_ID", "src-ws"), \
_patch_64.object(mod, "PLATFORM_URL", "http://platform"):
_asyncio_64.run(
mod._update_delegation_on_platform("task-1", "completed", "", huge)
)
assert len(calls[0]["json"]["response_preview"]) == 500