diff --git a/workspace/tests/test_delegation_sync_via_polling.py b/workspace/tests/test_delegation_sync_via_polling.py index 018d572a..e68f4cbe 100644 --- a/workspace/tests/test_delegation_sync_via_polling.py +++ b/workspace/tests/test_delegation_sync_via_polling.py @@ -30,6 +30,10 @@ import pytest os.environ.setdefault("WORKSPACE_ID", "00000000-0000-0000-0000-000000000001") os.environ.setdefault("PLATFORM_URL", "http://localhost:8080") +# OFFSEC-003: tool_delegate_task wraps non-error results in boundary markers +# so the agent can distinguish trusted own output from untrusted peer content. +from a2a_tools_delegation import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START + def _resp(status_code, payload, text=None): r = MagicMock() @@ -88,7 +92,10 @@ class TestFlagOffLegacyPath: "ws-target", "task body", source_workspace_id="ws-self" ) - assert result == "legacy ok", f"expected legacy passthrough, got {result!r}" + # OFFSEC-003: boundary wrapping is applied by tool_delegate_task even on + # the legacy send_a2a_message path (sanitize then wrap at lines 333-334). + assert _A2A_BOUNDARY_START in result and "legacy ok" in result, \ + f"expected boundary-wrapped legacy ok, got {result!r}" assert send_calls == [("ws-target", "task body", "ws-self")] poll_mock.assert_not_called() @@ -153,7 +160,10 @@ class TestPollModeAutoFallback: assert poll_calls[0] == ("ws-target", "task body", "ws-self") # Caller sees the real reply, NOT the queued sentinel and NOT # a DELEGATION FAILED string. - assert result == "real response from poll-mode peer" + # OFFSEC-003: _delegate_sync_via_polling returns sanitized plain text; + # tool_delegate_task wraps it in boundary markers before returning. 
+ assert _A2A_BOUNDARY_START in result and "real response from poll-mode peer" in result, \ + f"expected boundary-wrapped response, got {result!r}" async def test_non_queued_send_result_does_not_trigger_fallback(self, monkeypatch): # Push-mode peer returns a normal text reply — fallback path @@ -179,7 +189,9 @@ class TestPollModeAutoFallback: "ws-target", "task", source_workspace_id="ws-self" ) - assert result == "normal reply" + # OFFSEC-003: boundary wrapping applied by tool_delegate_task before return. + assert _A2A_BOUNDARY_START in result and "normal reply" in result, \ + f"expected boundary-wrapped normal reply, got {result!r}" poll_mock.assert_not_called() async def test_error_send_result_does_not_trigger_fallback(self, monkeypatch):