diff --git a/.gitea/workflows/harness-replays.yml b/.gitea/workflows/harness-replays.yml index 380892fe..f26226b1 100644 --- a/.gitea/workflows/harness-replays.yml +++ b/.gitea/workflows/harness-replays.yml @@ -87,20 +87,19 @@ jobs: # Determine base and head refs for the Compare API call. # Gitea Compare API accepts branch names OR commit SHAs as base/head. # Pull request: base.ref + head.ref are in the event payload (branch names). - # Push: github.event.before (SHA of previous tip) as BASE, $GITHUB_REF - # (branch name) as HEAD. These are different, so the Compare API - # returns the actual diff — unlike the broken form which set both - # BASE and HEAD to the same branch name, making - # "compare/main...main" always return zero files. + # Push: github.event.before (SHA of previous tip) as BASE, github.sha + # (current tip SHA) as HEAD. Both SHAs means the API returns the + # pushed commits (unlike branch-name HEAD which returns empty in + # linear-history push events — Gitea treats SHA-vs-branch as + # "show divergent commits" and a linear push has zero of those). if [ "${{ github.event_name }}" = "pull_request" ]; then BASE="${{ github.event.pull_request.base.ref }}" HEAD="${{ github.event.pull_request.head.ref }}" elif [ -n "${{ github.event.before }}" ] && \ ! echo "${{ github.event.before }}" | grep -qE '^0+$'; then - # Push event: BASE = previous tip (SHA), HEAD = current branch name. + # Push event: BASE = previous tip (SHA), HEAD = current tip (SHA). BASE="${{ github.event.before }}" - HEAD_REF="${GITHUB_REF#refs/heads/}" - HEAD="${HEAD_REF:-main}" + HEAD="${{ github.sha }}" else # New branch or github.event.before unavailable — run everything. echo "run=true" >> "$GITHUB_OUTPUT" diff --git a/workspace/tests/test_delegation_sync_via_polling.py b/workspace/tests/test_delegation_sync_via_polling.py index 018d572a..6fb14d6a 100644 --- a/workspace/tests/test_delegation_sync_via_polling.py +++ b/workspace/tests/test_delegation_sync_via_polling.py @@ -64,10 +64,12 @@ class TestFlagOffLegacyPath: async def test_flag_off_uses_send_a2a_message_not_polling(self, monkeypatch): """With DELEGATION_SYNC_VIA_INBOX unset, tool_delegate_task must - invoke the legacy send_a2a_message and NEVER call /delegate.""" + invoke the legacy send_a2a_message and NEVER call /delegate. + Result is wrapped in _A2A_BOUNDARY_START/END (OFFSEC-003, PR #477).""" monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False) import a2a_tools + from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START send_calls = [] async def fake_send(workspace_id, task, source_workspace_id=None): @@ -88,7 +90,10 @@ class TestFlagOffLegacyPath: "ws-target", "task body", source_workspace_id="ws-self" ) - assert result == "legacy ok", f"expected legacy passthrough, got {result!r}" + # OFFSEC-003: result is wrapped in boundary markers + assert _A2A_BOUNDARY_START in result + assert _A2A_BOUNDARY_END in result + assert "legacy ok" in result assert send_calls == [("ws-target", "task body", "ws-self")] poll_mock.assert_not_called() @@ -119,6 +124,7 @@ class TestPollModeAutoFallback: monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False) import a2a_tools + from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START from a2a_client import _A2A_QUEUED_PREFIX send_calls = [] @@ -152,8 +158,10 @@ class TestPollModeAutoFallback: assert len(poll_calls) == 1 assert poll_calls[0] == ("ws-target", "task body", "ws-self") # Caller sees the real reply, NOT the queued sentinel and NOT - # a DELEGATION FAILED string. - assert result == "real response from poll-mode peer" + # a DELEGATION FAILED string. Wrapped in OFFSEC-003 boundary markers. + assert _A2A_BOUNDARY_START in result + assert _A2A_BOUNDARY_END in result + assert "real response from poll-mode peer" in result async def test_non_queued_send_result_does_not_trigger_fallback(self, monkeypatch): # Push-mode peer returns a normal text reply — fallback path @@ -161,6 +169,7 @@ class TestPollModeAutoFallback: monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False) import a2a_tools + from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START async def fake_send(*_a, **_kw): return "normal reply" @@ -179,7 +188,10 @@ class TestPollModeAutoFallback: "ws-target", "task", source_workspace_id="ws-self" ) - assert result == "normal reply" + # OFFSEC-003: wrapped in boundary markers + assert _A2A_BOUNDARY_START in result + assert _A2A_BOUNDARY_END in result + assert "normal reply" in result poll_mock.assert_not_called() async def test_error_send_result_does_not_trigger_fallback(self, monkeypatch):