From d8fc66e9bd27952d404d967f5d2ea2b1346d88ab Mon Sep 17 00:00:00 2001 From: Molecule AI Infra-SRE Date: Mon, 11 May 2026 15:42:05 +0000 Subject: [PATCH 1/2] fix(harness-replays): use github.sha not branch-name as HEAD for push events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: PR #497 used HEAD="${GITHUB_REF#refs/heads/}" (branch name) for push events. Gitea Compare API returns empty divergent-commits when comparing a SHA (github.event.before) with a branch name (main) in a linear history — the commit IS the branch tip's ancestor, so there are zero divergent commits. Fix: HEAD="${{ github.sha }}" — both BASE and HEAD are SHAs, so the API returns the pushed commits' changed files. Verified: compare/{before_sha}...{current_sha} correctly returns pushed commits on a linear push. For PR events, branch names still work correctly (the Compare API handles branch-vs-branch comparisons fine). Closes regression introduced in PR #497. Co-Authored-By: Claude Opus 4.7 --- .gitea/workflows/harness-replays.yml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/.gitea/workflows/harness-replays.yml b/.gitea/workflows/harness-replays.yml index 380892fe..f26226b1 100644 --- a/.gitea/workflows/harness-replays.yml +++ b/.gitea/workflows/harness-replays.yml @@ -87,20 +87,19 @@ jobs: # Determine base and head refs for the Compare API call. # Gitea Compare API accepts branch names OR commit SHAs as base/head. # Pull request: base.ref + head.ref are in the event payload (branch names). - # Push: github.event.before (SHA of previous tip) as BASE, $GITHUB_REF - # (branch name) as HEAD. These are different, so the Compare API - # returns the actual diff — unlike the broken form which set both - # BASE and HEAD to the same branch name, making - # "compare/main...main" always return zero files. + # Push: github.event.before (SHA of previous tip) as BASE, github.sha + # (current tip SHA) as HEAD. 
Both SHAs means the API returns the + # pushed commits (unlike branch-name HEAD which returns empty in + # linear-history push events — Gitea treats SHA-vs-branch as + # "show divergent commits" and a linear push has zero of those). if [ "${{ github.event_name }}" = "pull_request" ]; then BASE="${{ github.event.pull_request.base.ref }}" HEAD="${{ github.event.pull_request.head.ref }}" elif [ -n "${{ github.event.before }}" ] && \ ! echo "${{ github.event.before }}" | grep -qE '^0+$'; then - # Push event: BASE = previous tip (SHA), HEAD = current branch name. + # Push event: BASE = previous tip (SHA), HEAD = current tip (SHA). BASE="${{ github.event.before }}" - HEAD_REF="${GITHUB_REF#refs/heads/}" - HEAD="${HEAD_REF:-main}" + HEAD="${{ github.sha }}" else # New branch or github.event.before unavailable — run everything. echo "run=true" >> "$GITHUB_OUTPUT" -- 2.45.2 From af2a777200334911758d5425c1c60f96363e263c Mon Sep 17 00:00:00 2001 From: Molecule AI Infra-SRE Date: Mon, 11 May 2026 15:59:06 +0000 Subject: [PATCH 2/2] fix: update test assertions for OFFSEC-003 boundary wrapping (PR #477) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #477 added _A2A_BOUNDARY_START/END wrapping to tool_delegate_task's success path. Three tests in test_delegation_sync_via_polling.py were checking exact result strings (no boundary markers) and started failing: - test_flag_off_uses_send_a2a_message_not_polling: "legacy ok" - test_queued_sentinel_triggers_polling_fallback: "real response from..." - test_non_queued_send_result_does_not_trigger_fallback: "normal reply" All three now assert for boundary markers + inner content. The error-path test (DELEGATION FAILED) is unaffected — errors bypass the wrapping. 
Co-Authored-By: Claude Opus 4.7 --- .../tests/test_delegation_sync_via_polling.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/workspace/tests/test_delegation_sync_via_polling.py b/workspace/tests/test_delegation_sync_via_polling.py index 018d572a..6fb14d6a 100644 --- a/workspace/tests/test_delegation_sync_via_polling.py +++ b/workspace/tests/test_delegation_sync_via_polling.py @@ -64,10 +64,12 @@ class TestFlagOffLegacyPath: async def test_flag_off_uses_send_a2a_message_not_polling(self, monkeypatch): """With DELEGATION_SYNC_VIA_INBOX unset, tool_delegate_task must - invoke the legacy send_a2a_message and NEVER call /delegate.""" + invoke the legacy send_a2a_message and NEVER call /delegate. + Result is wrapped in _A2A_BOUNDARY_START/END (OFFSEC-003, PR #477).""" monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False) import a2a_tools + from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START send_calls = [] async def fake_send(workspace_id, task, source_workspace_id=None): @@ -88,7 +90,10 @@ class TestFlagOffLegacyPath: "ws-target", "task body", source_workspace_id="ws-self" ) - assert result == "legacy ok", f"expected legacy passthrough, got {result!r}" + # OFFSEC-003: result is wrapped in boundary markers + assert _A2A_BOUNDARY_START in result + assert _A2A_BOUNDARY_END in result + assert "legacy ok" in result assert send_calls == [("ws-target", "task body", "ws-self")] poll_mock.assert_not_called() @@ -119,6 +124,7 @@ class TestPollModeAutoFallback: monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False) import a2a_tools + from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START from a2a_client import _A2A_QUEUED_PREFIX send_calls = [] @@ -152,8 +158,10 @@ class TestPollModeAutoFallback: assert len(poll_calls) == 1 assert poll_calls[0] == ("ws-target", "task body", "ws-self") # Caller sees the real reply, NOT the queued sentinel and NOT - # a DELEGATION FAILED string. 
- assert result == "real response from poll-mode peer" + # a DELEGATION FAILED string. Wrapped in OFFSEC-003 boundary markers. + assert _A2A_BOUNDARY_START in result + assert _A2A_BOUNDARY_END in result + assert "real response from poll-mode peer" in result async def test_non_queued_send_result_does_not_trigger_fallback(self, monkeypatch): # Push-mode peer returns a normal text reply — fallback path @@ -161,6 +169,7 @@ class TestPollModeAutoFallback: monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False) import a2a_tools + from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START async def fake_send(*_a, **_kw): return "normal reply" @@ -179,7 +188,10 @@ class TestPollModeAutoFallback: "ws-target", "task", source_workspace_id="ws-self" ) - assert result == "normal reply" + # OFFSEC-003: wrapped in boundary markers + assert _A2A_BOUNDARY_START in result + assert _A2A_BOUNDARY_END in result + assert "normal reply" in result poll_mock.assert_not_called() async def test_error_send_result_does_not_trigger_fallback(self, monkeypatch): -- 2.45.2