From d8fc66e9bd27952d404d967f5d2ea2b1346d88ab Mon Sep 17 00:00:00 2001
From: Molecule AI Infra-SRE <infra-sre@agents.moleculesai.app>
Date: Mon, 11 May 2026 15:42:05 +0000
Subject: [PATCH 1/2] fix(harness-replays): use github.sha not branch-name as
 HEAD for push events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause: PR #497 used HEAD="${GITHUB_REF#refs/heads/}" (branch name) for
push events. The Gitea Compare API returns an empty list of divergent commits
when comparing a SHA (github.event.before) with a branch name (main) in a
linear history — the previous-tip commit IS an ancestor of the branch tip, so
there are zero divergent commits.

Fix: HEAD="${{ github.sha }}" — both BASE and HEAD are SHAs, so the API
returns the pushed commits' changed files.

Verified: compare/{before_sha}...{current_sha} correctly returns pushed commits
on a linear push. For PR events, branch names still work correctly (the Compare
API handles branch-vs-branch comparisons fine).

Closes regression introduced in PR #497.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .gitea/workflows/harness-replays.yml | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/.gitea/workflows/harness-replays.yml b/.gitea/workflows/harness-replays.yml
index 380892fe..f26226b1 100644
--- a/.gitea/workflows/harness-replays.yml
+++ b/.gitea/workflows/harness-replays.yml
@@ -87,20 +87,19 @@ jobs:
           # Determine base and head refs for the Compare API call.
           # Gitea Compare API accepts branch names OR commit SHAs as base/head.
           # Pull request: base.ref + head.ref are in the event payload (branch names).
-          # Push: github.event.before (SHA of previous tip) as BASE, $GITHUB_REF
-          #       (branch name) as HEAD. These are different, so the Compare API
-          #       returns the actual diff — unlike the broken form which set both
-          #       BASE and HEAD to the same branch name, making
-          #       "compare/main...main" always return zero files.
+          # Push: github.event.before (SHA of previous tip) as BASE, github.sha
+          #       (current tip SHA) as HEAD. With both given as SHAs, the API
+          #       returns the pushed commits (unlike a branch-name HEAD, which
+          #       returns empty on linear-history push events — Gitea treats
+          #       SHA-vs-branch as "show divergent commits"; a linear push has none).
           if [ "${{ github.event_name }}" = "pull_request" ]; then
             BASE="${{ github.event.pull_request.base.ref }}"
             HEAD="${{ github.event.pull_request.head.ref }}"
           elif [ -n "${{ github.event.before }}" ] && \
                ! echo "${{ github.event.before }}" | grep -qE '^0+$'; then
-            # Push event: BASE = previous tip (SHA), HEAD = current branch name.
+            # Push event: BASE = previous tip (SHA), HEAD = current tip (SHA).
             BASE="${{ github.event.before }}"
-            HEAD_REF="${GITHUB_REF#refs/heads/}"
-            HEAD="${HEAD_REF:-main}"
+            HEAD="${{ github.sha }}"
           else
             # New branch or github.event.before unavailable — run everything.
             echo "run=true" >> "$GITHUB_OUTPUT"
-- 
2.45.2


From af2a777200334911758d5425c1c60f96363e263c Mon Sep 17 00:00:00 2001
From: Molecule AI Infra-SRE <infra-sre@agents.moleculesai.app>
Date: Mon, 11 May 2026 15:59:06 +0000
Subject: [PATCH 2/2] fix: update test assertions for OFFSEC-003 boundary
 wrapping (PR #477)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR #477 added _A2A_BOUNDARY_START/END wrapping to tool_delegate_task's
success path. Three tests in test_delegation_sync_via_polling.py were
checking exact result strings (no boundary markers) and started failing:

- test_flag_off_uses_send_a2a_message_not_polling: "legacy ok"
- test_queued_sentinel_triggers_polling_fallback: "real response from..."
- test_non_queued_send_result_does_not_trigger_fallback: "normal reply"

All three now assert that the result contains both boundary markers and the
expected inner content. The error-path test (DELEGATION FAILED) is unaffected —
errors bypass the wrapping.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../tests/test_delegation_sync_via_polling.py | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/workspace/tests/test_delegation_sync_via_polling.py b/workspace/tests/test_delegation_sync_via_polling.py
index 018d572a..6fb14d6a 100644
--- a/workspace/tests/test_delegation_sync_via_polling.py
+++ b/workspace/tests/test_delegation_sync_via_polling.py
@@ -64,10 +64,12 @@ class TestFlagOffLegacyPath:
 
     async def test_flag_off_uses_send_a2a_message_not_polling(self, monkeypatch):
         """With DELEGATION_SYNC_VIA_INBOX unset, tool_delegate_task must
-        invoke the legacy send_a2a_message and NEVER call /delegate."""
+        invoke the legacy send_a2a_message and NEVER call /delegate.
+        Result is wrapped in _A2A_BOUNDARY_START/END (OFFSEC-003, PR #477)."""
         monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False)
 
         import a2a_tools
+        from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START
         send_calls = []
 
         async def fake_send(workspace_id, task, source_workspace_id=None):
@@ -88,7 +90,10 @@ class TestFlagOffLegacyPath:
                 "ws-target", "task body", source_workspace_id="ws-self"
             )
 
-        assert result == "legacy ok", f"expected legacy passthrough, got {result!r}"
+        # OFFSEC-003: result is wrapped in boundary markers
+        assert _A2A_BOUNDARY_START in result
+        assert _A2A_BOUNDARY_END in result
+        assert "legacy ok" in result
         assert send_calls == [("ws-target", "task body", "ws-self")]
         poll_mock.assert_not_called()
 
@@ -119,6 +124,7 @@ class TestPollModeAutoFallback:
         monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False)
 
         import a2a_tools
+        from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START
         from a2a_client import _A2A_QUEUED_PREFIX
 
         send_calls = []
@@ -152,8 +158,10 @@ class TestPollModeAutoFallback:
         assert len(poll_calls) == 1
         assert poll_calls[0] == ("ws-target", "task body", "ws-self")
         # Caller sees the real reply, NOT the queued sentinel and NOT
-        # a DELEGATION FAILED string.
-        assert result == "real response from poll-mode peer"
+        # a DELEGATION FAILED string. Wrapped in OFFSEC-003 boundary markers.
+        assert _A2A_BOUNDARY_START in result
+        assert _A2A_BOUNDARY_END in result
+        assert "real response from poll-mode peer" in result
 
     async def test_non_queued_send_result_does_not_trigger_fallback(self, monkeypatch):
         # Push-mode peer returns a normal text reply — fallback path
@@ -161,6 +169,7 @@ class TestPollModeAutoFallback:
         monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False)
 
         import a2a_tools
+        from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START
 
         async def fake_send(*_a, **_kw):
             return "normal reply"
@@ -179,7 +188,10 @@ class TestPollModeAutoFallback:
                 "ws-target", "task", source_workspace_id="ws-self"
             )
 
-        assert result == "normal reply"
+        # OFFSEC-003: wrapped in boundary markers
+        assert _A2A_BOUNDARY_START in result
+        assert _A2A_BOUNDARY_END in result
+        assert "normal reply" in result
         poll_mock.assert_not_called()
 
     async def test_error_send_result_does_not_trigger_fallback(self, monkeypatch):
-- 
2.45.2