diff --git a/.gitea/workflows/publish-workspace-server-image.yml b/.gitea/workflows/publish-workspace-server-image.yml index e9ca5ec2..08a65d14 100644 --- a/.gitea/workflows/publish-workspace-server-image.yml +++ b/.gitea/workflows/publish-workspace-server-image.yml @@ -32,11 +32,9 @@ on: - '.gitea/workflows/publish-workspace-server-image.yml' workflow_dispatch: -# Serialize per-branch so two rapid staging pushes don't race the same -# :staging-latest tag retag. Allow staging and main to run in parallel -# (different GITHUB_REF → different concurrency group) since they -# produce different :staging-<sha> tags and last-write-wins on -# :staging-latest is acceptable across branches. +# Serialize per-branch so two rapid main pushes don't race the same +# :staging-latest tag retag. Allow parallel runs as they produce +# different :staging-<sha> tags and last-write-wins on :staging-latest is acceptable. # # cancel-in-progress: false → in-flight builds finish; the next push's # build queues. This avoids a partially-pushed image. diff --git a/.staging-trigger b/.staging-trigger new file mode 100644 index 00000000..270a6560 --- /dev/null +++ b/.staging-trigger @@ -0,0 +1 @@ +staging trigger \ No newline at end of file diff --git a/manifest.json b/manifest.json index 2ac2f462..bde3a1d9 100644 --- a/manifest.json +++ b/manifest.json @@ -44,3 +44,4 @@ {"name": "mock-bigorg", "repo": "molecule-ai/molecule-ai-org-template-mock-bigorg", "ref": "main"} ] } +// Triggered by Integration Tester at 2026-05-10T08:52Z diff --git a/scripts/clone-manifest.sh b/scripts/clone-manifest.sh index 4e9e5d99..d6e343c8 100755 --- a/scripts/clone-manifest.sh +++ b/scripts/clone-manifest.sh @@ -37,6 +37,50 @@ PLUGINS_DIR="${4:?Missing plugins dir}" EXPECTED=0 CLONED=0 +# clone_one_with_retry — clone a single repo, retrying on transient failure. +# +# Why: the publish-workspace-server-image (and harness-replays) CI jobs +# clone the full manifest (~36 repos) serially on a memory-constrained +# Gitea Actions runner.
Under host memory pressure the OOM killer +# occasionally SIGKILLs git-remote-https mid-clone: +# +# error: git-remote-https died of signal 9 +# fatal: the remote end hung up unexpectedly +# +# (observed in publish-workspace-server-image run 4622 on 2026-05-10 — the +# job died on the 14th of 36 clones, which wedged staging→main). One +# transient SIGKILL / network blip would otherwise fail the whole tenant +# image rebuild. Retrying after a short backoff lets the pressure subside. +# The durable fix is more runner RAM/swap (tracked with Infra-SRE); this +# just stops a single flake from being release-blocking. +# +# Args: <target_dir> <name> <clone_url> <display_url> <ref> +clone_one_with_retry() { + local tdir="$1" name="$2" url="$3" display="$4" ref="$5" + local attempt=1 max_attempts=3 backoff + + while : ; do + # A killed attempt can leave a partial directory behind; git clone + # refuses a non-empty target, so wipe it before each try. + rm -rf "$tdir/$name" + + if [ "$ref" = "main" ]; then + if git clone --depth=1 -q "$url" "$tdir/$name"; then return 0; fi + else + if git clone --depth=1 -q --branch "$ref" "$url" "$tdir/$name"; then return 0; fi + fi + + if [ "$attempt" -ge "$max_attempts" ]; then + echo "::error::clone failed after ${max_attempts} attempts: ${display}" >&2 + return 1 + fi + backoff=$((attempt * 3)) # 3s, then 6s + echo " ⚠ clone attempt ${attempt}/${max_attempts} failed for ${display} — retrying in ${backoff}s" >&2 + sleep "$backoff" + attempt=$((attempt + 1)) + done +} + clone_category() { local category="$1" local target_dir="$2" @@ -82,11 +126,7 @@ clone_category() { fi echo " cloning $display_url -> $target_dir/$name (ref=$ref)" - if [ "$ref" = "main" ]; then - git clone --depth=1 -q "$clone_url" "$target_dir/$name" - else - git clone --depth=1 -q --branch "$ref" "$clone_url" "$target_dir/$name" - fi + clone_one_with_retry "$target_dir" "$name" "$clone_url" "$display_url" "$ref" CLONED=$((CLONED + 1)) i=$((i + 1)) done diff --git a/workspace/builtin_tools/a2a_tools.py
b/workspace/builtin_tools/a2a_tools.py index acdd15cb..48b813a1 100644 --- a/workspace/builtin_tools/a2a_tools.py +++ b/workspace/builtin_tools/a2a_tools.py @@ -77,6 +77,16 @@ async def delegate_task(workspace_id: str, task: str) -> str: return str(result) if isinstance(result, str) else "(no text)" elif "error" in data: err = data["error"] + # Handle both string-form errors ("error": "some string") + # and object-form errors ("error": {"message": "...", "code": ...}). + msg = "" + if isinstance(err, dict): + msg = err.get("message", "") + elif isinstance(err, str): + msg = err + else: + msg = str(err) + return f"Error: {msg}" msg = "" if isinstance(err, dict): msg = err.get("message", "") diff --git a/workspace/tests/test_a2a_response.py b/workspace/tests/test_a2a_response.py index cf254b36..3b138858 100644 --- a/workspace/tests/test_a2a_response.py +++ b/workspace/tests/test_a2a_response.py @@ -115,12 +115,91 @@ _FIXTURES = { "malformed_delivery_mode_no_status": { "delivery_mode": "poll", }, + + # --- Push-mode queue envelopes --- + # Returned when a push-mode workspace (has public URL) is at capacity. + # The platform queues the request and returns {"queued": true, ...}. + # Distinguishable from poll-mode by data.get("queued") is True alone. + "push_queued_full": { + "queued": True, + "method": "message/send", + "queue_id": "q-1", + }, + "push_queued_notify": { + "queued": True, + "method": "notify", + "queue_id": "q-2", + }, + "push_queued_no_method": { + # method absent — parser must not raise; falls back to "message/send". + "queued": True, + "queue_id": "q-3", + }, + "push_queued_no_queue_id": { + # queue_id absent — parser must not raise; logs queue_id="?". + "queued": True, + "method": "message/send", + }, } # ============== Variant-by-variant coverage ============== +class TestPushQueuedVariant: + """``parse()`` returns ``Queued`` for push-mode at-capacity envelope + (lines 189-197 of a2a_response.py): ``{"queued": true, ...}``. 
+ + The push-mode path was added in PR #278 alongside the a2a_proxy.go + push-at-capacity branch. Lines 182-197 were not covered until this test. + """ + + def test_full_envelope_message_send(self): + v = a2a_response.parse(_FIXTURES["push_queued_full"]) + assert isinstance(v, a2a_response.Queued) + assert v.method == "message/send" + assert v.delivery_mode == "poll" + + def test_envelope_with_notify(self): + v = a2a_response.parse(_FIXTURES["push_queued_notify"]) + assert isinstance(v, a2a_response.Queued) + assert v.method == "notify" + + def test_envelope_missing_method_falls_back_to_message_send(self): + # a2a_response.py:191 — method_raw is None, defaults to "message/send". + v = a2a_response.parse(_FIXTURES["push_queued_no_method"]) + assert isinstance(v, a2a_response.Queued) + assert v.method == "message/send" + + def test_envelope_missing_queue_id_still_queued(self): + # queue_id is purely informational; its absence must not break parsing. + v = a2a_response.parse(_FIXTURES["push_queued_no_queue_id"]) + assert isinstance(v, a2a_response.Queued) + assert v.method == "message/send" + + def test_push_queued_is_distinct_from_poll_queued(self): + # Same Queued variant, but from different wire shapes. Confirm both paths. 
+ push_v = a2a_response.parse(_FIXTURES["push_queued_full"]) + poll_v = a2a_response.parse(_FIXTURES["poll_queued_full"]) + assert isinstance(push_v, a2a_response.Queued) + assert isinstance(poll_v, a2a_response.Queued) + assert push_v.method == poll_v.method == "message/send" + + def test_logs_info_on_push_queued(self, caplog): + with caplog.at_level(logging.INFO, logger="a2a_response"): + a2a_response.parse(_FIXTURES["push_queued_full"]) + assert any("queued for busy push-mode peer" in r.message for r in caplog.records) + assert any("queue_id=q-1" in r.message for r in caplog.records) + + def test_queued_true_is_distinct_from_queued_truthy(self): + # "queued": 1 / "queued": "yes" — these are truthy but not True, + # and must NOT trigger the push-mode path. Route to Malformed instead. + v = a2a_response.parse({"queued": 1}) + assert isinstance(v, a2a_response.Malformed) + v = a2a_response.parse({"queued": "yes"}) + assert isinstance(v, a2a_response.Malformed) + + class TestQueuedVariant: """``parse()`` recognizes the workspace-server poll-mode short-circuit envelope (a2a_proxy.go:402-406) and returns ``Queued``.""" @@ -436,6 +515,10 @@ class TestRegressionGate: "poll_queued_full": a2a_response.Queued, "poll_queued_notify": a2a_response.Queued, "poll_queued_no_method": a2a_response.Queued, + "push_queued_full": a2a_response.Queued, + "push_queued_notify": a2a_response.Queued, + "push_queued_no_method": a2a_response.Queued, + "push_queued_no_queue_id": a2a_response.Queued, "malformed_empty_dict": a2a_response.Malformed, "malformed_unexpected_keys": a2a_response.Malformed, "malformed_status_queued_no_delivery_mode": a2a_response.Malformed,