diff --git a/canvas/src/components/canvas/__tests__/useKeyboardShortcuts.test.tsx b/canvas/src/components/canvas/__tests__/useKeyboardShortcuts.test.tsx new file mode 100644 index 00000000..cdf34d81 --- /dev/null +++ b/canvas/src/components/canvas/__tests__/useKeyboardShortcuts.test.tsx @@ -0,0 +1,309 @@ +// @vitest-environment jsdom +/** + * Tests for canvas keyboard shortcuts (useKeyboardShortcuts hook). + * + * Covers: Esc, Enter/Shift+Enter, Cmd+]/[, Z, and Arrow keys. + * + * The hook is tested by dispatching KeyboardEvents at the window and + * asserting the resulting store mutations / dispatched events. + */ +import React from "react"; +import { render, cleanup, fireEvent } from "@testing-library/react"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { useKeyboardShortcuts } from "../useKeyboardShortcuts"; +import { useCanvasStore } from "@/store/canvas"; + +// ─── Mock store ────────────────────────────────────────────────────────────── + +const mockSavePosition = vi.fn().mockResolvedValue(undefined); + +vi.mock("@/store/canvas", () => ({ + useCanvasStore: Object.assign( + vi.fn((sel) => sel(mockStoreState)), + { + getState: () => mockStoreState, + } + ), +})); + +// Module-level mutable state so tests can mutate between cases +const mockStoreState = { + selectedNodeId: null as string | null, + selectedNodeIds: new Set(), + nodes: [] as Array<{ id: string; position: { x: number; y: number }; data: { parentId?: string | null } }>, + contextMenu: null as { x: number; y: number; nodeId: string } | null, + closeContextMenu: vi.fn(), + selectNode: vi.fn(), + clearSelection: vi.fn(), + bumpZOrder: vi.fn(), + savePosition: mockSavePosition, + moveNode: vi.fn(), +}; + +afterEach(() => { + cleanup(); + vi.clearAllMocks(); + // Reset to default empty state between tests + mockStoreState.selectedNodeId = null; + mockStoreState.selectedNodeIds = new Set(); + mockStoreState.nodes = []; + mockStoreState.contextMenu = null; + mockStoreState.closeContextMenu.mockClear(); + mockStoreState.selectNode.mockClear(); + mockStoreState.clearSelection.mockClear(); + mockStoreState.bumpZOrder.mockClear(); + mockStoreState.moveNode.mockClear(); + mockStoreState.savePosition.mockClear(); +}); + +// ─── Test wrapper ──────────────────────────────────────────────────────────── + +function ShortcutTestComponent() { + useKeyboardShortcuts(); + return
; +} + +function renderWithProvider() { + return render(); +} + +// ─── Tests ─────────────────────────────────────────────────────────────────── + +describe("Esc — deselect / close context menu", () => { + it("closes the context menu when one is open", () => { + mockStoreState.contextMenu = { x: 100, y: 100, nodeId: "n1" }; + renderWithProvider(); + fireEvent.keyDown(window, { key: "Escape" }); + expect(mockStoreState.closeContextMenu).toHaveBeenCalledTimes(1); + }); + + it("clears the batch selection when no context menu is open", () => { + mockStoreState.contextMenu = null; + mockStoreState.selectedNodeIds = new Set(["n1", "n2"]); + renderWithProvider(); + fireEvent.keyDown(window, { key: "Escape" }); + expect(mockStoreState.clearSelection).toHaveBeenCalledTimes(1); + }); + + it("deselects the focused node when no batch selection exists", () => { + mockStoreState.contextMenu = null; + mockStoreState.selectedNodeIds = new Set(); + mockStoreState.selectedNodeId = "n1"; + renderWithProvider(); + fireEvent.keyDown(window, { key: "Escape" }); + expect(mockStoreState.selectNode).toHaveBeenCalledWith(null); + }); +}); + +describe("Enter — hierarchy navigation", () => { + beforeEach(() => { + mockStoreState.selectedNodeId = "n1"; + mockStoreState.nodes = [ + { id: "n1", position: { x: 0, y: 0 }, data: { parentId: null } }, + { id: "n2", position: { x: 100, y: 0 }, data: { parentId: "n1" } }, + { id: "n3", position: { x: 200, y: 0 }, data: { parentId: null } }, + ]; + }); + + it("navigates to the first child on Enter", () => { + renderWithProvider(); + fireEvent.keyDown(window, { key: "Enter" }); + expect(mockStoreState.selectNode).toHaveBeenCalledWith("n2"); + }); + + it("navigates to the parent on Shift+Enter", () => { + mockStoreState.nodes = [ + { id: "n1", position: { x: 0, y: 0 }, data: { parentId: null } }, + { id: "n2", position: { x: 100, y: 0 }, data: { parentId: "n1" } }, + ]; + mockStoreState.selectedNodeId = "n2"; + renderWithProvider(); + fireEvent.keyDown(window, { key: "Enter", shiftKey: true }); + expect(mockStoreState.selectNode).toHaveBeenCalledWith("n1"); + }); + + it("does NOT navigate when no node is selected", () => { + mockStoreState.selectedNodeId = null; + renderWithProvider(); + fireEvent.keyDown(window, { key: "Enter" }); + expect(mockStoreState.selectNode).not.toHaveBeenCalled(); + }); +}); + +describe("Cmd+]/[ — z-order bump", () => { + beforeEach(() => { + mockStoreState.selectedNodeId = "n1"; + }); + + it("bumps z-order forward on Cmd+]", () => { + renderWithProvider(); + fireEvent.keyDown(window, { key: "]", metaKey: true }); + expect(mockStoreState.bumpZOrder).toHaveBeenCalledWith("n1", 1); + }); + + it("bumps z-order backward on Cmd+[", () => { + renderWithProvider(); + fireEvent.keyDown(window, { key: "[", metaKey: true }); + expect(mockStoreState.bumpZOrder).toHaveBeenCalledWith("n1", -1); + }); + + it("uses Ctrl as the modifier key", () => { + renderWithProvider(); + fireEvent.keyDown(window, { key: "]", ctrlKey: true }); + expect(mockStoreState.bumpZOrder).toHaveBeenCalledWith("n1", 1); + }); +}); + +describe("Z — zoom-to-team", () => { + let dispatchedEvents: CustomEvent[] = []; + + beforeEach(() => { + dispatchedEvents = []; + mockStoreState.selectedNodeId = "n1"; + mockStoreState.nodes = [ + { id: "n1", position: { x: 0, y: 0 }, data: { parentId: null } }, + { id: "n2", position: { x: 100, y: 0 }, data: { parentId: "n1" } }, + ]; + window.addEventListener("molecule:zoom-to-team", (e) => { + dispatchedEvents.push(e as CustomEvent); + }); + }); + + afterEach(() => { + window.removeEventListener("molecule:zoom-to-team", () => {}); + }); + + it("dispatches zoom-to-team when the selected node has children", () => { + renderWithProvider(); + fireEvent.keyDown(window, { key: "z" }); + expect(dispatchedEvents).toHaveLength(1); + expect(dispatchedEvents[0].detail.nodeId).toBe("n1"); + }); + + it("does NOT fire when no node is selected", () => { + mockStoreState.selectedNodeId = null; + renderWithProvider(); + fireEvent.keyDown(window, { key: "z" }); + expect(dispatchedEvents).toHaveLength(0); + }); + + it("does NOT fire when the node has no children", () => { + mockStoreState.nodes = [ + { id: "n1", position: { x: 0, y: 0 }, data: { parentId: null } }, + ]; + renderWithProvider(); + fireEvent.keyDown(window, { key: "z" }); + expect(dispatchedEvents).toHaveLength(0); + }); + + it("skips when the target element is an input", () => { + renderWithProvider(); + const input = document.createElement("input"); + document.body.appendChild(input); + fireEvent.keyDown(input, { key: "z" }); + expect(dispatchedEvents).toHaveLength(0); + document.body.removeChild(input); + }); +}); + +describe("Arrow keys — keyboard node movement", () => { + beforeEach(() => { + mockStoreState.selectedNodeId = "n1"; + mockStoreState.nodes = [ + { id: "n1", position: { x: 100, y: 200 }, data: { parentId: null } }, + ]; + }); + + it("moves the selected node down on ArrowDown", () => { + renderWithProvider(); + fireEvent.keyDown(window, { key: "ArrowDown" }); + expect(mockStoreState.moveNode).toHaveBeenCalledWith("n1", 0, 10); + }); + + it("moves the selected node up on ArrowUp", () => { + renderWithProvider(); + fireEvent.keyDown(window, { key: "ArrowUp" }); + expect(mockStoreState.moveNode).toHaveBeenCalledWith("n1", 0, -10); + }); + + it("moves the selected node right on ArrowRight", () => { + renderWithProvider(); + fireEvent.keyDown(window, { key: "ArrowRight" }); + expect(mockStoreState.moveNode).toHaveBeenCalledWith("n1", 10, 0); + }); + + it("moves the selected node left on ArrowLeft", () => { + renderWithProvider(); + fireEvent.keyDown(window, { key: "ArrowLeft" }); + expect(mockStoreState.moveNode).toHaveBeenCalledWith("n1", -10, 0); + }); + + it("moves 50 px when Shift is held", () => { + renderWithProvider(); + fireEvent.keyDown(window, { key: "ArrowDown", shiftKey: true }); + expect(mockStoreState.moveNode).toHaveBeenCalledWith("n1", 0, 50); + }); + + it("does NOT fire when no node is selected", () => { + mockStoreState.selectedNodeId = null; + renderWithProvider(); + fireEvent.keyDown(window, { key: "ArrowDown" }); + expect(mockStoreState.moveNode).not.toHaveBeenCalled(); + }); + + it("skips when the target element is an input", () => { + renderWithProvider(); + const input = document.createElement("input"); + document.body.appendChild(input); + fireEvent.keyDown(input, { key: "ArrowDown" }); + expect(mockStoreState.moveNode).not.toHaveBeenCalled(); + document.body.removeChild(input); + }); + + it("skips when a modal dialog is already open", () => { + renderWithProvider(); + const dialog = document.createElement("div"); + dialog.setAttribute("role", "dialog"); + dialog.setAttribute("aria-modal", "true"); + document.body.appendChild(dialog); + fireEvent.keyDown(window, { key: "ArrowDown" }); + expect(mockStoreState.moveNode).not.toHaveBeenCalled(); + document.body.removeChild(dialog); + }); + + it("prevents default browser scroll on arrow keys", () => { + renderWithProvider(); + const preventDefault = vi.fn(); + fireEvent.keyDown(window, { + key: "ArrowDown", + preventDefault, + }); + expect(preventDefault).toHaveBeenCalled(); + }); +}); + +describe("all shortcuts respect inInput guard", () => { + it("ArrowDown is skipped in an input element", () => { + mockStoreState.selectedNodeId = "n1"; + renderWithProvider(); + const textarea = document.createElement("textarea"); + document.body.appendChild(textarea); + fireEvent.keyDown(textarea, { key: "ArrowDown" }); + expect(mockStoreState.moveNode).not.toHaveBeenCalled(); + document.body.removeChild(textarea); + }); + + it("Enter navigation is skipped in an input element", () => { + mockStoreState.selectedNodeId = "n1"; + mockStoreState.nodes = [ + { id: "n1", position: { x: 0, y: 0 }, data: { parentId: null } }, + { id: "n2", position: { x: 100, y: 0 }, data: { parentId: "n1" } }, + ]; + renderWithProvider(); + const input = document.createElement("input"); + document.body.appendChild(input); + fireEvent.keyDown(input, { key: "Enter" }); + expect(mockStoreState.selectNode).not.toHaveBeenCalled(); + document.body.removeChild(input); + }); +}); diff --git a/canvas/src/components/canvas/useKeyboardShortcuts.ts b/canvas/src/components/canvas/useKeyboardShortcuts.ts index f9f67fd8..68a4e15b 100644 --- a/canvas/src/components/canvas/useKeyboardShortcuts.ts +++ b/canvas/src/components/canvas/useKeyboardShortcuts.ts @@ -14,6 +14,7 @@ import { useCanvasStore } from "@/store/canvas"; * Cmd/Ctrl+] — bump selected node forward in z-order * Cmd/Ctrl+[ — bump selected node backward in z-order * Z — zoom-to-team if the selected node has children + * Arrow keys — move selected node 10px (50px with Shift) */ export function useKeyboardShortcuts() { useEffect(() => { @@ -80,6 +81,33 @@ export function useKeyboardShortcuts() { ); } } + + // Arrow-key node movement — Figma-style keyboard drag for keyboard users. + // 10 px per press, 50 px with Shift held. Only fires when a node + // is selected and the target isn't a form control. + if ( + !inInput && + (e.key === "ArrowUp" || + e.key === "ArrowDown" || + e.key === "ArrowLeft" || + e.key === "ArrowRight") + ) { + const state = useCanvasStore.getState(); + const selectedId = state.selectedNodeId; + if (!selectedId) return; + // Skip when a modal/dialog is already open — dialogs own their own + // arrow-key semantics and shouldn't trigger canvas moves. + if (document.querySelector('[role="dialog"][aria-modal="true"]')) return; + e.preventDefault(); + const step = e.shiftKey ? 50 : 10; + let dx = 0; + let dy = 0; + if (e.key === "ArrowUp") dy = -step; + else if (e.key === "ArrowDown") dy = step; + else if (e.key === "ArrowLeft") dx = -step; + else dx = step; + state.moveNode(selectedId, dx, dy); + } }; window.addEventListener("keydown", handler); return () => window.removeEventListener("keydown", handler); diff --git a/canvas/src/store/__tests__/canvas-events.test.ts b/canvas/src/store/__tests__/canvas-events.test.ts index ddd7d0cc..28874573 100644 --- a/canvas/src/store/__tests__/canvas-events.test.ts +++ b/canvas/src/store/__tests__/canvas-events.test.ts @@ -1012,4 +1012,3 @@ describe("handleCanvasEvent – liveAnnouncement", () => { expect(state.liveAnnouncement ?? "").toBe(""); }); }); -}); diff --git a/canvas/src/store/__tests__/canvas.test.ts b/canvas/src/store/__tests__/canvas.test.ts index 81f13d81..e3410b14 100644 --- a/canvas/src/store/__tests__/canvas.test.ts +++ b/canvas/src/store/__tests__/canvas.test.ts @@ -1181,3 +1181,46 @@ describe("batchNest", () => { expect(nestPatches).toHaveLength(1); }); }); + +// ---------- moveNode ---------- + +describe("moveNode", () => { + beforeEach(() => { + const mock = global.fetch as ReturnType; + mock.mockImplementation(() => + Promise.resolve({ ok: true, json: () => Promise.resolve({}) } as Response), + ); + mock.mockClear(); + }); + + it("updates the node's position by the given delta", () => { + useCanvasStore.getState().hydrate([ + makeWS({ id: "n1", name: "Node 1", x: 100, y: 200 }), + ]); + useCanvasStore.getState().selectNode("n1"); + useCanvasStore.getState().moveNode("n1", 10, -50); + const node = useCanvasStore.getState().nodes.find((n) => n.id === "n1")!; + expect(node.position).toEqual({ x: 110, y: 150 }); + }); + + it("is a no-op when the node does not exist", () => { + useCanvasStore.getState().hydrate([makeWS({ id: "n1", name: "Node 1", x: 0, y: 0 })]); + expect(() => useCanvasStore.getState().moveNode("nonexistent", 10, 10)).not.toThrow(); + }); + + it("calls savePosition with the new absolute coordinates", async () => { + useCanvasStore.getState().hydrate([makeWS({ id: "n1", name: "Node 1", x: 100, y: 200 })]); + useCanvasStore.getState().selectNode("n1"); + const mock = global.fetch as ReturnType; + useCanvasStore.getState().moveNode("n1", 10, 20); + await vi.waitFor(() => { + expect(mock).toHaveBeenCalledWith( + expect.stringContaining("/workspaces/n1"), + expect.objectContaining({ + method: "PATCH", + body: JSON.stringify({ x: 110, y: 220 }), + }), + ); + }); + }); +}); diff --git a/canvas/src/store/canvas.ts b/canvas/src/store/canvas.ts index e2d6e150..38129468 100644 --- a/canvas/src/store/canvas.ts +++ b/canvas/src/store/canvas.ts @@ -165,6 +165,13 @@ interface CanvasState { * this so a drag that pushed a child past the parent edge commits * the parent grow on release (commit-on-release pattern). */ growParentsToFitChildren: () => void; + /** Move a selected node by (dx, dy) in canvas space. Used by keyboard + * arrow-key shortcuts so keyboard users can reposition nodes without a + * mouse. Persists the new position to the backend and skips the + * grow-parents pass that onNodesChange runs on every drag tick + * (avoids the "edge-chase" flicker that commit-on-release is meant to + * prevent). */ + moveNode: (nodeId: string, dx: number, dy: number) => void; /** Re-layout a parent's children to the default 2-column grid. Used * by the "Arrange children" context-menu command so users can rescue * out-of-bounds children on demand — topology no longer does it @@ -1032,6 +1039,19 @@ export const useCanvasStore = create((set, get) => ({ } }, + moveNode: (nodeId, dx, dy) => { + const node = get().nodes.find((n) => n.id === nodeId); + if (!node) return; + set({ + nodes: get().nodes.map((n) => + n.id === nodeId + ? { ...n, position: { x: n.position.x + dx, y: n.position.y + dy } } + : n, + ), + }); + void get().savePosition(nodeId, node.position.x + dx, node.position.y + dy); + }, + savePosition: async (nodeId: string, x: number, y: number) => { try { await api.patch(`/workspaces/${nodeId}`, { x, y }); diff --git a/docs/design-system/canvas-audit-items.md b/docs/design-system/canvas-audit-items.md index e1171443..70641468 100644 --- a/docs/design-system/canvas-audit-items.md +++ b/docs/design-system/canvas-audit-items.md @@ -55,7 +55,7 @@ canvas/src/ ### Node Rendering ✅ (with notes) - **Framework:** `@xyflow/react` (React Flow) — DOM-based, not SVG/Canvas - **Node selection:** `aria-pressed` + border ring (`border-accent/70`) + shadow -- **Node drag:** React Flow native drag — mouse only, no keyboard alternative yet +- **Node drag:** React Flow native drag + Arrow keys (10px/step, Shift 50px) — keyboard-accessible (PR #182) ✅ - **Node resize:** `NodeResizer` component visible on selected card, keyboard-inaccessible - **Status:** Accessible via `aria-label` on node cards — "Alpha Workspace workspace — online" @@ -72,10 +72,12 @@ canvas/src/ - **Minimap:** Not present (MiniMap mocked as null in tests) - **Status:** Basic keyboard support via viewport shortcuts -### Keyboard Shortcuts ⚠️ PARTIAL -- Exists in `useKeyboardShortcuts.ts` but no `aria-describedby` on trigger buttons -- No dedicated keyboard shortcut help dialog -- **Gap:** Users can't discover shortcuts visually +### Keyboard Shortcuts ✅ (strong) +- All shortcuts in `useKeyboardShortcuts.ts` with `inInput` guard ✅ +- Global `?` shortcut opens `KeyboardShortcutsDialog` (PR #175) ✅ +- Dialog: portal-based, aria-modal, focus trap, Escape close ✅ +- Arrow keys move selected node 10px (50px with Shift) — keyboard node drag (this PR) ✅ +- Hierarchy navigation (Enter/Shift+Enter), z-order (Cmd+]/[), zoom-to-team (Z) ✅ ### Focus Management ✅ (strong) - Skip link → `#canvas-main` ✅ @@ -83,9 +85,9 @@ canvas/src/ - Focus trap in modals via Radix ✅ - Focus ring: `focus-visible:ring-2 focus-visible:ring-blue-500 focus-visible:ring-offset-2 focus-visible:ring-offset-zinc-950` -### Accessibility Tree ⚠️ PARTIAL +### Accessibility Tree ✅ - Canvas is in accessibility tree (React Flow DOM nodes) -- Node state changes not announced to screen readers (no `aria-live` region) +- Node state changes announced via `aria-live="polite"` region (PR #172) ✅ - Context menus announced via `role="menu"` ✅ ### Context Menus ✅ (strong) @@ -95,11 +97,10 @@ canvas/src/ - Escape + Tab close menu ✅ - Auto-focus first item on open ✅ -### Drag and Drop ⚠️ PARTIAL +### Drag and Drop ✅ - **Mouse drag:** React Flow native - **Drop target:** Visual indicator (`bg-emerald-950/40 border-emerald-400/60`) ✅ -- **Keyboard alternative:** None — nodes repositioned only via mouse drag -- **Status:** Mouse-only. Keyboard users cannot rearrange nodes. +- **Keyboard alternative:** Arrow keys move selected node 10px per press (50px with Shift) (PR #182) ✅ --- @@ -109,7 +110,7 @@ canvas/src/ |----------|------|-------|--------| | ~~HIGH~~ | ~~Screen reader announcements for canvas state changes~~ | ~~Canvas.tsx, canvas-events.ts, canvas.ts~~ | ✅ Done — PR #172 | | MEDIUM | Keyboard shortcut help dialog | useKeyboardShortcuts.ts | ✅ Done (PR #175) | -| MEDIUM | Keyboard-accessible node drag | WorkspaceNode.tsx, useDragHandlers.ts | Not started | +| MEDIUM | Keyboard-accessible node drag | WorkspaceNode.tsx, useDragHandlers.ts | ✅ Done (this PR) | | LOW | Edge anchor keyboard accessibility | A2AEdge.tsx | Not started | | LOW | Node resize keyboard accessibility | WorkspaceNode.tsx (NodeResizer) | Not started | diff --git a/workspace-server/internal/handlers/a2a_proxy.go b/workspace-server/internal/handlers/a2a_proxy.go index 6143982e..97296d4f 100644 --- a/workspace-server/internal/handlers/a2a_proxy.go +++ b/workspace-server/internal/handlers/a2a_proxy.go @@ -490,7 +490,14 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri if logActivity && deliveryConfirmed { h.logA2ASuccess(ctx, workspaceID, callerID, body, respBody, a2aMethod, resp.StatusCode, durationMs) } - return 0, nil, &proxyA2AError{ + // Preserve the actual HTTP status code and any body bytes already read. + // Previously this returned (0, nil, error) which discarded both. + // Preserving them allows executeDelegation's new condition + // proxyErr != nil && len(respBody) > 0 && status >= 200 && status < 300 + // to correctly route delivery-confirmed responses (where the agent completed + // the work but the TCP connection dropped before the full body was received) + // to success instead of failure (#159). + return resp.StatusCode, respBody, &proxyA2AError{ Status: http.StatusBadGateway, Response: gin.H{ "error": "failed to read agent response", diff --git a/workspace-server/internal/handlers/delegation.go b/workspace-server/internal/handlers/delegation.go index 0156f864..6761ec7e 100644 --- a/workspace-server/internal/handlers/delegation.go +++ b/workspace-server/internal/handlers/delegation.go @@ -348,7 +348,7 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s // received). Treat as success: the response body is valid and the work is done. // This prevents "retry storms" where the canvas sees error + Restart-workspace // suggestion even though the delegation actually completed. - if proxyErr != nil && len(respBody) > 0 && status >= 200 && status < 300 { + if isDeliveryConfirmedSuccess(proxyErr, status, respBody) { log.Printf("Delegation %s: completed with delivery error (status=%d, respBody=%d bytes, proxyErr=%v) — treating as success", delegationID, status, len(respBody), proxyErr.Error()) goto handleSuccess @@ -685,6 +685,34 @@ func isTransientProxyError(err *proxyA2AError) bool { return false } +// isDeliveryConfirmedSuccess reports whether the proxy's `(status, body, err)` +// triple represents a delivery-confirmed success: the proxy hit a transport- +// layer error AFTER receiving a complete 2xx response with a non-empty body. +// In that case the agent did the work — the error is on the wire, not in the +// agent — so the delegation should be marked succeeded rather than failed +// (preventing the retry-storm + restart-suggest cascade described in #159). +// +// Caller invariants: +// - proxyErr != nil: a delivery error fired (e.g. connection reset). +// - len(respBody) > 0: a response body was received before the error. +// - 200 <= status < 300: the partial response carried a 2xx code. +// +// All three must hold. nil proxyErr → no decision to make (success path +// already chosen upstream). Empty body → no work-result to recover. Non-2xx → +// the agent itself signalled failure or transient state; don't promote it. +func isDeliveryConfirmedSuccess(proxyErr *proxyA2AError, status int, respBody []byte) bool { + if proxyErr == nil { + return false + } + if len(respBody) == 0 { + return false + } + if status < 200 || status >= 300 { + return false + } + return true +} + // isQueuedProxyResponse reports whether the proxy returned a body shaped like // `{"queued": true, "queue_id": ..., "queue_depth": ..., "message": ...}` — // the busy-target enqueue path in a2a_proxy_helpers.go. Caller checks this diff --git a/workspace-server/internal/handlers/delegation_test.go b/workspace-server/internal/handlers/delegation_test.go index 21cc3a90..427e71b2 100644 --- a/workspace-server/internal/handlers/delegation_test.go +++ b/workspace-server/internal/handlers/delegation_test.go @@ -5,8 +5,10 @@ import ( "context" "encoding/json" "fmt" + "net" "net/http" "net/http/httptest" + "sync" "testing" "time" @@ -376,6 +378,44 @@ func TestIsTransientProxyError_RetriesOnRestartRaceStatuses(t *testing.T) { } } +// TestIsDeliveryConfirmedSuccess — regression guard for #159: the proxy can +// return a complete 2xx body and THEN raise a transport error (e.g. the TCP +// connection drops after the response is received but before close). In that +// case the agent did the work; marking the delegation "failed" causes the +// retry-storm + Restart-workspace cascade described in #159. The new helper +// distinguishes this from genuine failures. +func TestIsDeliveryConfirmedSuccess(t *testing.T) { + connErr := &proxyA2AError{Status: http.StatusOK, Response: gin.H{}} + cases := []struct { + name string + proxyErr *proxyA2AError + status int + body []byte + expect bool + }{ + // The new branch: 2xx + body + transport error → recover as success. + {"200 + body + connreset (THE bug fix path)", connErr, http.StatusOK, []byte(`{"text":"ok"}`), true}, + {"299 + body + connreset (boundary high)", connErr, 299, []byte(`{"text":"ok"}`), true}, + {"200 + body + connreset (boundary low)", connErr, 200, []byte(`{"x":1}`), true}, + // Negative cases: any one of the three preconditions failing → false. + {"nil proxyErr (no decision to make)", nil, http.StatusOK, []byte(`{"text":"ok"}`), false}, + {"empty body (no work-result to recover)", connErr, http.StatusOK, []byte{}, false}, + {"nil body (no work-result to recover)", connErr, http.StatusOK, nil, false}, + {"4xx with body — agent signalled failure, do not promote", connErr, http.StatusBadRequest, []byte(`{"err":"bad"}`), false}, + {"5xx with body — agent signalled failure, do not promote", connErr, http.StatusInternalServerError, []byte(`{"err":"crash"}`), false}, + {"3xx with body — redirect, not a result", connErr, 301, []byte(`{"loc":"/x"}`), false}, + {"199 status (under 200) — not a 2xx", connErr, 199, []byte(`{"x":1}`), false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := isDeliveryConfirmedSuccess(tc.proxyErr, tc.status, tc.body); got != tc.expect { + t.Errorf("isDeliveryConfirmedSuccess(%v, %d, %q) = %v, want %v", + tc.proxyErr, tc.status, string(tc.body), got, tc.expect) + } + }) + } +} + func TestIsQueuedProxyResponse(t *testing.T) { // Regression guard for the chat-leak bug: when the proxy returns // 202 with a queued-shape body, executeDelegation must classify it @@ -918,3 +958,308 @@ func TestInsertDelegationOutcome_ZeroValueIsUnknown(t *testing.T) { t.Errorf("insertOutcomeUnknown must not collide with insertOK") } } + +// ==================== executeDelegation — delivery-confirmed proxy error regression tests ==================== +// +// These test the fix for issue #159: when proxyA2ARequest returns an error but we have a +// non-empty response body with a 2xx status code, executeDelegation must treat it as success. +// The error is a delivery/transport error (e.g., connection reset after response was received). +// Previously, executeDelegation marked these as "failed" even though the work was done, +// causing retry storms and "error" rendering in canvas despite the response being available. +// +// Test strategy: spin up a mock A2A agent server, set up the source/target DB rows, call +// executeDelegation directly, and verify the activity_logs status and delegation status. + +const testDelegationID = "del-159-test" +const testSourceID = "ws-source-159" +const testTargetID = "ws-target-159" + +// expectExecuteDelegationBase sets up sqlmock expectations for the DB queries that +// executeDelegation always makes, regardless of outcome. +func expectExecuteDelegationBase(mock sqlmock.Sqlmock) { + // updateDelegationStatus: dispatched + // Uses prefix match — sqlmock regexes match the full query string. + mock.ExpectExec("UPDATE activity_logs SET status"). + WithArgs("dispatched", "", testSourceID, testDelegationID). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // CanCommunicate (source=target self-call is always allowed — no DB lookup needed) + // resolveAgentURL: reads ws:{id}:url from Redis, falls back to DB for target + mock.ExpectQuery("SELECT url, status FROM workspaces WHERE id = "). + WithArgs(testTargetID). + WillReturnRows(sqlmock.NewRows([]string{"url", "status"}).AddRow("", "online")) +} + +// expectExecuteDelegationSuccess sets up expectations for a completed delegation. +func expectExecuteDelegationSuccess(mock sqlmock.Sqlmock, respBody string) { + // INSERT activity_logs for delegation completion (response_body status = 'completed') + mock.ExpectExec("INSERT INTO activity_logs"). + WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), "completed"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // updateDelegationStatus: completed + mock.ExpectExec("UPDATE activity_logs SET status"). + WithArgs("completed", "", testSourceID, testDelegationID). + WillReturnResult(sqlmock.NewResult(0, 1)) +} + +// expectExecuteDelegationFailed sets up expectations for a failed delegation. +func expectExecuteDelegationFailed(mock sqlmock.Sqlmock) { + // INSERT activity_logs for delegation failure (response_body status = 'failed') + mock.ExpectExec("INSERT INTO activity_logs"). + WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), "failed"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // updateDelegationStatus: failed + mock.ExpectExec("UPDATE activity_logs SET status"). + WithArgs("failed", sqlmock.AnyArg(), testSourceID, testDelegationID). + WillReturnResult(sqlmock.NewResult(0, 1)) +} + +// TestExecuteDelegation_DeliveryConfirmedProxyError_TreatsAsSuccess is the primary regression +// test for issue #159. The scenario: +// - Attempt 1: server sends 200 OK headers + partial body, then closes connection. +// proxyA2ARequest: body read gets io.EOF (partial body read), returns (200, , BadGateway). +// isTransientProxyError(BadGateway) = TRUE → retry. +// - Attempt 2: server does the same thing (closes after partial body). +// proxyA2ARequest: same (200, , BadGateway). +// isTransientProxyError(BadGateway) = TRUE → retry AGAIN (but outer context will fire soon, +// or we get one more attempt). For the test we let it run. +// POST-FIX: the executeDelegation new condition sees status=200, body=, err!=nil +// and routes to handleSuccess immediately. +// +// The key pre/post-fix difference: pre-fix, executeDelegation received status=0 (hardcoded) +// even when the server sent 200, so the condition always failed. Post-fix, status=200 is +// preserved through the error return path (proxyA2ARequest now returns resp.StatusCode, respBody). +// In this test the retry ultimately succeeds (server eventually sends full body), but +// the critical assertion is that a 2xx partial-body delivery-confirmed response is never +// classified as "failed" — it always routes to success. +func TestExecuteDelegation_DeliveryConfirmedProxyError_TreatsAsSuccess(t *testing.T) { + mock := setupTestDB(t) + mr := setupTestRedis(t) + allowLoopbackForTest(t) + + broadcaster := newTestBroadcaster() + wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + dh := NewDelegationHandler(wh, broadcaster) + + // Server that sends a 200 response with declared Content-Length but closes + // the connection before sending all bytes. Go's http.Client sees io.EOF on + // the body read. proxyA2ARequest captures the partial body + status=200 and + // returns (200, , error). executeDelegation's new condition sees + // status=200 + body > 0 + error != nil → routes to handleSuccess. + var wg sync.WaitGroup + wg.Add(1) + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to listen: %v", err) + } + defer ln.Close() + go func() { + defer wg.Done() + conn, err := ln.Accept() + if err != nil { + return + } + defer conn.Close() + // Consume the HTTP request + buf := make([]byte, 2048) + conn.Read(buf) + // Send 200 OK with Content-Length: 100 but only 74 bytes of body + // (less than declared length → io.LimitReader returns io.EOF after reading all 74) + resp := "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: 100\r\n\r\n" + resp += `{"result":{"parts":[{"text":"work completed successfully"}]}}` // 74 bytes + conn.Write([]byte(resp)) + // Close immediately — client gets io.EOF on body read + }() + + agentURL := "http://" + ln.Addr().String() + mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentURL) + allowLoopbackForTest(t) + + expectExecuteDelegationBase(mock) + expectExecuteDelegationSuccess(mock, `{"result":{"parts":[{"text":"work completed successfully"}]}}`) + + // Execute synchronously (not as a goroutine) so we can check DB state immediately. + // The handler fires it as goroutine; we call it directly for deterministic testing. + a2aBody, _ := json.Marshal(map[string]interface{}{ + "jsonrpc": "2.0", + "id": "1", + "method": "message/send", + "params": map[string]interface{}{ + "message": map[string]interface{}{ + "role": "user", + "parts": []map[string]string{{"type": "text", "text": "do work"}}, + }, + }, + }) + dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody) + + time.Sleep(100 * time.Millisecond) // let DB writes settle + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestExecuteDelegation_ProxyErrorNon2xx_RemainsFailed verifies that the pre-fix failure +// path is unchanged when proxyA2ARequest returns a delivery-confirmed error with a non-2xx +// status code (e.g., 500 Internal Server Error with partial body read before connection drop). +// The new condition requires status >= 200 && status < 300, so non-2xx always routes to failure. +func TestExecuteDelegation_ProxyErrorNon2xx_RemainsFailed(t *testing.T) { + mock := setupTestDB(t) + mr := setupTestRedis(t) + allowLoopbackForTest(t) + + broadcaster := newTestBroadcaster() + wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + dh := NewDelegationHandler(wh, broadcaster) + + // Server returns 500 with declared Content-Length but closes connection early. + // proxyA2ARequest: reads 500 headers, partial body, then connection drop → body read error. + // Returns (500, , BadGateway). + // New condition: status=500 is NOT >= 200 && < 300 → routes to failure. + // isTransientProxyError(500) = false → no retry. + var wg sync.WaitGroup + wg.Add(1) + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to listen: %v", err) + } + defer ln.Close() + go func() { + defer wg.Done() + conn, err := ln.Accept() + if err != nil { + return + } + defer conn.Close() + buf := make([]byte, 2048) + conn.Read(buf) + // 500 with Content-Length: 100 but only ~60 bytes of body + resp := "HTTP/1.1 500 Internal Server Error\r\nContent-Type: application/json\r\nContent-Length: 100\r\n\r\n" + resp += `{"error":"agent crashed"}` // ~24 bytes, less than declared + conn.Write([]byte(resp)) + // Close immediately — client gets io.EOF on body read + }() + + agentURL := "http://" + ln.Addr().String() + mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentURL) + allowLoopbackForTest(t) + + expectExecuteDelegationBase(mock) + expectExecuteDelegationFailed(mock) + + a2aBody, _ := json.Marshal(map[string]interface{}{ + "jsonrpc": "2.0", "id": "1", "method": "message/send", + "params": map[string]interface{}{ + "message": map[string]interface{}{ + "role": "user", + "parts": []map[string]string{{"type": "text", "text": "do work"}}, + }, + }, + }) + dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody) + + time.Sleep(100 * time.Millisecond) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestExecuteDelegation_ProxyErrorEmptyBody_RemainsFailed verifies that the pre-fix failure +// path is unchanged when proxyA2ARequest returns an error with a 2xx status but empty body. +// The new condition requires len(respBody) > 0, so empty body routes to failure. +func TestExecuteDelegation_ProxyErrorEmptyBody_RemainsFailed(t *testing.T) { + mock := setupTestDB(t) + mr := setupTestRedis(t) + allowLoopbackForTest(t) + + broadcaster := newTestBroadcaster() + wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + dh := NewDelegationHandler(wh, broadcaster) + + // Server returns 502 Bad Gateway — proxyA2ARequest returns 502, body="" (empty), error != nil. + // New condition: proxyErr != nil && len(respBody) > 0 && status >= 200 && status < 300 + // → len(respBody) == 0 → condition FALSE → falls through to failure. + // isTransientProxyError(502) is TRUE → retry → same result → failure. + agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadGateway) + // No body — connection closes normally + })) + defer agentServer.Close() + + mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentServer.URL) + allowLoopbackForTest(t) + + // First attempt: updateDelegationStatus(dispatched) — from expectExecuteDelegationBase + expectExecuteDelegationBase(mock) + // Second attempt (retry): updateDelegationStatus(dispatched) again + mock.ExpectExec("UPDATE activity_logs SET status"). + WithArgs("dispatched", "", testSourceID, testDelegationID). + WillReturnResult(sqlmock.NewResult(0, 1)) + // Failure: INSERT + UPDATE (failed) + expectExecuteDelegationFailed(mock) + + a2aBody, _ := json.Marshal(map[string]interface{}{ + "jsonrpc": "2.0", "id": "1", "method": "message/send", + "params": map[string]interface{}{ + "message": map[string]interface{}{ + "role": "user", + "parts": []map[string]string{{"type": "text", "text": "do work"}}, + }, + }, + }) + dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody) + + time.Sleep(100 * time.Millisecond) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestExecuteDelegation_CleanProxyResponse_Unchanged verifies that a clean proxy response +// (no error, 200 with body) is unaffected by the new condition. This is the baseline: +// proxyErr == nil so the new condition never fires. +func TestExecuteDelegation_CleanProxyResponse_Unchanged(t *testing.T) { + mock := setupTestDB(t) + mr := setupTestRedis(t) + allowLoopbackForTest(t) + + broadcaster := newTestBroadcaster() + wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + dh := NewDelegationHandler(wh, broadcaster) + + agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(`{"result":{"parts":[{"text":"all good"}]}}`)) + })) + defer agentServer.Close() + + mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentServer.URL) + allowLoopbackForTest(t) + + expectExecuteDelegationBase(mock) + expectExecuteDelegationSuccess(mock, `{"result":{"parts":[{"text":"all good"}]}}`) + + a2aBody, _ := json.Marshal(map[string]interface{}{ + "jsonrpc": "2.0", "id": "1", "method": "message/send", + "params": map[string]interface{}{ + "message": map[string]interface{}{ + "role": "user", + "parts": []map[string]string{{"type": "text", "text": "do work"}}, + }, + }, + }) + dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody) + + time.Sleep(100 * time.Millisecond) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} +} diff --git a/workspace-server/internal/pendinguploads/export_test.go b/workspace-server/internal/pendinguploads/export_test.go index c758b629..b34d655d 100644 --- a/workspace-server/internal/pendinguploads/export_test.go +++ b/workspace-server/internal/pendinguploads/export_test.go @@ -6,12 +6,23 @@ import ( ) // StartSweeperWithIntervalForTest exposes startSweeperWithInterval to -// the external test package. The production code uses StartSweeper +// the external test package. The production code uses StartSeper // (which pins the canonical SweepInterval); tests pin a short interval // to exercise the ticker-driven cycle without burning real wall-clock // time. The Go convention `export_test.go` keeps this seam OUT of the // production binary — files ending in _test.go are stripped at build // time, so this re-export only exists during `go test`. func StartSweeperWithIntervalForTest(ctx context.Context, storage Storage, ackRetention, interval time.Duration) { - startSweeperWithInterval(ctx, storage, ackRetention, interval) + startSweeperWithInterval(ctx, storage, ackRetention, interval, nil) +} + +// StartSweeperForTest starts the sweeper and returns a done channel +// that is closed exactly once when the loop exits. Tests MUST receive +// from done before returning so the goroutine has fully terminated and +// the shared metric counters are stable for the next test's baseline +// capture (issue #86). +func StartSweeperForTest(ctx context.Context, storage Storage, ackRetention time.Duration) chan struct{} { + done := make(chan struct{}) + go startSweeperWithInterval(ctx, storage, ackRetention, SweepInterval, done) + return done } diff --git a/workspace-server/internal/pendinguploads/sweeper.go b/workspace-server/internal/pendinguploads/sweeper.go index b29a87ad..31a1920c 100644 --- a/workspace-server/internal/pendinguploads/sweeper.go +++ b/workspace-server/internal/pendinguploads/sweeper.go @@ -66,15 +66,21 @@ const sweepDeadline = 30 * time.Second // to exercise the ticker-driven sweep path without burning real wall- // clock time. func StartSweeper(ctx context.Context, storage Storage, ackRetention time.Duration) { - startSweeperWithInterval(ctx, storage, ackRetention, SweepInterval) + startSweeperWithInterval(ctx, storage, ackRetention, SweepInterval, nil) } // startSweeperWithInterval is the test-friendly variant of StartSweeper // — same loop, but the cadence is caller-specified. Production code // should use StartSweeper to keep the SweepInterval constant pinned. -func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention, interval time.Duration) { +// If done is non-nil it is closed exactly once when the loop exits, +// allowing tests to wait for the goroutine to fully terminate before +// asserting on shared metric counters (issue #86). +func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention, interval time.Duration, done chan struct{}) { if storage == nil { log.Println("pendinguploads sweeper: storage is nil — sweeper disabled") + if done != nil { + close(done) + } return } if ackRetention == 0 { @@ -86,6 +92,12 @@ func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention ) ticker := time.NewTicker(interval) defer ticker.Stop() + defer func() { + log.Println("pendinguploads sweeper: shutdown") + if done != nil { + close(done) + } + }() // Run once immediately so a platform restart cleans up any rows // that became eligible while we were down — don't make the // operator wait 5 minutes for the first sweep. @@ -93,9 +105,16 @@ func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention for { select { case <-ctx.Done(): - log.Println("pendinguploads sweeper: shutdown") return case <-ticker.C: + // Guard: ctx may have been cancelled between the ticker firing + // and this case being selected (MPMC channel; close-to-ready race). + // Calling sweepOnce with an already-cancelled ctx would increment + // the error counter on shutdown — polluting the next test's + // baseline (issue #86 full-suite failure). + if ctx.Err() != nil { + continue + } sweepOnce(ctx, storage, ackRetention) } } diff --git a/workspace-server/internal/pendinguploads/sweeper_test.go b/workspace-server/internal/pendinguploads/sweeper_test.go index 8095e83d..b1a723a6 100644 --- a/workspace-server/internal/pendinguploads/sweeper_test.go +++ b/workspace-server/internal/pendinguploads/sweeper_test.go @@ -136,7 +136,7 @@ func TestStartSweeper_RunsImmediatelyAndOnTick(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - go pendinguploads.StartSweeper(ctx, store, time.Hour) + done := pendinguploads.StartSweeperForTest(ctx, store, time.Hour) store.waitForCycle(t, 1, 2*time.Second) if got := store.calls.Load(); got < 1 { t.Errorf("expected at least one immediate sweep, got %d", got) @@ -145,6 +145,10 @@ func TestStartSweeper_RunsImmediatelyAndOnTick(t *testing.T) { if store.gotRetention.Load() != 3600 { t.Errorf("retention seconds = %d, want 3600", store.gotRetention.Load()) } + // #86 fix: ensure goroutine has exited before the next test's + // metricDelta() baseline capture. + cancel() + <-done } func TestStartSweeper_ZeroAckRetentionUsesDefault(t *testing.T) { @@ -152,23 +156,22 @@ func TestStartSweeper_ZeroAckRetentionUsesDefault(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - go pendinguploads.StartSweeper(ctx, store, 0) + done := pendinguploads.StartSweeperForTest(ctx, store, 0) store.waitForCycle(t, 1, 2*time.Second) want := int64(pendinguploads.DefaultAckRetention.Seconds()) if store.gotRetention.Load() != want { t.Errorf("retention = %d, want default %d", store.gotRetention.Load(), want) } + // #86 fix. + cancel() + <-done } func TestStartSweeper_ContextCancelStopsLoop(t *testing.T) { store := newFakeSweepStorage([]pendinguploads.SweepResult{{}}, nil) ctx, cancel := context.WithCancel(context.Background()) - done := make(chan struct{}) - go func() { - pendinguploads.StartSweeper(ctx, store, time.Second) - close(done) - }() + done := pendinguploads.StartSweeperForTest(ctx, store, time.Second) store.waitForCycle(t, 1, 2*time.Second) cancel() @@ -187,14 +190,17 @@ func TestStartSweeperWithInterval_TickerFiresAdditionalCycles(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - go pendinguploads.StartSweeperWithIntervalForTest(ctx, store, time.Hour, 30*time.Millisecond) - + done := pendinguploads.StartSweeperForTest(ctx, store, time.Hour) // Immediate cycle + at least one tick-driven cycle. store.waitForCycle(t, 2, 2*time.Second) if got := store.calls.Load(); got < 2 { t.Errorf("expected ≥2 cycles (immediate + 1 tick), got %d", got) } + // #86 fix: drain the done channel so the goroutine is fully gone + // before the next test's metricDelta() baseline capture. + cancel() + <-done } func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) { @@ -217,7 +223,7 @@ func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) { // waitForCycle is too early. _, _, deltaError := metricDelta(t) - go pendinguploads.StartSweeper(ctx, store, time.Hour) + done := pendinguploads.StartSweeperForTest(ctx, store, time.Hour) // Wait for the first (errored) cycle. store.waitForCycle(t, 1, 2*time.Second) @@ -226,11 +232,13 @@ func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) { // stops the loop on the next select pass with no in-flight metric // writes outstanding. waitForMetricDelta(t, deltaError, 1, 2*time.Second) - // Cancel — the goroutine returns cleanly, proving the error path - // didn't crash the loop. Without this fix the goroutine would have - // either panicked (process abort visible at exit) or stuck (this - // cancel + done-channel pattern would deadlock instead). + // Cancel and wait for the goroutine to fully exit (#86 fix). + // Without the done-channel wait, the goroutine races with the next + // test's metricDelta() baseline capture — the next test may see + // error=1 from this test still "in flight", throwing off its + // deltaError assertion. cancel() + <-done } // metricDelta returns a function that, when called, returns how much @@ -265,7 +273,7 @@ func TestStartSweeper_RecordsMetricsOnSuccess(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - go pendinguploads.StartSweeper(ctx, store, time.Hour) + done := pendinguploads.StartSweeperForTest(ctx, store, time.Hour) store.waitForCycle(t, 1, 2*time.Second) // Poll for the success counters to settle — closes the cycleDone- @@ -278,6 +286,15 @@ func TestStartSweeper_RecordsMetricsOnSuccess(t *testing.T) { if got := deltaError(); got != 0 { t.Errorf("error counter delta = %d, want 0", got) } + + // #86 fix: drain the done channel so the goroutine is fully gone + // before the next test's metricDelta() baseline capture. Without this + // the previous test's goroutine could still be mid-Sweep (blocked on + // the fake's results channel) and its eventual return would mutate + // the shared error/acked counters after the next test has already + // snapshot its baseline. + cancel() + <-done } func TestStartSweeper_RecordsMetricsOnError(t *testing.T) { @@ -290,7 +307,7 @@ func TestStartSweeper_RecordsMetricsOnError(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - go pendinguploads.StartSweeper(ctx, store, time.Hour) + done := pendinguploads.StartSweeperForTest(ctx, store, time.Hour) store.waitForCycle(t, 1, 2*time.Second) // Poll for the error counter to settle — cycleDone fires inside @@ -300,4 +317,9 @@ func TestStartSweeper_RecordsMetricsOnError(t *testing.T) { // though the metric WILL be 1 a few ms later. See // waitForMetricDelta comment. waitForMetricDelta(t, deltaError, 1, 2*time.Second) + + // #86 fix: ensure the goroutine has fully exited before the next + // test's metricDelta() baseline capture. + cancel() + <-done } diff --git a/workspace-server/internal/plugins/local_test.go b/workspace-server/internal/plugins/local_test.go index bbbf74f9..9541a303 100644 --- a/workspace-server/internal/plugins/local_test.go +++ b/workspace-server/internal/plugins/local_test.go @@ -132,19 +132,20 @@ func TestLocalResolver_HonoursContextCancellation(t *testing.T) { } func TestLocalResolver_BubblesUpCopyFailure(t *testing.T) { - // os.Chmod(dst, 0o555) silently passes when os.Geteuid() == 0 - // (root bypasses POSIX permission checks). We cannot reliably - // exercise the write-failure branch in a root environment without - // patching the syscalls, so skip it honestly. + // Source file the copyTree walk would read; make dst unwritable so + // the copyFile step fails. Skip when running as root — Linux + // filesystem permissions are advisory-only for uid 0, so chmod 0o555 + // does not prevent writes and the test passes vacuously instead of + // exercising the error path (issue #87). if os.Getuid() == 0 { - t.Skip("running as root — cannot exercise write-failure branch") + t.Skip("skipping: chmod 0o555 is ineffective when running as root") } - base := t.TempDir() writePlugin(t, base, "demo", map[string]string{ "plugin.yaml": "name: demo\n", }) dst := t.TempDir() + // Make dst read-only so creating files inside it fails. if err := os.Chmod(dst, 0o555); err != nil { t.Fatal(err) } diff --git a/workspace/tests/conftest.py b/workspace/tests/conftest.py index abae4168..b946240d 100644 --- a/workspace/tests/conftest.py +++ b/workspace/tests/conftest.py @@ -401,6 +401,35 @@ if "a2a" not in sys.modules: # tests now live in the claude-code template repo, where the real SDK # IS installed via Dockerfile, so no stub is needed. + +# ==================== Test isolation fixtures ==================== + +import pytest + + +@pytest.fixture(scope="function", autouse=True) +def _clear_platform_auth_cache(): + """Reset platform_auth._cached_token before each test. + + Fixes issue #160: tests that use monkeypatch.delenv("MOLECULE_WORKSPACE_TOKEN") + to simulate "no token in env" fail when platform_auth._cached_token was already + set from a prior test's MOLECULE_WORKSPACE_TOKEN value. The cache is populated + at module import or first get_token() call and persists for the process lifetime + — monkeypatch.delenv removes the env var but not the module-level cache. + + Run at function scope so each test starts with a clean slate regardless of + what the previous test set. The import is inside the fixture (not at file + top-level) because conftest.py runs during test collection before + platform_auth might be available in all test environments. If the module is + absent (import error), the fixture is a no-op. + """ + try: + import platform_auth as _pa + _pa.clear_cache() + except ImportError: + pass + yield # run the test, then fixture teardown has nothing to do + if "langchain_core" not in sys.modules: _make_langchain_mocks()