Merge remote-tracking branch 'origin/main' into trig-189

This commit is contained in:
Molecule AI Core Platform Lead 2026-05-09 22:58:32 +00:00
commit 2a04233d5a
14 changed files with 903 additions and 41 deletions

View File

@ -0,0 +1,309 @@
// @vitest-environment jsdom
/**
* Tests for canvas keyboard shortcuts (useKeyboardShortcuts hook).
*
* Covers: Esc, Enter/Shift+Enter, Cmd+]/[, Z, and Arrow keys.
*
* The hook is tested by dispatching KeyboardEvents at the window and
* asserting the resulting store mutations / dispatched events.
*/
import React from "react";
import { render, cleanup, fireEvent } from "@testing-library/react";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { useKeyboardShortcuts } from "../useKeyboardShortcuts";
import { useCanvasStore } from "@/store/canvas";
// ─── Mock store ──────────────────────────────────────────────────────────────
// Shared savePosition spy so tests can assert persistence calls directly.
const mockSavePosition = vi.fn().mockResolvedValue(undefined);
// NOTE(review): vi.mock is hoisted above the imports, but the factory runs
// lazily when the mocked module is first required, so the forward reference to
// mockStoreState (declared below) normally resolves. If vitest ever throws
// "Cannot access 'mockStoreState' before initialization", move the state into
// vi.hoisted() — confirm against the vitest mocking documentation.
vi.mock("@/store/canvas", () => ({
// The store is consumed both as a selector hook and via getState(); the mock
// mirrors both call styles.
useCanvasStore: Object.assign(
vi.fn((sel) => sel(mockStoreState)),
{
getState: () => mockStoreState,
}
),
}));
// Module-level mutable state so tests can mutate between cases
const mockStoreState = {
selectedNodeId: null as string | null,
selectedNodeIds: new Set<string>(),
nodes: [] as Array<{ id: string; position: { x: number; y: number }; data: { parentId?: string | null } }>,
contextMenu: null as { x: number; y: number; nodeId: string } | null,
closeContextMenu: vi.fn(),
selectNode: vi.fn(),
clearSelection: vi.fn(),
bumpZOrder: vi.fn(),
savePosition: mockSavePosition,
moveNode: vi.fn(),
};
afterEach(() => {
  cleanup();
  // vi.clearAllMocks() already calls mockClear() on every vi.fn() — including
  // the store's action spies and mockSavePosition — so the per-mock
  // mockClear() calls this block previously made were redundant. (It does NOT
  // reset implementations, so mockSavePosition keeps its mockResolvedValue.)
  vi.clearAllMocks();
  // Reset to default empty state between tests
  mockStoreState.selectedNodeId = null;
  mockStoreState.selectedNodeIds = new Set();
  mockStoreState.nodes = [];
  mockStoreState.contextMenu = null;
});
// ─── Test wrapper ────────────────────────────────────────────────────────────
// Host component whose only job is to install the hook's keyboard listeners
// for the lifetime of the rendered tree.
function ShortcutTestComponent() {
  useKeyboardShortcuts();
  return <div data-testid="canvas-root" />;
}
// Mount the host component and hand back the RTL render utilities.
const renderWithProvider = () => render(<ShortcutTestComponent />);
// ─── Tests ───────────────────────────────────────────────────────────────────
describe("Esc — deselect / close context menu", () => {
  // Mount the hook, then press Escape at the window.
  const pressEscape = () => {
    renderWithProvider();
    fireEvent.keyDown(window, { key: "Escape" });
  };
  it("closes the context menu when one is open", () => {
    mockStoreState.contextMenu = { x: 100, y: 100, nodeId: "n1" };
    pressEscape();
    expect(mockStoreState.closeContextMenu).toHaveBeenCalledTimes(1);
  });
  it("clears the batch selection when no context menu is open", () => {
    mockStoreState.contextMenu = null;
    mockStoreState.selectedNodeIds = new Set(["n1", "n2"]);
    pressEscape();
    expect(mockStoreState.clearSelection).toHaveBeenCalledTimes(1);
  });
  it("deselects the focused node when no batch selection exists", () => {
    mockStoreState.contextMenu = null;
    mockStoreState.selectedNodeIds = new Set();
    mockStoreState.selectedNodeId = "n1";
    pressEscape();
    expect(mockStoreState.selectNode).toHaveBeenCalledWith(null);
  });
});
describe("Enter — hierarchy navigation", () => {
  beforeEach(() => {
    // Default fixture: n2 is a child of n1; n3 is an unrelated root.
    mockStoreState.selectedNodeId = "n1";
    mockStoreState.nodes = [
      { id: "n1", position: { x: 0, y: 0 }, data: { parentId: null } },
      { id: "n2", position: { x: 100, y: 0 }, data: { parentId: "n1" } },
      { id: "n3", position: { x: 200, y: 0 }, data: { parentId: null } },
    ];
  });
  // Mount the hook, then press Enter (optionally with Shift held).
  const pressEnter = (shiftKey = false) => {
    renderWithProvider();
    fireEvent.keyDown(window, { key: "Enter", shiftKey });
  };
  it("navigates to the first child on Enter", () => {
    pressEnter();
    expect(mockStoreState.selectNode).toHaveBeenCalledWith("n2");
  });
  it("navigates to the parent on Shift+Enter", () => {
    mockStoreState.nodes = [
      { id: "n1", position: { x: 0, y: 0 }, data: { parentId: null } },
      { id: "n2", position: { x: 100, y: 0 }, data: { parentId: "n1" } },
    ];
    mockStoreState.selectedNodeId = "n2";
    pressEnter(true);
    expect(mockStoreState.selectNode).toHaveBeenCalledWith("n1");
  });
  it("does NOT navigate when no node is selected", () => {
    mockStoreState.selectedNodeId = null;
    pressEnter();
    expect(mockStoreState.selectNode).not.toHaveBeenCalled();
  });
});
describe("Cmd+]/[ — z-order bump", () => {
  beforeEach(() => {
    mockStoreState.selectedNodeId = "n1";
  });
  // Mount the hook, then press `key` with the given modifier flags set.
  const pressWithModifier = (
    key: string,
    mods: { metaKey?: boolean; ctrlKey?: boolean },
  ) => {
    renderWithProvider();
    fireEvent.keyDown(window, { key, ...mods });
  };
  it("bumps z-order forward on Cmd+]", () => {
    pressWithModifier("]", { metaKey: true });
    expect(mockStoreState.bumpZOrder).toHaveBeenCalledWith("n1", 1);
  });
  it("bumps z-order backward on Cmd+[", () => {
    pressWithModifier("[", { metaKey: true });
    expect(mockStoreState.bumpZOrder).toHaveBeenCalledWith("n1", -1);
  });
  it("uses Ctrl as the modifier key", () => {
    pressWithModifier("]", { ctrlKey: true });
    expect(mockStoreState.bumpZOrder).toHaveBeenCalledWith("n1", 1);
  });
});
describe("Z — zoom-to-team", () => {
  let dispatchedEvents: CustomEvent[] = [];
  // Stable listener reference so afterEach can actually detach it.
  // BUG FIX: the previous afterEach passed a *fresh* arrow function to
  // removeEventListener, which never matches the listener added in
  // beforeEach — so a new listener leaked on every test. All leaked listeners
  // close over the same `dispatchedEvents` binding (reassigned each
  // beforeEach), so each dispatch was recorded once per leaked listener,
  // inflating counts in any later test that dispatches.
  const recordZoomEvent = (e: Event) => {
    dispatchedEvents.push(e as CustomEvent);
  };
  beforeEach(() => {
    dispatchedEvents = [];
    mockStoreState.selectedNodeId = "n1";
    mockStoreState.nodes = [
      { id: "n1", position: { x: 0, y: 0 }, data: { parentId: null } },
      { id: "n2", position: { x: 100, y: 0 }, data: { parentId: "n1" } },
    ];
    window.addEventListener("molecule:zoom-to-team", recordZoomEvent);
  });
  afterEach(() => {
    window.removeEventListener("molecule:zoom-to-team", recordZoomEvent);
  });
  it("dispatches zoom-to-team when the selected node has children", () => {
    renderWithProvider();
    fireEvent.keyDown(window, { key: "z" });
    expect(dispatchedEvents).toHaveLength(1);
    expect(dispatchedEvents[0].detail.nodeId).toBe("n1");
  });
  it("does NOT fire when no node is selected", () => {
    mockStoreState.selectedNodeId = null;
    renderWithProvider();
    fireEvent.keyDown(window, { key: "z" });
    expect(dispatchedEvents).toHaveLength(0);
  });
  it("does NOT fire when the node has no children", () => {
    mockStoreState.nodes = [
      { id: "n1", position: { x: 0, y: 0 }, data: { parentId: null } },
    ];
    renderWithProvider();
    fireEvent.keyDown(window, { key: "z" });
    expect(dispatchedEvents).toHaveLength(0);
  });
  it("skips when the target element is an input", () => {
    renderWithProvider();
    const input = document.createElement("input");
    document.body.appendChild(input);
    fireEvent.keyDown(input, { key: "z" });
    expect(dispatchedEvents).toHaveLength(0);
    document.body.removeChild(input);
  });
});
describe("Arrow keys — keyboard node movement", () => {
beforeEach(() => {
// Single selected node at a known position; tests assert the (dx, dy)
// delta passed to moveNode rather than the resulting position.
mockStoreState.selectedNodeId = "n1";
mockStoreState.nodes = [
{ id: "n1", position: { x: 100, y: 200 }, data: { parentId: null } },
];
});
// Base step: 10 px per press, one axis per arrow key.
it("moves the selected node down on ArrowDown", () => {
renderWithProvider();
fireEvent.keyDown(window, { key: "ArrowDown" });
expect(mockStoreState.moveNode).toHaveBeenCalledWith("n1", 0, 10);
});
it("moves the selected node up on ArrowUp", () => {
renderWithProvider();
fireEvent.keyDown(window, { key: "ArrowUp" });
expect(mockStoreState.moveNode).toHaveBeenCalledWith("n1", 0, -10);
});
it("moves the selected node right on ArrowRight", () => {
renderWithProvider();
fireEvent.keyDown(window, { key: "ArrowRight" });
expect(mockStoreState.moveNode).toHaveBeenCalledWith("n1", 10, 0);
});
it("moves the selected node left on ArrowLeft", () => {
renderWithProvider();
fireEvent.keyDown(window, { key: "ArrowLeft" });
expect(mockStoreState.moveNode).toHaveBeenCalledWith("n1", -10, 0);
});
// Shift raises the step to 50 px for coarse positioning.
it("moves 50 px when Shift is held", () => {
renderWithProvider();
fireEvent.keyDown(window, { key: "ArrowDown", shiftKey: true });
expect(mockStoreState.moveNode).toHaveBeenCalledWith("n1", 0, 50);
});
it("does NOT fire when no node is selected", () => {
mockStoreState.selectedNodeId = null;
renderWithProvider();
fireEvent.keyDown(window, { key: "ArrowDown" });
expect(mockStoreState.moveNode).not.toHaveBeenCalled();
});
// The hook listens at window; the keydown bubbles up from the input and the
// handler's inInput guard checks the event target.
it("skips when the target element is an input", () => {
renderWithProvider();
const input = document.createElement("input");
document.body.appendChild(input);
fireEvent.keyDown(input, { key: "ArrowDown" });
expect(mockStoreState.moveNode).not.toHaveBeenCalled();
document.body.removeChild(input);
});
// Open modal dialogs own their arrow-key semantics, so canvas movement must
// yield — the hook checks for [role="dialog"][aria-modal="true"].
it("skips when a modal dialog is already open", () => {
renderWithProvider();
const dialog = document.createElement("div");
dialog.setAttribute("role", "dialog");
dialog.setAttribute("aria-modal", "true");
document.body.appendChild(dialog);
fireEvent.keyDown(window, { key: "ArrowDown" });
expect(mockStoreState.moveNode).not.toHaveBeenCalled();
document.body.removeChild(dialog);
});
// NOTE(review): this relies on fireEvent assigning the non-standard
// `preventDefault` property onto the created event object so the handler's
// e.preventDefault() hits the spy — confirm against @testing-library/dom docs.
it("prevents default browser scroll on arrow keys", () => {
renderWithProvider();
const preventDefault = vi.fn();
fireEvent.keyDown(window, {
key: "ArrowDown",
preventDefault,
});
expect(preventDefault).toHaveBeenCalled();
});
});
describe("all shortcuts respect inInput guard", () => {
  // Append a form control, dispatch `key` at it, then remove it again.
  // The assertion afterwards does not depend on the control still existing.
  const keyDownInsideControl = (tag: "input" | "textarea", key: string) => {
    const control = document.createElement(tag);
    document.body.appendChild(control);
    fireEvent.keyDown(control, { key });
    document.body.removeChild(control);
  };
  it("ArrowDown is skipped in an input element", () => {
    mockStoreState.selectedNodeId = "n1";
    renderWithProvider();
    keyDownInsideControl("textarea", "ArrowDown");
    expect(mockStoreState.moveNode).not.toHaveBeenCalled();
  });
  it("Enter navigation is skipped in an input element", () => {
    mockStoreState.selectedNodeId = "n1";
    mockStoreState.nodes = [
      { id: "n1", position: { x: 0, y: 0 }, data: { parentId: null } },
      { id: "n2", position: { x: 100, y: 0 }, data: { parentId: "n1" } },
    ];
    renderWithProvider();
    keyDownInsideControl("input", "Enter");
    expect(mockStoreState.selectNode).not.toHaveBeenCalled();
  });
});

View File

@ -14,6 +14,7 @@ import { useCanvasStore } from "@/store/canvas";
* Cmd/Ctrl+] bump selected node forward in z-order
* Cmd/Ctrl+[ bump selected node backward in z-order
* Z zoom-to-team if the selected node has children
* Arrow keys move selected node 10px (50px with Shift)
*/
export function useKeyboardShortcuts() {
useEffect(() => {
@ -80,6 +81,33 @@ export function useKeyboardShortcuts() {
);
}
}
// Arrow-key node movement — Figma-style keyboard drag for keyboard users.
// 10 px per press, 50 px with Shift held. Only fires when a node
// is selected and the target isn't a form control.
if (
!inInput &&
(e.key === "ArrowUp" ||
e.key === "ArrowDown" ||
e.key === "ArrowLeft" ||
e.key === "ArrowRight")
) {
const state = useCanvasStore.getState();
const selectedId = state.selectedNodeId;
if (!selectedId) return;
// Skip when a modal/dialog is already open — dialogs own their own
// arrow-key semantics and shouldn't trigger canvas moves.
if (document.querySelector('[role="dialog"][aria-modal="true"]')) return;
e.preventDefault();
const step = e.shiftKey ? 50 : 10;
let dx = 0;
let dy = 0;
if (e.key === "ArrowUp") dy = -step;
else if (e.key === "ArrowDown") dy = step;
else if (e.key === "ArrowLeft") dx = -step;
else dx = step;
state.moveNode(selectedId, dx, dy);
}
};
window.addEventListener("keydown", handler);
return () => window.removeEventListener("keydown", handler);

View File

@ -1012,4 +1012,3 @@ describe("handleCanvasEvent liveAnnouncement", () => {
expect(state.liveAnnouncement ?? "").toBe("");
});
});
});

View File

@ -1181,3 +1181,46 @@ describe("batchNest", () => {
expect(nestPatches).toHaveLength(1);
});
});
// ---------- moveNode ----------
describe("moveNode", () => {
  beforeEach(() => {
    // Fresh fetch stub per test: every request resolves with an empty OK body.
    const fetchMock = global.fetch as ReturnType<typeof vi.fn>;
    fetchMock.mockImplementation(() =>
      Promise.resolve({ ok: true, json: () => Promise.resolve({}) } as Response),
    );
    fetchMock.mockClear();
  });
  it("updates the node's position by the given delta", () => {
    useCanvasStore.getState().hydrate([
      makeWS({ id: "n1", name: "Node 1", x: 100, y: 200 }),
    ]);
    useCanvasStore.getState().selectNode("n1");
    // dx/dy are deltas in canvas space, not absolute coordinates.
    useCanvasStore.getState().moveNode("n1", 10, -50);
    const moved = useCanvasStore.getState().nodes.find((n) => n.id === "n1")!;
    expect(moved.position).toEqual({ x: 110, y: 150 });
  });
  it("is a no-op when the node does not exist", () => {
    useCanvasStore.getState().hydrate([makeWS({ id: "n1", name: "Node 1", x: 0, y: 0 })]);
    expect(() =>
      useCanvasStore.getState().moveNode("nonexistent", 10, 10),
    ).not.toThrow();
  });
  it("calls savePosition with the new absolute coordinates", async () => {
    useCanvasStore.getState().hydrate([makeWS({ id: "n1", name: "Node 1", x: 100, y: 200 })]);
    useCanvasStore.getState().selectNode("n1");
    const fetchMock = global.fetch as ReturnType<typeof vi.fn>;
    useCanvasStore.getState().moveNode("n1", 10, 20);
    // Persistence is fire-and-forget — poll until the PATCH lands.
    await vi.waitFor(() => {
      expect(fetchMock).toHaveBeenCalledWith(
        expect.stringContaining("/workspaces/n1"),
        expect.objectContaining({
          method: "PATCH",
          body: JSON.stringify({ x: 110, y: 220 }),
        }),
      );
    });
  });
});

View File

@ -165,6 +165,13 @@ interface CanvasState {
* this so a drag that pushed a child past the parent edge commits
* the parent grow on release (commit-on-release pattern). */
growParentsToFitChildren: () => void;
/** Move a selected node by (dx, dy) in canvas space. Used by keyboard
* arrow-key shortcuts so keyboard users can reposition nodes without a
* mouse. Persists the new position to the backend and skips the
* grow-parents pass that onNodesChange runs on every drag tick
* (avoids the "edge-chase" flicker that commit-on-release is meant to
* prevent). */
moveNode: (nodeId: string, dx: number, dy: number) => void;
/** Re-layout a parent's children to the default 2-column grid. Used
* by the "Arrange children" context-menu command so users can rescue
* out-of-bounds children on demand topology no longer does it
@ -1032,6 +1039,19 @@ export const useCanvasStore = create<CanvasState>((set, get) => ({
}
},
moveNode: (nodeId, dx, dy) => {
// Look up the node first; unknown ids are a silent no-op so callers (e.g.
// keyboard handlers) don't have to re-validate selection against the list.
const node = get().nodes.find((n) => n.id === nodeId);
if (!node) return;
// Apply the delta immutably (new array + new node object) so subscribers
// observe the position change.
set({
nodes: get().nodes.map((n) =>
n.id === nodeId
? { ...n, position: { x: n.position.x + dx, y: n.position.y + dy } }
: n,
),
});
// Persist fire-and-forget; `node.position` was captured before the set()
// above, so adding the delta here yields the same new absolute coordinates.
void get().savePosition(nodeId, node.position.x + dx, node.position.y + dy);
},
savePosition: async (nodeId: string, x: number, y: number) => {
try {
await api.patch(`/workspaces/${nodeId}`, { x, y });

View File

@ -55,7 +55,7 @@ canvas/src/
### Node Rendering ✅ (with notes)
- **Framework:** `@xyflow/react` (React Flow) — DOM-based, not SVG/Canvas
- **Node selection:** `aria-pressed` + border ring (`border-accent/70`) + shadow
- **Node drag:** React Flow native drag — mouse only, no keyboard alternative yet
- **Node drag:** React Flow native drag + Arrow keys (10px/step, Shift 50px) — keyboard-accessible (PR #182) ✅
- **Node resize:** `NodeResizer` component visible on selected card, keyboard-inaccessible
- **Status:** Accessible via `aria-label` on node cards — "Alpha Workspace workspace — online"
@ -72,10 +72,12 @@ canvas/src/
- **Minimap:** Not present (MiniMap mocked as null in tests)
- **Status:** Basic keyboard support via viewport shortcuts
### Keyboard Shortcuts ⚠️ PARTIAL
- Exists in `useKeyboardShortcuts.ts` but no `aria-describedby` on trigger buttons
- No dedicated keyboard shortcut help dialog
- **Gap:** Users can't discover shortcuts visually
### Keyboard Shortcuts ✅ (strong)
- All shortcuts in `useKeyboardShortcuts.ts` with `inInput` guard ✅
- Global `?` shortcut opens `KeyboardShortcutsDialog` (PR #175) ✅
- Dialog: portal-based, aria-modal, focus trap, Escape close ✅
- Arrow keys move selected node 10px (50px with Shift) — keyboard node drag (this PR) ✅
- Hierarchy navigation (Enter/Shift+Enter), z-order (Cmd+]/[), zoom-to-team (Z) ✅
### Focus Management ✅ (strong)
- Skip link → `#canvas-main`
@ -83,9 +85,9 @@ canvas/src/
- Focus trap in modals via Radix ✅
- Focus ring: `focus-visible:ring-2 focus-visible:ring-blue-500 focus-visible:ring-offset-2 focus-visible:ring-offset-zinc-950`
### Accessibility Tree ⚠️ PARTIAL
### Accessibility Tree
- Canvas is in accessibility tree (React Flow DOM nodes)
- Node state changes not announced to screen readers (no `aria-live` region)
- Node state changes announced via `aria-live="polite"` region (PR #172) ✅
- Context menus announced via `role="menu"`
### Context Menus ✅ (strong)
@ -95,11 +97,10 @@ canvas/src/
- Escape + Tab close menu ✅
- Auto-focus first item on open ✅
### Drag and Drop ⚠️ PARTIAL
### Drag and Drop
- **Mouse drag:** React Flow native
- **Drop target:** Visual indicator (`bg-emerald-950/40 border-emerald-400/60`) ✅
- **Keyboard alternative:** None — nodes repositioned only via mouse drag
- **Status:** Mouse-only. Keyboard users cannot rearrange nodes.
- **Keyboard alternative:** Arrow keys move selected node 10px per press (50px with Shift) (PR #182) ✅
---
@ -109,7 +110,7 @@ canvas/src/
|----------|------|-------|--------|
| ~~HIGH~~ | ~~Screen reader announcements for canvas state changes~~ | ~~Canvas.tsx, canvas-events.ts, canvas.ts~~ | ✅ Done — PR #172 |
| MEDIUM | Keyboard shortcut help dialog | useKeyboardShortcuts.ts | ✅ Done (PR #175) |
| MEDIUM | Keyboard-accessible node drag | WorkspaceNode.tsx, useDragHandlers.ts | Not started |
| MEDIUM | Keyboard-accessible node drag | WorkspaceNode.tsx, useDragHandlers.ts | ✅ Done (this PR) |
| LOW | Edge anchor keyboard accessibility | A2AEdge.tsx | Not started |
| LOW | Node resize keyboard accessibility | WorkspaceNode.tsx (NodeResizer) | Not started |

View File

@ -490,7 +490,14 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
if logActivity && deliveryConfirmed {
h.logA2ASuccess(ctx, workspaceID, callerID, body, respBody, a2aMethod, resp.StatusCode, durationMs)
}
return 0, nil, &proxyA2AError{
// Preserve the actual HTTP status code and any body bytes already read.
// Previously this returned (0, nil, error) which discarded both.
// Preserving them allows executeDelegation's new condition
// proxyErr != nil && len(respBody) > 0 && status >= 200 && status < 300
// to correctly route delivery-confirmed responses (where the agent completed
// the work but the TCP connection dropped before the full body was received)
// to success instead of failure (#159).
return resp.StatusCode, respBody, &proxyA2AError{
Status: http.StatusBadGateway,
Response: gin.H{
"error": "failed to read agent response",

View File

@ -348,7 +348,7 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
// received). Treat as success: the response body is valid and the work is done.
// This prevents "retry storms" where the canvas sees error + Restart-workspace
// suggestion even though the delegation actually completed.
if proxyErr != nil && len(respBody) > 0 && status >= 200 && status < 300 {
if isDeliveryConfirmedSuccess(proxyErr, status, respBody) {
log.Printf("Delegation %s: completed with delivery error (status=%d, respBody=%d bytes, proxyErr=%v) — treating as success",
delegationID, status, len(respBody), proxyErr.Error())
goto handleSuccess
@ -685,6 +685,34 @@ func isTransientProxyError(err *proxyA2AError) bool {
return false
}
// isDeliveryConfirmedSuccess reports whether the proxy's `(status, body, err)`
// triple represents a delivery-confirmed success: the proxy hit a transport-
// layer error AFTER receiving a complete 2xx response with a non-empty body.
// In that case the agent did the work — the error is on the wire, not in the
// agent — so the delegation should be marked succeeded rather than failed
// (preventing the retry-storm + restart-suggest cascade described in #159).
//
// All three preconditions must hold simultaneously:
//   - proxyErr != nil: a delivery error actually fired (e.g. connection reset);
//     nil means the success path was already chosen upstream.
//   - len(respBody) > 0: some response body arrived before the error; an empty
//     body means there is no work-result to recover.
//   - 200 <= status < 300: the partial response carried a 2xx code; a non-2xx
//     means the agent itself signalled failure or transient state — don't
//     promote it.
func isDeliveryConfirmedSuccess(proxyErr *proxyA2AError, status int, respBody []byte) bool {
	return proxyErr != nil && len(respBody) > 0 && status >= 200 && status < 300
}
// isQueuedProxyResponse reports whether the proxy returned a body shaped like
// `{"queued": true, "queue_id": ..., "queue_depth": ..., "message": ...}` —
// the busy-target enqueue path in a2a_proxy_helpers.go. Caller checks this

View File

@ -5,8 +5,10 @@ import (
"context"
"encoding/json"
"fmt"
"net"
"net/http"
"net/http/httptest"
"sync"
"testing"
"time"
@ -376,6 +378,44 @@ func TestIsTransientProxyError_RetriesOnRestartRaceStatuses(t *testing.T) {
}
}
// TestIsDeliveryConfirmedSuccess — regression guard for #159: the proxy can
// return a complete 2xx body and THEN raise a transport error (e.g. the TCP
// connection drops after the response is received but before close). In that
// case the agent did the work; marking the delegation "failed" causes the
// retry-storm + Restart-workspace cascade described in #159. The new helper
// distinguishes this from genuine failures.
func TestIsDeliveryConfirmedSuccess(t *testing.T) {
// Reused non-nil error value; its fields are irrelevant here — the helper
// only checks nil-ness of the error, never its contents.
connErr := &proxyA2AError{Status: http.StatusOK, Response: gin.H{}}
cases := []struct {
name string
proxyErr *proxyA2AError
status int
body []byte
expect bool
}{
// The new branch: 2xx + body + transport error → recover as success.
{"200 + body + connreset (THE bug fix path)", connErr, http.StatusOK, []byte(`{"text":"ok"}`), true},
{"299 + body + connreset (boundary high)", connErr, 299, []byte(`{"text":"ok"}`), true},
{"200 + body + connreset (boundary low)", connErr, 200, []byte(`{"x":1}`), true},
// Negative cases: any one of the three preconditions failing → false.
{"nil proxyErr (no decision to make)", nil, http.StatusOK, []byte(`{"text":"ok"}`), false},
{"empty body (no work-result to recover)", connErr, http.StatusOK, []byte{}, false},
{"nil body (no work-result to recover)", connErr, http.StatusOK, nil, false},
{"4xx with body — agent signalled failure, do not promote", connErr, http.StatusBadRequest, []byte(`{"err":"bad"}`), false},
{"5xx with body — agent signalled failure, do not promote", connErr, http.StatusInternalServerError, []byte(`{"err":"crash"}`), false},
{"3xx with body — redirect, not a result", connErr, 301, []byte(`{"loc":"/x"}`), false},
{"199 status (under 200) — not a 2xx", connErr, 199, []byte(`{"x":1}`), false},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
if got := isDeliveryConfirmedSuccess(tc.proxyErr, tc.status, tc.body); got != tc.expect {
t.Errorf("isDeliveryConfirmedSuccess(%v, %d, %q) = %v, want %v",
tc.proxyErr, tc.status, string(tc.body), got, tc.expect)
}
})
}
}
func TestIsQueuedProxyResponse(t *testing.T) {
// Regression guard for the chat-leak bug: when the proxy returns
// 202 with a queued-shape body, executeDelegation must classify it
@ -918,3 +958,308 @@ func TestInsertDelegationOutcome_ZeroValueIsUnknown(t *testing.T) {
t.Errorf("insertOutcomeUnknown must not collide with insertOK")
}
}
// ==================== executeDelegation — delivery-confirmed proxy error regression tests ====================
//
// These test the fix for issue #159: when proxyA2ARequest returns an error but we have a
// non-empty response body with a 2xx status code, executeDelegation must treat it as success.
// The error is a delivery/transport error (e.g., connection reset after response was received).
// Previously, executeDelegation marked these as "failed" even though the work was done,
// causing retry storms and "error" rendering in canvas despite the response being available.
//
// Test strategy: spin up a mock A2A agent server, set up the source/target DB rows, call
// executeDelegation directly, and verify the activity_logs status and delegation status.
// Fixed ids shared by the #159 executeDelegation regression tests below.
const testDelegationID = "del-159-test"
const testSourceID = "ws-source-159"
const testTargetID = "ws-target-159"
// expectExecuteDelegationBase sets up sqlmock expectations for the DB queries that
// executeDelegation always makes, regardless of outcome.
// Expectations are order-sensitive: dispatched-status update first, then the
// target workspace lookup.
func expectExecuteDelegationBase(mock sqlmock.Sqlmock) {
// updateDelegationStatus: dispatched
// Uses prefix match — sqlmock regexes match the full query string.
mock.ExpectExec("UPDATE activity_logs SET status").
WithArgs("dispatched", "", testSourceID, testDelegationID).
WillReturnResult(sqlmock.NewResult(0, 1))
// CanCommunicate (source=target self-call is always allowed — no DB lookup needed)
// resolveAgentURL: reads ws:{id}:url from Redis, falls back to DB for target
// The empty url + "online" row forces the Redis value (set by each test) to win.
mock.ExpectQuery("SELECT url, status FROM workspaces WHERE id = ").
WithArgs(testTargetID).
WillReturnRows(sqlmock.NewRows([]string{"url", "status"}).AddRow("", "online"))
}
// expectExecuteDelegationSuccess sets up expectations for a completed delegation.
// NOTE(review): the respBody parameter is currently unused — every column
// except the trailing status is matched with AnyArg(). Either assert on the
// body argument or drop the parameter; confirm intent with the test authors.
func expectExecuteDelegationSuccess(mock sqlmock.Sqlmock, respBody string) {
// INSERT activity_logs for delegation completion (response_body status = 'completed')
mock.ExpectExec("INSERT INTO activity_logs").
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), "completed").
WillReturnResult(sqlmock.NewResult(0, 1))
// updateDelegationStatus: completed
mock.ExpectExec("UPDATE activity_logs SET status").
WithArgs("completed", "", testSourceID, testDelegationID).
WillReturnResult(sqlmock.NewResult(0, 1))
}
// expectExecuteDelegationFailed sets up expectations for a failed delegation.
// Mirrors expectExecuteDelegationSuccess but with 'failed' status and an
// AnyArg() error-message argument on the status update.
func expectExecuteDelegationFailed(mock sqlmock.Sqlmock) {
// INSERT activity_logs for delegation failure (response_body status = 'failed')
mock.ExpectExec("INSERT INTO activity_logs").
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), "failed").
WillReturnResult(sqlmock.NewResult(0, 1))
// updateDelegationStatus: failed
mock.ExpectExec("UPDATE activity_logs SET status").
WithArgs("failed", sqlmock.AnyArg(), testSourceID, testDelegationID).
WillReturnResult(sqlmock.NewResult(0, 1))
}
// TestExecuteDelegation_DeliveryConfirmedProxyError_TreatsAsSuccess is the primary regression
// test for issue #159. The scenario:
// - Attempt 1: server sends 200 OK headers + partial body, then closes connection.
// proxyA2ARequest: body read gets io.EOF (partial body read), returns (200, <partial>, BadGateway).
// isTransientProxyError(BadGateway) = TRUE → retry.
// - Attempt 2: server does the same thing (closes after partial body).
// proxyA2ARequest: same (200, <partial>, BadGateway).
// isTransientProxyError(BadGateway) = TRUE → retry AGAIN (but outer context will fire soon,
// or we get one more attempt). For the test we let it run.
// POST-FIX: the executeDelegation new condition sees status=200, body=<partial>, err!=nil
// and routes to handleSuccess immediately.
//
// The key pre/post-fix difference: pre-fix, executeDelegation received status=0 (hardcoded)
// even when the server sent 200, so the condition always failed. Post-fix, status=200 is
// preserved through the error return path (proxyA2ARequest now returns resp.StatusCode, respBody).
// In this test the retry ultimately succeeds (server eventually sends full body), but
// the critical assertion is that a 2xx partial-body delivery-confirmed response is never
// classified as "failed" — it always routes to success.
func TestExecuteDelegation_DeliveryConfirmedProxyError_TreatsAsSuccess(t *testing.T) {
	mock := setupTestDB(t)
	mr := setupTestRedis(t)
	allowLoopbackForTest(t)
	broadcaster := newTestBroadcaster()
	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	dh := NewDelegationHandler(wh, broadcaster)
	// Fake agent: sends a 200 response whose declared Content-Length exceeds
	// the bytes actually written, then closes the connection. Go's http.Client
	// sees an error on the body read. proxyA2ARequest captures the partial
	// body + status=200 and returns (200, <partial>, error). executeDelegation's
	// new condition sees status=200 + body > 0 + error != nil → handleSuccess.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("failed to listen: %v", err)
	}
	// Defer order matters (LIFO): ln.Close() runs first, unblocking a pending
	// Accept so the goroutine exits, then wg.Wait() joins it. Previously the
	// WaitGroup was Add'ed and Done'd but never waited on, so the fake-agent
	// goroutine could outlive the test.
	var wg sync.WaitGroup
	wg.Add(1)
	defer wg.Wait()
	defer ln.Close()
	go func() {
		defer wg.Done()
		conn, err := ln.Accept()
		if err != nil {
			return
		}
		defer conn.Close()
		// Consume the HTTP request; its contents are irrelevant here.
		buf := make([]byte, 2048)
		conn.Read(buf)
		// 200 OK with Content-Length: 100 but a body shorter than declared —
		// the client's body read errors out after draining what arrived.
		// (Previous comment claimed "74 bytes"; the JSON below is 61 bytes —
		// only "shorter than 100" matters.)
		resp := "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: 100\r\n\r\n"
		resp += `{"result":{"parts":[{"text":"work completed successfully"}]}}`
		conn.Write([]byte(resp))
		// Close immediately — client gets io.EOF on body read
	}()
	agentURL := "http://" + ln.Addr().String()
	mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentURL)
	// (A duplicate allowLoopbackForTest(t) call that sat here was removed —
	// loopback is already allowed at the top of the test.)
	expectExecuteDelegationBase(mock)
	expectExecuteDelegationSuccess(mock, `{"result":{"parts":[{"text":"work completed successfully"}]}}`)
	// Execute synchronously (not as a goroutine) so we can check DB state immediately.
	// The handler fires it as goroutine; we call it directly for deterministic testing.
	a2aBody, _ := json.Marshal(map[string]interface{}{
		"jsonrpc": "2.0",
		"id":      "1",
		"method":  "message/send",
		"params": map[string]interface{}{
			"message": map[string]interface{}{
				"role":  "user",
				"parts": []map[string]string{{"type": "text", "text": "do work"}},
			},
		},
	})
	dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody)
	time.Sleep(100 * time.Millisecond) // let DB writes settle
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestExecuteDelegation_ProxyErrorNon2xx_RemainsFailed verifies that the pre-fix failure
// path is unchanged when proxyA2ARequest returns a delivery-confirmed error with a non-2xx
// status code (e.g., 500 Internal Server Error with partial body read before connection drop).
// The new condition requires status >= 200 && status < 300, so non-2xx always routes to failure.
func TestExecuteDelegation_ProxyErrorNon2xx_RemainsFailed(t *testing.T) {
	mock := setupTestDB(t)
	mr := setupTestRedis(t)
	allowLoopbackForTest(t)
	broadcaster := newTestBroadcaster()
	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	dh := NewDelegationHandler(wh, broadcaster)
	// Fake agent: 500 headers + partial body, then connection drop.
	// proxyA2ARequest returns (500, <partial>, BadGateway). The new condition
	// requires a 2xx status, so this routes to failure, and
	// isTransientProxyError(500) = false → no retry.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("failed to listen: %v", err)
	}
	// Defer order (LIFO): ln.Close() first to unblock a pending Accept, then
	// wg.Wait() to join the goroutine. The WaitGroup was previously Add'ed and
	// Done'd but never waited on.
	var wg sync.WaitGroup
	wg.Add(1)
	defer wg.Wait()
	defer ln.Close()
	go func() {
		defer wg.Done()
		conn, err := ln.Accept()
		if err != nil {
			return
		}
		defer conn.Close()
		buf := make([]byte, 2048)
		conn.Read(buf)
		// 500 with Content-Length: 100 but a body shorter than declared
		// (the JSON below is 25 bytes; the old "~24 bytes" note was off by one).
		resp := "HTTP/1.1 500 Internal Server Error\r\nContent-Type: application/json\r\nContent-Length: 100\r\n\r\n"
		resp += `{"error":"agent crashed"}`
		conn.Write([]byte(resp))
		// Close immediately — client gets io.EOF on body read
	}()
	agentURL := "http://" + ln.Addr().String()
	mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentURL)
	// (A duplicate allowLoopbackForTest(t) call that sat here was removed —
	// loopback is already allowed at the top of the test.)
	expectExecuteDelegationBase(mock)
	expectExecuteDelegationFailed(mock)
	a2aBody, _ := json.Marshal(map[string]interface{}{
		"jsonrpc": "2.0", "id": "1", "method": "message/send",
		"params": map[string]interface{}{
			"message": map[string]interface{}{
				"role":  "user",
				"parts": []map[string]string{{"type": "text", "text": "do work"}},
			},
		},
	})
	dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody)
	time.Sleep(100 * time.Millisecond)
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestExecuteDelegation_ProxyErrorEmptyBody_RemainsFailed verifies that the pre-fix failure
// path is unchanged when proxyA2ARequest returns an error with a 2xx status but empty body.
// The new condition requires len(respBody) > 0, so empty body routes to failure.
func TestExecuteDelegation_ProxyErrorEmptyBody_RemainsFailed(t *testing.T) {
	mock := setupTestDB(t)
	mr := setupTestRedis(t)
	allowLoopbackForTest(t)
	broadcaster := newTestBroadcaster()
	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	dh := NewDelegationHandler(wh, broadcaster)

	// Server returns 502 Bad Gateway — proxyA2ARequest returns 502, body="" (empty), error != nil.
	// New condition: proxyErr != nil && len(respBody) > 0 && status >= 200 && status < 300
	//   → len(respBody) == 0 → condition FALSE → falls through to failure.
	// isTransientProxyError(502) is TRUE → retry → same result → failure.
	agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusBadGateway)
		// No body — connection closes normally.
	}))
	defer agentServer.Close()
	mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentServer.URL)

	// First attempt: updateDelegationStatus(dispatched) — from expectExecuteDelegationBase.
	expectExecuteDelegationBase(mock)
	// Second attempt (retry): updateDelegationStatus(dispatched) again.
	mock.ExpectExec("UPDATE activity_logs SET status").
		WithArgs("dispatched", "", testSourceID, testDelegationID).
		WillReturnResult(sqlmock.NewResult(0, 1))
	// Failure path: INSERT + UPDATE (failed).
	expectExecuteDelegationFailed(mock)
	a2aBody, _ := json.Marshal(map[string]interface{}{
		"jsonrpc": "2.0", "id": "1", "method": "message/send",
		"params": map[string]interface{}{
			"message": map[string]interface{}{
				"role":  "user",
				"parts": []map[string]string{{"type": "text", "text": "do work"}},
			},
		},
	})
	dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody)
	time.Sleep(100 * time.Millisecond)
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestExecuteDelegation_CleanProxyResponse_Unchanged verifies that a clean proxy response
// (no error, 200 with body) is unaffected by the new condition. This is the baseline:
// proxyErr == nil so the new condition never fires.
func TestExecuteDelegation_CleanProxyResponse_Unchanged(t *testing.T) {
	mock := setupTestDB(t)
	mr := setupTestRedis(t)
	allowLoopbackForTest(t)
	broadcaster := newTestBroadcaster()
	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	dh := NewDelegationHandler(wh, broadcaster)
	agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// BUG FIX: headers must be set BEFORE WriteHeader — net/http ignores
		// any header mutation once the status line has been written. The
		// original order (WriteHeader, then Header().Set) silently dropped
		// the Content-Type.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		w.Write([]byte(`{"result":{"parts":[{"text":"all good"}]}}`))
	}))
	defer agentServer.Close()
	mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentServer.URL)
	expectExecuteDelegationBase(mock)
	expectExecuteDelegationSuccess(mock, `{"result":{"parts":[{"text":"all good"}]}}`)
	a2aBody, _ := json.Marshal(map[string]interface{}{
		"jsonrpc": "2.0", "id": "1", "method": "message/send",
		"params": map[string]interface{}{
			"message": map[string]interface{}{
				"role":  "user",
				"parts": []map[string]string{{"type": "text", "text": "do work"}},
			},
		},
	})
	dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody)
	time.Sleep(100 * time.Millisecond)
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
}

View File

@ -6,12 +6,23 @@ import (
)
// StartSweeperWithIntervalForTest exposes startSweeperWithInterval to
// the external test package. The production code uses StartSweeper
// the external test package. The production code uses StartSweeper
// (which pins the canonical SweepInterval); tests pin a short interval
// to exercise the ticker-driven cycle without burning real wall-clock
// time. The Go convention `export_test.go` keeps this seam OUT of the
// production binary — files ending in _test.go are stripped at build
// time, so this re-export only exists during `go test`.
func StartSweeperWithIntervalForTest(ctx context.Context, storage Storage, ackRetention, interval time.Duration) {
startSweeperWithInterval(ctx, storage, ackRetention, interval)
startSweeperWithInterval(ctx, storage, ackRetention, interval, nil)
}
// StartSweeperForTest starts the sweeper and returns a done channel
// that is closed exactly once when the loop exits. Tests MUST receive
// from done before returning so the goroutine has fully terminated and
// the shared metric counters are stable for the next test's baseline
// capture (issue #86).
func StartSweeperForTest(ctx context.Context, storage Storage, ackRetention time.Duration) chan struct{} {
done := make(chan struct{})
go startSweeperWithInterval(ctx, storage, ackRetention, SweepInterval, done)
return done
}

View File

@ -66,15 +66,21 @@ const sweepDeadline = 30 * time.Second
// to exercise the ticker-driven sweep path without burning real wall-
// clock time.
func StartSweeper(ctx context.Context, storage Storage, ackRetention time.Duration) {
startSweeperWithInterval(ctx, storage, ackRetention, SweepInterval)
startSweeperWithInterval(ctx, storage, ackRetention, SweepInterval, nil)
}
// startSweeperWithInterval is the test-friendly variant of StartSweeper
// — same loop, but the cadence is caller-specified. Production code
// should use StartSweeper to keep the SweepInterval constant pinned.
func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention, interval time.Duration) {
// If done is non-nil it is closed exactly once when the loop exits,
// allowing tests to wait for the goroutine to fully terminate before
// asserting on shared metric counters (issue #86).
func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention, interval time.Duration, done chan struct{}) {
if storage == nil {
log.Println("pendinguploads sweeper: storage is nil — sweeper disabled")
if done != nil {
close(done)
}
return
}
if ackRetention == 0 {
@ -86,6 +92,12 @@ func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention
)
ticker := time.NewTicker(interval)
defer ticker.Stop()
defer func() {
log.Println("pendinguploads sweeper: shutdown")
if done != nil {
close(done)
}
}()
// Run once immediately so a platform restart cleans up any rows
// that became eligible while we were down — don't make the
// operator wait 5 minutes for the first sweep.
@ -93,9 +105,16 @@ func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention
for {
select {
case <-ctx.Done():
log.Println("pendinguploads sweeper: shutdown")
return
case <-ticker.C:
// Guard: ctx may have been cancelled between the ticker firing
// and this case being selected (MPMC channel; close-to-ready race).
// Calling sweepOnce with an already-cancelled ctx would increment
// the error counter on shutdown — polluting the next test's
// baseline (issue #86 full-suite failure).
if ctx.Err() != nil {
continue
}
sweepOnce(ctx, storage, ackRetention)
}
}

View File

@ -136,7 +136,7 @@ func TestStartSweeper_RunsImmediatelyAndOnTick(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go pendinguploads.StartSweeper(ctx, store, time.Hour)
done := pendinguploads.StartSweeperForTest(ctx, store, time.Hour)
store.waitForCycle(t, 1, 2*time.Second)
if got := store.calls.Load(); got < 1 {
t.Errorf("expected at least one immediate sweep, got %d", got)
@ -145,6 +145,10 @@ func TestStartSweeper_RunsImmediatelyAndOnTick(t *testing.T) {
if store.gotRetention.Load() != 3600 {
t.Errorf("retention seconds = %d, want 3600", store.gotRetention.Load())
}
// #86 fix: ensure goroutine has exited before the next test's
// metricDelta() baseline capture.
cancel()
<-done
}
func TestStartSweeper_ZeroAckRetentionUsesDefault(t *testing.T) {
@ -152,23 +156,22 @@ func TestStartSweeper_ZeroAckRetentionUsesDefault(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go pendinguploads.StartSweeper(ctx, store, 0)
done := pendinguploads.StartSweeperForTest(ctx, store, 0)
store.waitForCycle(t, 1, 2*time.Second)
want := int64(pendinguploads.DefaultAckRetention.Seconds())
if store.gotRetention.Load() != want {
t.Errorf("retention = %d, want default %d", store.gotRetention.Load(), want)
}
// #86 fix.
cancel()
<-done
}
func TestStartSweeper_ContextCancelStopsLoop(t *testing.T) {
store := newFakeSweepStorage([]pendinguploads.SweepResult{{}}, nil)
ctx, cancel := context.WithCancel(context.Background())
done := make(chan struct{})
go func() {
pendinguploads.StartSweeper(ctx, store, time.Second)
close(done)
}()
done := pendinguploads.StartSweeperForTest(ctx, store, time.Second)
store.waitForCycle(t, 1, 2*time.Second)
cancel()
@ -187,14 +190,17 @@ func TestStartSweeperWithInterval_TickerFiresAdditionalCycles(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go pendinguploads.StartSweeperWithIntervalForTest(ctx, store, time.Hour, 30*time.Millisecond)
done := pendinguploads.StartSweeperForTest(ctx, store, time.Hour)
// Immediate cycle + at least one tick-driven cycle.
store.waitForCycle(t, 2, 2*time.Second)
if got := store.calls.Load(); got < 2 {
t.Errorf("expected ≥2 cycles (immediate + 1 tick), got %d", got)
}
// #86 fix: drain the done channel so the goroutine is fully gone
// before the next test's metricDelta() baseline capture.
cancel()
<-done
}
func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) {
@ -217,7 +223,7 @@ func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) {
// waitForCycle is too early.
_, _, deltaError := metricDelta(t)
go pendinguploads.StartSweeper(ctx, store, time.Hour)
done := pendinguploads.StartSweeperForTest(ctx, store, time.Hour)
// Wait for the first (errored) cycle.
store.waitForCycle(t, 1, 2*time.Second)
@ -226,11 +232,13 @@ func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) {
// stops the loop on the next select pass with no in-flight metric
// writes outstanding.
waitForMetricDelta(t, deltaError, 1, 2*time.Second)
// Cancel — the goroutine returns cleanly, proving the error path
// didn't crash the loop. Without this fix the goroutine would have
// either panicked (process abort visible at exit) or stuck (this
// cancel + done-channel pattern would deadlock instead).
// Cancel and wait for the goroutine to fully exit (#86 fix).
// Without the done-channel wait, the goroutine races with the next
// test's metricDelta() baseline capture — the next test may see
// error=1 from this test still "in flight", throwing off its
// deltaError assertion.
cancel()
<-done
}
// metricDelta returns a function that, when called, returns how much
@ -265,7 +273,7 @@ func TestStartSweeper_RecordsMetricsOnSuccess(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go pendinguploads.StartSweeper(ctx, store, time.Hour)
done := pendinguploads.StartSweeperForTest(ctx, store, time.Hour)
store.waitForCycle(t, 1, 2*time.Second)
// Poll for the success counters to settle — closes the cycleDone-
@ -278,6 +286,15 @@ func TestStartSweeper_RecordsMetricsOnSuccess(t *testing.T) {
if got := deltaError(); got != 0 {
t.Errorf("error counter delta = %d, want 0", got)
}
// #86 fix: drain the done channel so the goroutine is fully gone
// before the next test's metricDelta() baseline capture. Without this
// the previous test's goroutine could still be mid-Sweep (blocked on
// the fake's results channel) and its eventual return would mutate
// the shared error/acked counters after the next test has already
// snapshot its baseline.
cancel()
<-done
}
func TestStartSweeper_RecordsMetricsOnError(t *testing.T) {
@ -290,7 +307,7 @@ func TestStartSweeper_RecordsMetricsOnError(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go pendinguploads.StartSweeper(ctx, store, time.Hour)
done := pendinguploads.StartSweeperForTest(ctx, store, time.Hour)
store.waitForCycle(t, 1, 2*time.Second)
// Poll for the error counter to settle — cycleDone fires inside
@ -300,4 +317,9 @@ func TestStartSweeper_RecordsMetricsOnError(t *testing.T) {
// though the metric WILL be 1 a few ms later. See
// waitForMetricDelta comment.
waitForMetricDelta(t, deltaError, 1, 2*time.Second)
// #86 fix: ensure the goroutine has fully exited before the next
// test's metricDelta() baseline capture.
cancel()
<-done
}

View File

@ -132,19 +132,20 @@ func TestLocalResolver_HonoursContextCancellation(t *testing.T) {
}
func TestLocalResolver_BubblesUpCopyFailure(t *testing.T) {
// os.Chmod(dst, 0o555) silently passes when os.Geteuid() == 0
// (root bypasses POSIX permission checks). We cannot reliably
// exercise the write-failure branch in a root environment without
// patching the syscalls, so skip it honestly.
// Source file the copyTree walk would read; make dst unwritable so
// the copyFile step fails. Skip when running as root — Linux
// filesystem permissions are advisory-only for uid 0, so chmod 0o555
// does not prevent writes and the test passes vacuously instead of
// exercising the error path (issue #87).
if os.Getuid() == 0 {
t.Skip("running as root — cannot exercise write-failure branch")
t.Skip("skipping: chmod 0o555 is ineffective when running as root")
}
base := t.TempDir()
writePlugin(t, base, "demo", map[string]string{
"plugin.yaml": "name: demo\n",
})
dst := t.TempDir()
// Make dst read-only so creating files inside it fails.
if err := os.Chmod(dst, 0o555); err != nil {
t.Fatal(err)
}

View File

@ -401,6 +401,35 @@ if "a2a" not in sys.modules:
# tests now live in the claude-code template repo, where the real SDK
# IS installed via Dockerfile, so no stub is needed.
# ==================== Test isolation fixtures ====================
import pytest
@pytest.fixture(scope="function", autouse=True)
def _clear_platform_auth_cache():
    """Reset platform_auth._cached_token before each test.

    Fixes issue #160: tests that use monkeypatch.delenv("MOLECULE_WORKSPACE_TOKEN")
    to simulate "no token in env" fail when platform_auth._cached_token was already
    populated from a prior test's MOLECULE_WORKSPACE_TOKEN value. The cache is
    filled at module import or on the first get_token() call and persists for the
    process lifetime; monkeypatch.delenv removes the environment variable but not
    the module-level cache.

    Runs at function scope so each test starts with a clean slate regardless of
    what the previous test set. The import happens inside the fixture (not at the
    top of this file) because conftest.py runs during test collection, before
    platform_auth is necessarily importable in every test environment. If the
    module is absent, the ImportError is swallowed and the fixture is a no-op.
    """
    try:
        import platform_auth as _pa
        _pa.clear_cache()
    except ImportError:
        pass
    yield  # run the test; teardown has nothing to do
if "langchain_core" not in sys.modules:
_make_langchain_mocks()