fix(canvas-chat): treat Cloudflare 524/522/504 as 'still processing', not unreachable (core#2697) #2750

Merged
devops-engineer merged 2 commits from fix/chat-524-not-unreachable into main 2026-06-13 12:34:50 +00:00
3 changed files with 54 additions and 2 deletions
@@ -82,4 +82,38 @@ describe("useChatSend — multi-send (core#2697 feature 2)", () => {
expect(onUserMessage).not.toHaveBeenCalled();
expect(apiPostMock).not.toHaveBeenCalled();
});
it("treats a Cloudflare 524 gateway timeout as 'still processing' (no unreachable banner)", async () => {
  // Scenario: the turn runs past Cloudflare's ~100s edge limit, so the POST
  // rejects with an Error whose `status` is 524. The request was accepted and
  // the agent keeps working; its reply is expected over the WebSocket.
  const gatewayTimeout = new Error("API POST /workspaces/ws-1/a2a: 524 ") as Error & { status?: number };
  gatewayTimeout.status = 524;
  apiPostMock.mockRejectedValueOnce(gatewayTimeout);

  const onUserMessage = vi.fn();
  const rendered = renderHook(() =>
    useChatSend("ws-1", { getHistoryMessages: () => [], onUserMessage }),
  );

  await act(async () => {
    await rendered.result.current.sendMessage("long migrate task");
    // Two extra microtask turns so the rejection handler has settled.
    await Promise.resolve();
    await Promise.resolve();
  });

  // The hook must keep its spinner up (sending stays true) and must not
  // raise an error banner.
  const hookState = rendered.result.current;
  expect(hookState.sending).toBe(true);
  expect(hookState.error).toBeNull();
});
it("a Cloudflare 522 (couldn't connect to origin) DOES surface the unreachable banner", async () => {
  // CR2 distinction: a 522 means Cloudflare never established a connection to
  // the origin, i.e. it is genuinely unreachable. Unlike a 524 (accepted but
  // slow), a 522 must NOT be swallowed: the user has to see that the message
  // did not land.
  const connectFailure = new Error("API POST /workspaces/ws-1/a2a: 522 ") as Error & { status?: number };
  connectFailure.status = 522;
  apiPostMock.mockRejectedValueOnce(connectFailure);

  const rendered = renderHook(() =>
    useChatSend("ws-1", { getHistoryMessages: () => [] }),
  );

  await act(async () => {
    await rendered.result.current.sendMessage("hi");
    // Two extra microtask turns so the rejection handler has settled.
    await Promise.resolve();
    await Promise.resolve();
  });

  expect(rendered.result.current.error).toMatch(/unreachable/i);
});
});
@@ -322,7 +322,20 @@ export function useChatSend(workspaceId: string, options: UseChatSendOptions) {
const isClientTimeout =
e !== null && typeof e === "object" &&
"name" in e && (e as { name: unknown }).name === "TimeoutError";
if (isClientTimeout) {
// CLOUDFLARE 524 ≠ UNREACHABLE (jrs-auto, 2026-06-13). The canvas→agent
// A2A POST is held open for the whole turn; a turn that runs longer
// than Cloudflare's ~100s edge limit gets a 524 ("A Timeout Occurred")
// — the origin ACCEPTED the request and is still processing it (the
// agent is visibly running tools), and its reply arrives via the
// AGENT_MESSAGE WebSocket event, exactly like the client-timeout case.
// ONLY 524: per CR2, 522 ("Connection Timed Out" — CF couldn't even
// connect to the origin) and 504 mean the request was NOT accepted /
// the origin is genuinely unreachable, so those MUST still surface the
// error banner. Don't conflate "accepted + slow" (524) with "couldn't
// connect" (522).
const status = (e as { status?: number } | null)?.status;
const isCloudflareHeldRequest = status === 524;
if (isClientTimeout || isCloudflareHeldRequest) {
return; // delivered; reply (and guard release) arrives via WS
}
releaseSendGuards();
+6 -1
View File
@@ -167,7 +167,12 @@ async function request<T>(
if (err instanceof PlatformUnavailableError) throw err;
}
}
throw new Error(`API ${method} ${path}: ${res.status} ${text}`);
const apiErr = new Error(`API ${method} ${path}: ${res.status} ${text}`);
// Attach the HTTP status so callers can branch without parsing the
// message (e.g. useChatSend treats a Cloudflare 524 on a held long A2A
// turn as "still processing", while 522/504 still surface as unreachable).
(apiErr as Error & { status?: number }).status = res.status;
throw apiErr;
}
return res.json();
}