fix(a2a): raise ResponseHeaderTimeout 5m→30m for long synchronous turns (core#2723) #2749
@@ -132,15 +132,20 @@ func readBodyWithLimit(r io.Reader, limit int, kind string) ([]byte, error) {
|
||||
// a generic 502 page to canvas. 10s is well above realistic intra-region
|
||||
// latencies and well below CF's edge timeout.
|
||||
//
|
||||
// 3. Transport.ResponseHeaderTimeout — 5min default. From request-body-end
|
||||
// 3. Transport.ResponseHeaderTimeout — 30min default. From request-body-end
|
||||
// to response-headers-start. Configurable via
|
||||
// A2A_PROXY_RESPONSE_HEADER_TIMEOUT (envx.Duration). Covers cold-start
|
||||
// first-byte (30-60s OAuth flow above) with enough room for Opus agent
|
||||
// turns and Codex scheduled tasks (big context + internal delegate_task
|
||||
// round-trips routinely exceed the old 60s/180s ceilings). Body streaming
|
||||
// after headers is governed by the
|
||||
// per-request context deadline, NOT this timeout — so multi-minute agent
|
||||
// responses still work fine.
|
||||
// first-byte (30-60s OAuth flow above) AND long SYNCHRONOUS autonomous
|
||||
// turns where the runtime computes the whole response before sending
|
||||
// headers (it does NOT always stream a 200 early). A real "migrate from
|
||||
// blob" turn ran 443s and was killed at the old 5min default with
|
||||
// `timeout awaiting response headers` (core#2723 class — the SAME long-turn
|
||||
// cut as the idle watchdog). Aligned to 30min = the agent-to-agent
|
||||
// ceiling + the canvas idle default, so no LEGIT turn (none exceed that)
|
||||
// trips it; a genuinely-stuck agent is surfaced by DialContext (fast,
|
||||
// connection-level) + the reactive-health/heartbeat path, not by cutting
|
||||
// a working turn short. Body streaming after headers is governed by the
|
||||
// per-request context deadline, NOT this timeout.
|
||||
//
|
||||
// The point of (2) and (3) is to surface a *structured* 503 from
|
||||
// handleA2ADispatchError when the workspace agent is unreachable, so canvas
|
||||
@@ -153,7 +158,7 @@ var a2aClient = &http.Client{
|
||||
Timeout: 10 * time.Second,
|
||||
KeepAlive: 30 * time.Second,
|
||||
}).DialContext,
|
||||
ResponseHeaderTimeout: envx.Duration("A2A_PROXY_RESPONSE_HEADER_TIMEOUT", 5*time.Minute),
|
||||
ResponseHeaderTimeout: envx.Duration("A2A_PROXY_RESPONSE_HEADER_TIMEOUT", 30*time.Minute),
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
// MaxIdleConns / IdleConnTimeout: stdlib defaults are fine; agent
|
||||
// fan-in is bounded by the platform's broadcaster fan-out, not by
|
||||
|
||||
@@ -2850,7 +2850,10 @@ func TestLookupDeliveryMode_ContextCanceled_FailsClosed(t *testing.T) {
|
||||
// ==================== a2aClient ResponseHeaderTimeout config ====================
|
||||
|
||||
func TestA2AClientResponseHeaderTimeout(t *testing.T) {
|
||||
const defaultTimeout = 5 * time.Minute
|
||||
// core#2723 class: raised 5min→30min so long synchronous autonomous turns
|
||||
// (e.g. a 443s "migrate from blob" run that errored with "timeout awaiting
|
||||
// response headers") aren't cut short. Aligned to the agent ceiling + idle.
|
||||
const defaultTimeout = 30 * time.Minute
|
||||
|
||||
// Default (unset env) — a2aClient was initialised at package load time.
|
||||
if a2aClient.Transport.(*http.Transport).ResponseHeaderTimeout != defaultTimeout {
|
||||
|
||||
Reference in New Issue
Block a user