From ba8c571d650a03441f8a953e8644e5be77f38964 Mon Sep 17 00:00:00 2001 From: hongming-ceo-delegated Date: Sat, 23 May 2026 00:40:01 -0700 Subject: [PATCH 1/2] fix: scheduler detectResultKind allowlist `message` + `task` envelope kinds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PM scheduler hit 21 consecutive false-failure ticks (canvas screenshot 2026-05-23) before the auto-disable kicked in. Root cause: detectResultKind treated every result.kind != "ok" / "" as a non-ok SDK signal — but the A2A-SDK canonical successful response envelope is: {"jsonrpc":"2.0","result":{"kind":"message","parts":[...]},"id":"..."} `kind: "message"` means "this is a Message envelope" (success), NOT an error. The original #1696 allowlist of just {"", "ok"} mis-flagged every successful agent reply as an SDK error and gated PM ticks at status 'message', which propagated to the canvas Schedule tab as "SDK error: result_kind=message". Fix: extract a `resultOKKinds` allowlist {"", "ok", "message", "task"}. Anything outside is treated as a non-ok signal as before. "task" is the A2A Task envelope (async in-progress task return); also a success-shape. Regression guard tests added. Follow-up: after this lands, re-enable any schedules that were auto-disabled by the false-positive (PM = deedcb61 the most affected). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/scheduler/scheduler.go | 41 +++++++++++++++---- .../internal/scheduler/scheduler_test.go | 19 +++++++++ 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/workspace-server/internal/scheduler/scheduler.go b/workspace-server/internal/scheduler/scheduler.go index 186a60be3..2a750fcad 100644 --- a/workspace-server/internal/scheduler/scheduler.go +++ b/workspace-server/internal/scheduler/scheduler.go @@ -832,9 +832,18 @@ func (s *Scheduler) sweepPhantomBusy(ctx context.Context) { // exhaustion, SDK internal errors) — the error surfaces only in the response // body under result.kind or result.result_kind. // -// Returns an empty string when the response is clean (result_kind is "ok" or -// absent). Returns the result_kind value when it is a non-ok signal, so callers -// can propagate it as the schedule's last_status. +// Returns an empty string when the response is clean (result_kind is "ok", +// "message" — the A2A-SDK canonical successful Message envelope — or absent). +// Returns the result_kind value when it is a non-ok signal, so callers can +// propagate it as the schedule's last_status. +// +// Known successful (= treat-as-ok) kinds (resultOKKinds): +// - "ok" — explicit success signal +// - "message" — A2A-SDK Message envelope (`{"result":{"kind":"message","parts":[...]}}`), +// emitted by every successful agent reply. Fix: #1696 originally allow-listed only +// "ok" / empty, which mis-flagged every successful agent response as an SDK error +// (PM scheduler observed 21 consecutive false-failure ticks before auto-disable; +// screenshot 2026-05-23). See [#1696 follow-up]. // // Known non-ok kinds: // - "rate_limited" — LLM API rate-limit hit (Max-plan, etc.) @@ -842,6 +851,18 @@ func (s *Scheduler) sweepPhantomBusy(ctx context.Context) { // - "sdk_error" — SDK threw an internal error // // See #1696. +// +// resultOKKinds is the allowlist of `result.kind` values that mean "this +// agent run succeeded". Anything outside this set is treated as a non-ok +// SDK signal. Add to this list when new successful envelope kinds are +// introduced upstream. +var resultOKKinds = map[string]struct{}{ + "": {}, // absent / empty → treat as ok (no signal) + "ok": {}, // explicit success + "message": {}, // A2A-SDK Message envelope (successful agent reply) + "task": {}, // A2A-SDK Task envelope (in-progress async task) — also a success-shape signal, not an error +} + func detectResultKind(body []byte) string { if len(body) == 0 { return "" @@ -854,18 +875,22 @@ func detectResultKind(body []byte) string { if rawResult, ok := top["result"]; ok { var result map[string]json.RawMessage if err := json.Unmarshal(rawResult, &result); err == nil { - // result.kind (canonical JSON-RPC error envelope field). + // result.kind (canonical JSON-RPC envelope field). if rawKind, ok := result["kind"]; ok { var k string - if json.Unmarshal(rawKind, &k) == nil && k != "" && k != "ok" { - return k + if json.Unmarshal(rawKind, &k) == nil { + if _, isOK := resultOKKinds[k]; !isOK { + return k + } } } // result.result_kind (legacy / alternative field name). if rawKind, ok := result["result_kind"]; ok { var k string - if json.Unmarshal(rawKind, &k) == nil && k != "" && k != "ok" { - return k + if json.Unmarshal(rawKind, &k) == nil { + if _, isOK := resultOKKinds[k]; !isOK { + return k + } } } } diff --git a/workspace-server/internal/scheduler/scheduler_test.go b/workspace-server/internal/scheduler/scheduler_test.go index 9f16e9b30..4e26fc01c 100644 --- a/workspace-server/internal/scheduler/scheduler_test.go +++ b/workspace-server/internal/scheduler/scheduler_test.go @@ -702,6 +702,25 @@ func TestDetectResultKind(t *testing.T) { body: `{"result":{"result_kind":"ok","parts":[{"text":"hello"}]}}`, wantKind: "", }, + { + // REGRESSION GUARD: A2A-SDK canonical Message envelope. + // Pre-fix, every successful agent reply was mis-flagged as an SDK + // error (PM scheduler hit 21 consecutive false-failure ticks before + // auto-disable; canvas screenshot 2026-05-23). + name: "clean ok response — result.kind=message (A2A Message envelope)", + body: `{"jsonrpc":"2.0","result":{"kind":"message","parts":[{"kind":"text","text":"hello"}]},"id":"1"}`, + wantKind: "", + }, + { + name: "clean ok response — result.result_kind=message", + body: `{"result":{"result_kind":"message","parts":[{"text":"hello"}]}}`, + wantKind: "", + }, + { + name: "clean ok response — result.kind=task (A2A Task envelope, async in-progress)", + body: `{"result":{"kind":"task","task_id":"abc","status":"working"}}`, + wantKind: "", + }, { name: "SDK error — result.kind=rate_limited", body: `{"result":{"kind":"rate_limited","parts":[{"text":"error"}]}}`, -- 2.52.0 From e26aef263626cc7b3402415b066f023b6a5e2fb5 Mon Sep 17 00:00:00 2001 From: hongming-ceo-delegated Date: Sat, 23 May 2026 01:12:59 -0700 Subject: [PATCH 2/2] Address CR2 review: gate result.kind=task on status.state, not blanket-ok MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CR2 (agent-reviewer) flagged the blanket "task" allowlist as too permissive: A2A Task envelopes carry a `status.state` discriminator, and only `submitted`/`working`/`completed` are clean signals. Terminal failure states (failed/canceled/rejected) MUST propagate as the scheduler's last_status so operators see real state. Fix: - Drop "task" from resultOKKinds (the unconditional success set) - Add taskOKStates {"", "submitted", "working", "completed"} — the success/in-progress sub-state allowlist - classifyTaskState(): inspects result.status.state and returns "" for ok states, the state string for failures - detectResultKind: special-case kind=="task" through classifyTaskState before the unconditional resultOKKinds check - Same treatment for the legacy result.result_kind field Regression tests (7 new): - clean: task state=working / submitted / completed / no-status-block - error: task state=failed / canceled / rejected All 25 detectResultKind test cases pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/scheduler/scheduler.go | 67 ++++++++++++++++--- .../internal/scheduler/scheduler_test.go | 40 ++++++++++- 2 files changed, 97 insertions(+), 10 deletions(-) diff --git a/workspace-server/internal/scheduler/scheduler.go b/workspace-server/internal/scheduler/scheduler.go index 2a750fcad..d77e36375 100644 --- a/workspace-server/internal/scheduler/scheduler.go +++ b/workspace-server/internal/scheduler/scheduler.go @@ -852,15 +852,56 @@ func (s *Scheduler) sweepPhantomBusy(ctx context.Context) { // // See #1696. // -// resultOKKinds is the allowlist of `result.kind` values that mean "this -// agent run succeeded". Anything outside this set is treated as a non-ok -// SDK signal. Add to this list when new successful envelope kinds are -// introduced upstream. +// resultOKKinds is the allowlist of `result.kind` values that are +// UNCONDITIONALLY successful (no further parsing needed). Anything +// outside this set is treated as a non-ok SDK signal, EXCEPT `task` +// which is gated separately on `result.status.state` (see +// classifyTaskState — A2A Task can be either in-progress or terminally +// failed, depending on its status). +// +// Add to this list when new always-success envelope kinds are introduced +// upstream. NEVER add an envelope that can carry a failure sub-state. var resultOKKinds = map[string]struct{}{ "": {}, // absent / empty → treat as ok (no signal) "ok": {}, // explicit success - "message": {}, // A2A-SDK Message envelope (successful agent reply) - "task": {}, // A2A-SDK Task envelope (in-progress async task) — also a success-shape signal, not an error + "message": {}, // A2A-SDK Message envelope (always a successful agent reply) +} + +// taskOKStates is the A2A Task `status.state` allowlist for results that +// have `kind: "task"`. Tasks can be in-progress (submitted/working) or +// terminally successful (completed) — those are clean signals to the +// scheduler. Terminal failure states (failed/canceled/rejected) are +// surfaced as the scheduler's last_status so operators can see the real +// state. Cf. CR2 review feedback on #1716. +var taskOKStates = map[string]struct{}{ + "": {}, // status.state absent → conservative: don't fire false-failure + "submitted": {}, // task accepted, not yet running + "working": {}, // task in progress + "completed": {}, // task finished successfully +} + +// classifyTaskState inspects `result.status.state` (or `result.status_state` +// legacy variant) and returns "" when the state is in taskOKStates (success +// or in-progress) or the state string when it is a terminal failure that +// should propagate as last_status. +func classifyTaskState(result map[string]json.RawMessage) string { + rawStatus, ok := result["status"] + if !ok { + return "" // no status block → no signal, leave clean + } + var status map[string]json.RawMessage + if err := json.Unmarshal(rawStatus, &status); err != nil { + return "" + } + if rawState, ok := status["state"]; ok { + var s string + if json.Unmarshal(rawState, &s) == nil { + if _, isOK := taskOKStates[s]; !isOK { + return s + } + } + } + return "" } func detectResultKind(body []byte) string { @@ -879,7 +920,13 @@ func detectResultKind(body []byte) string { if rawKind, ok := result["kind"]; ok { var k string if json.Unmarshal(rawKind, &k) == nil { - if _, isOK := resultOKKinds[k]; !isOK { + // Special-case task: success or failure depends on status.state. + if k == "task" { + if bad := classifyTaskState(result); bad != "" { + return bad + } + // task with ok / in-progress state → clean + } else if _, isOK := resultOKKinds[k]; !isOK { return k } } @@ -888,7 +935,11 @@ func detectResultKind(body []byte) string { if rawKind, ok := result["result_kind"]; ok { var k string if json.Unmarshal(rawKind, &k) == nil { - if _, isOK := resultOKKinds[k]; !isOK { + if k == "task" { + if bad := classifyTaskState(result); bad != "" { + return bad + } + } else if _, isOK := resultOKKinds[k]; !isOK { return k } } diff --git a/workspace-server/internal/scheduler/scheduler_test.go b/workspace-server/internal/scheduler/scheduler_test.go index 4e26fc01c..b078125ca 100644 --- a/workspace-server/internal/scheduler/scheduler_test.go +++ b/workspace-server/internal/scheduler/scheduler_test.go @@ -717,10 +717,46 @@ func TestDetectResultKind(t *testing.T) { wantKind: "", }, { - name: "clean ok response — result.kind=task (A2A Task envelope, async in-progress)", - body: `{"result":{"kind":"task","task_id":"abc","status":"working"}}`, + // A2A Task envelope, in-progress — `status.state` discriminator is + // `submitted` or `working` → treat as clean (not an SDK error). + name: "clean ok — task envelope state=working", + body: `{"result":{"kind":"task","task_id":"abc","status":{"state":"working"}}}`, wantKind: "", }, + { + name: "clean ok — task envelope state=submitted", + body: `{"result":{"kind":"task","status":{"state":"submitted"}}}`, + wantKind: "", + }, + { + name: "clean ok — task envelope state=completed", + body: `{"result":{"kind":"task","status":{"state":"completed"}}}`, + wantKind: "", + }, + { + // Conservative: missing status.state → don't fire false-failure. + name: "clean ok — task envelope no status block", + body: `{"result":{"kind":"task","task_id":"abc"}}`, + wantKind: "", + }, + { + // REGRESSION GUARD: terminal failure states MUST propagate as last_status. + // Without taskOKStates gating, the blanket "task" allowlist would have + // masked these — CR2 review feedback on #1716. + name: "SDK error — task envelope state=failed", + body: `{"result":{"kind":"task","status":{"state":"failed"}}}`, + wantKind: "failed", + }, + { + name: "SDK error — task envelope state=canceled", + body: `{"result":{"kind":"task","status":{"state":"canceled"}}}`, + wantKind: "canceled", + }, + { + name: "SDK error — task envelope state=rejected", + body: `{"result":{"kind":"task","status":{"state":"rejected"}}}`, + wantKind: "rejected", + }, { name: "SDK error — result.kind=rate_limited", body: `{"result":{"kind":"rate_limited","parts":[{"text":"error"}]}}`, -- 2.52.0