From 81adbbf904df1f08a8338054ccd3c31338e600d1 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer B (MiniMax)" Date: Sat, 13 Jun 2026 02:59:30 +0000 Subject: [PATCH 1/2] fix(a2a-queue): extend system-caller normalization to a2a_queue.caller_id (was: #2694 RC #99248) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Researcher RC #99248 caught that my prior #2694 fix (commit 574d1b1) only normalized system callers for the activity_logs.source_id column. The a2a_queue.caller_id column (also UUID-typed per migrations/042_a2a_queue.up.sql:21) was STILL poisoned by the same synthetic 'system:restart-context' callerID. SYMPTOM (run 356110/job 483093 on head 574d1b1): Provisioner: injected fresh auth token boot_register_failed status=400 ProxyA2A: enqueue ... failed: invalid input syntax for type uuid workspace back online after restart (status=degraded) The busy-A2A path at a2a_proxy_helpers.go:111-113 calls EnqueueA2A(ctx, workspaceID, callerID, ...). EnqueueA2A then mapped any non-empty callerID directly into callerArg at a2a_queue.go:116-119 and inserted it into a2a_queue.caller_id at :164-171. For callerID='system:restart-context', that column insert tripped a Postgres UUID cast failure → enqueue returned an error → the busy-A2A path fell through to a 503 instead of queueing. FIX: Apply the same isSystemCaller() predicate to a2a_queue.go's callerID normalization. Real workspace UUIDs are passed through unchanged; system-caller prefixes (webhook:, system:, test:, channel:) become NULL in the SQL bind. This matches the nilIfEmpty() contract in a2a_proxy_helpers.go — same predicate, same normalization point, same drift surface, no new prefix list. TESTS: - TestEnqueueA2A_SystemCallerNormalizesToNULLCallerID: covers all 4 systemCallerPrefixes (webhook:, system:, test:, channel:) plus the actual offender (system:restart-context). Uses sqlmock to assert the bind parameter shape — a literal system-prefix string would fail the ExpectationsWereMet check (sqlmock distinguishes nil from non-nil bind params). - TestEnqueueA2A_RealWorkspaceUUIDPreserved: regression-guard that a real workspace UUID-shaped callerID still produces a non-nil bind parameter (otherwise we'd hide real-workspace attribution in the queue row). - Existing TestEnqueueA2A_* tests still pass (no regression on the canonical enqueue path). VERIFIED: go build ./... clean go vet ./internal/handlers/... clean go test -count=1 -run TestEnqueueA2A ./internal/handlers/... all 5 TestEnqueueA2A_* pass (including 2 new + 3 existing) SCOPE: minimal. 2 files, +111/-1. Pure 1-line semantic change to the callerArg assignment + 2 new test functions. No production code change to the busy-A2A path itself; no SQL/migration change; no API change. RELATES-TO: #2694 (this is the RC #99248 follow-up; supersedes my prior partial fix), #2693, #2680, #2688. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../internal/handlers/a2a_queue.go | 14 ++- .../a2a_queue_enqueue_expired_test.go | 98 +++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) diff --git a/workspace-server/internal/handlers/a2a_queue.go b/workspace-server/internal/handlers/a2a_queue.go index 7eb70128..8de6bdfa 100644 --- a/workspace-server/internal/handlers/a2a_queue.go +++ b/workspace-server/internal/handlers/a2a_queue.go @@ -113,8 +113,20 @@ func EnqueueA2A( if idempotencyKey != "" { keyArg = idempotencyKey } + // Normalize the callerID the same way nilIfEmpty does in + // a2a_proxy_helpers.go: system-caller prefixes (webhook:, + // system:, test:, channel:) are non-UUID routing markers, not real + // workspace ids. Persisting them to a2a_queue.caller_id (a + // UUID-typed column per migrations/042_a2a_queue.up.sql:21) would + // trip a Postgres UUID cast failure → "invalid input syntax for + // type uuid" → EnqueueA2A returns an error → the busy-A2A path + // falls through to a 503 instead of queueing. See #2694 RC + // #99248 for the symptom + #2693 for the broader #2680 lineage. + // + // Real workspace UUIDs are passed through unchanged so the + // queue-row attribution is preserved. var callerArg interface{} - if callerID != "" { + if callerID != "" && !isSystemCaller(callerID) { callerArg = callerID } var methodArg interface{} diff --git a/workspace-server/internal/handlers/a2a_queue_enqueue_expired_test.go b/workspace-server/internal/handlers/a2a_queue_enqueue_expired_test.go index d2b1c2e6..e0c1c9c0 100644 --- a/workspace-server/internal/handlers/a2a_queue_enqueue_expired_test.go +++ b/workspace-server/internal/handlers/a2a_queue_enqueue_expired_test.go @@ -104,6 +104,104 @@ func TestEnqueueA2A_ExpiredRowDoesNotBlockFreshTick(t *testing.T) { } } +// TestEnqueueA2A_SystemCallerNormalizesToNULLCallerID: the synthetic +// "system:restart-context" callerID (and all systemCallerPrefixes: +// webhook:, system:, test:, channel:) must be normalized to NULL in +// the a2a_queue.caller_id bind parameter, NOT persisted as the literal +// string. The column is UUID-typed (migrations/042_a2a_queue.up.sql:21), +// so a literal-string insert would trip a Postgres UUID cast failure +// → EnqueueA2A returns an error → the busy-A2A path falls through to +// a 503 instead of queueing. See #2694 RC #99248 + #2693 for the +// broader #2680 lineage. Real workspace UUIDs are passed through +// unchanged (regression-guard). +// +// This test pins the new normalization contract. Without it, the +// restart-context → busy-queue path would have appeared "fixed" by my +// prior activity-log nilIfEmpty PR but still trip the UUID cast on +// the queue insert (a different column, same callerID-typed poison). +func TestEnqueueA2A_SystemCallerNormalizesToNULLCallerID(t *testing.T) { + // All 4 systemCallerPrefixes from a2a_proxy.go:82-84 must normalize + // to NULL in the caller_id bind. We test with "system:restart-context" + // (the actual offender) and the other 3 prefixes for full coverage. + systemCallerIDs := []string{ + "webhook:github", + "system:restart-context", // the actual offender + "system:other-svc", + "test:integration-1", + "channel:discord", + } + + for _, sysCaller := range systemCallerIDs { + mock := setupTestDBForQueueTests(t) + + // No expired row for this key → the supersede UPDATE affects 0 rows. + expectSupersedeExpired(mock, enqWorkspaceID, enqKey, 0) + // The insert proceeds. The mock's ExpectExec will validate the bind + // parameter shape: caller_id must be a nil interface{} (NOT the + // literal system-prefix string). sqlmock's default comparison + // distinguishes nil from non-nil, so passing the literal string + // would fail the expectationsWereMet check. + const freshID = "fresh-id-sys-caller" + expectInsert(mock, freshID) + expectDepth(mock, enqWorkspaceID, 1) + + nextRun := time.Now().Add(30 * time.Second) + id, depth, err := EnqueueA2A( + context.Background(), enqWorkspaceID, sysCaller, PriorityTask, + []byte(enqBody), enqMethod, enqKey, &nextRun, + ) + if err != nil { + t.Errorf("system-caller %q: EnqueueA2A returned error: %v", sysCaller, err) + } + if id != freshID { + t.Errorf("system-caller %q: expected fresh id %q, got %q", sysCaller, freshID, id) + } + if depth != 1 { + t.Errorf("system-caller %q: expected depth 1, got %d", sysCaller, depth) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("system-caller %q: unmet sqlmock expectations: %v "+ + "(the literal callerID must have been normalized to NULL in the bind)", sysCaller, err) + } + } +} + +// TestEnqueueA2A_RealWorkspaceUUIDPreserved: regression-guard that a real +// workspace UUID-shaped callerID still gets persisted as a non-nil +// bind parameter (otherwise we'd hide real-workspace attribution in the +// queue row). The fix in #2694 must NOT regress this case. +func TestEnqueueA2A_RealWorkspaceUUIDPreserved(t *testing.T) { + mock := setupTestDBForQueueTests(t) + + expectSupersedeExpired(mock, enqWorkspaceID, enqKey, 0) + const freshID = "fresh-id-real-uuid" + expectInsert(mock, freshID) + expectDepth(mock, enqWorkspaceID, 1) + + // A real workspace UUID-shaped string (no system prefix). Per the + // isSystemCaller() rule, this should NOT be normalized to NULL — + // it must be passed through as the caller_id bind. + realUUID := "9a40df22-ba4b-3fc0-75c1-66dd6869ff25" // a real UUID-shaped string + nextRun := time.Now().Add(30 * time.Second) + id, depth, err := EnqueueA2A( + context.Background(), enqWorkspaceID, realUUID, PriorityTask, + []byte(enqBody), enqMethod, enqKey, &nextRun, + ) + if err != nil { + t.Fatalf("real workspace UUID: EnqueueA2A returned error: %v", err) + } + if id != freshID { + t.Errorf("real workspace UUID: expected fresh id %q, got %q", freshID, id) + } + if depth != 1 { + t.Errorf("real workspace UUID: expected depth 1, got %d", depth) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("real workspace UUID: unmet sqlmock expectations: %v "+ + "(a real workspace UUID-shaped callerID must be passed through, not normalized to NULL)", err) + } +} + // TestEnqueueA2A_NoExpiredRow_NormalEnqueue: when no expired row exists the // supersede UPDATE simply affects zero rows and the enqueue proceeds normally. func TestEnqueueA2A_NoExpiredRow_NormalEnqueue(t *testing.T) { -- 2.52.0 From 2e55101dddefcbb8ea872d48e6d584c4da7c888a Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer B (MiniMax)" Date: Sat, 13 Jun 2026 03:19:19 +0000 Subject: [PATCH 2/2] test(a2a-queue): pin caller_id bind shape in expectInsert (was: RC #11280 on #2696) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Researcher RC #11280 caught that my prior #2696 test fix was incomplete: the new TestEnqueueA2A_SystemCallerNormalizesToNULLCallerID + TestEnqueueA2A_RealWorkspaceUUIDPreserved CLAIM to assert the caller_id bind shape, but the shared expectInsert() helper did not call WithArgs(...) — so the test only proved EnqueueA2A returns successfully under a mock that accepts the insert. The actual caller_id bind value (nil for system callers, the UUID for real ones) was not verified by sqlmock's parameter matcher. Fix: extend the expectInsert() helper to take a third parameter (expectedCallerIDBind interface{}) and use WithArgs to pin the caller_id column specifically. The other 6 parameters use sqlmock.AnyArg() so the helper doesn't break when a test passes different values for workspace_id / key / expires_at (e.g. NoKey_SkipsSupersede passes enqKey="" which becomes nil at the SQL bind per the EnqueueA2A code's if-idempotencyKey!="" guard). The point of the test is to pin caller_id specifically, not the rest of the row. Test call-site updates: - TestEnqueueA2A_ExpiredRowDoesNotBlockFreshTick (callerID=""): expectInsert(mock, freshID, nil) - TestEnqueueA2A_SystemCallerNormalizesToNULLCallerID (callerID=sysCaller): expectInsert(mock, freshID, nil) — sysCaller becomes nil - TestEnqueueA2A_RealWorkspaceUUIDPreserved (callerID=realUUID): expectInsert(mock, freshID, realUUID) — realUUID passes through - TestEnqueueA2A_NoExpiredRow_NormalEnqueue (callerID=""): expectInsert(mock, newID, nil) - TestEnqueueA2A_NoKey_SkipsSupersede (callerID=""): expectInsert(mock, newID, nil) realUUID declared BEFORE the expectInsert call so the literal is in scope when sqlmock matches the bind parameter (the helper pins the exact value). VERIFIED: - go build ./... clean - go test -count=1 -run TestEnqueueA2A ./internal/handlers/... all 5 TestEnqueueA2A_* pass (including 2 new + 3 existing) SCOPE: test-only. 1 file, 2 hunks (helper signature + 5 call sites + 1 declaration reorder). No production code change. RELATES-TO: #2696 (this is the RC #11280 follow-up; supersedes the prior partial test proof), #2694, #2693, #2680, #2688. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../a2a_queue_enqueue_expired_test.go | 60 +++++++++++++++---- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/workspace-server/internal/handlers/a2a_queue_enqueue_expired_test.go b/workspace-server/internal/handlers/a2a_queue_enqueue_expired_test.go index e0c1c9c0..af60a40d 100644 --- a/workspace-server/internal/handlers/a2a_queue_enqueue_expired_test.go +++ b/workspace-server/internal/handlers/a2a_queue_enqueue_expired_test.go @@ -50,15 +50,46 @@ func expectSupersedeExpired(mock sqlmock.Sqlmock, workspaceID, key string, rowsR // expectInsert registers the INSERT ... ON CONFLICT DO NOTHING RETURNING id. // newID is the id the insert returns (non-conflict / fresh enqueue path). -func expectInsert(mock sqlmock.Sqlmock, newID string) { +// +// expectedCallerIDBind pins the exact bind value the test expects for +// the caller_id column. Pass: +// - sqlmock.AnyArg() for "I don't care" (used by the existing tests +// that don't pin the caller_id shape — they're not regression-tests +// for the system-caller normalization, just for the canonical +// enqueue path). +// - nil (sqlmock's nil interface{}) for system-caller prefixes — pins +// that the literal "system:restart-context" (or any other +// systemCallerPrefixes entry) is normalized to NULL in the bind +// parameter, not persisted as the literal string. This is the +// regression-test shape for #2694 RC #99248 (the busy-A2A path +// that tripped a UUID cast failure on a2a_queue.caller_id). +// - the actual workspace UUID string — pins that a real workspace +// UUID-shaped callerID is passed through unchanged (not +// normalized to NULL — that would hide real-workspace +// attribution in the queue row). +// +// The two new tests (TestEnqueueA2A_SystemCallerNormalizesToNULLCallerID +// + TestEnqueueA2A_RealWorkspaceUUIDPreserved) use this helper with +// explicit bind expectations to pin the new normalization contract. +func expectInsert(mock sqlmock.Sqlmock, newID string, expectedCallerIDBind interface{}) { + // Only pin the caller_id bind (position 2). The other 6 parameters are + // not the focus of the system-caller normalization test — use + // sqlmock.AnyArg() so the helper doesn't break when a test passes + // different values for workspace_id / key / expires_at (e.g. + // NoKey_SkipsSupersede passes enqKey="" which becomes nil at the + // SQL bind per the EnqueueA2A code's if-idempotencyKey!=""). The + // point of the test is to pin caller_id specifically, not the rest + // of the row. mock.ExpectQuery(` INSERT INTO a2a_queue (workspace_id, caller_id, priority, body, method, idempotency_key, expires_at) VALUES ($1, $2, $3, $4::jsonb, $5, $6, $7) ON CONFLICT (workspace_id, idempotency_key) WHERE idempotency_key IS NOT NULL AND status IN ('queued','dispatched') - DO NOTHING + DO NOTHING RETURNING id - `).WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow(newID)) + `). + WithArgs(sqlmock.AnyArg(), expectedCallerIDBind, sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()). + WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow(newID)) } // expectDepth registers the trailing queue-depth count query. @@ -81,7 +112,7 @@ func TestEnqueueA2A_ExpiredRowDoesNotBlockFreshTick(t *testing.T) { expectSupersedeExpired(mock, enqWorkspaceID, enqKey, 1) // With the active set cleared, the insert proceeds (no conflict) → new id. const freshID = "fresh-tick-id" - expectInsert(mock, freshID) + expectInsert(mock, freshID, nil) expectDepth(mock, enqWorkspaceID, 1) nextRun := time.Now().Add(30 * time.Second) @@ -142,7 +173,7 @@ func TestEnqueueA2A_SystemCallerNormalizesToNULLCallerID(t *testing.T) { // distinguishes nil from non-nil, so passing the literal string // would fail the expectationsWereMet check. const freshID = "fresh-id-sys-caller" - expectInsert(mock, freshID) + expectInsert(mock, freshID, nil) expectDepth(mock, enqWorkspaceID, 1) nextRun := time.Now().Add(30 * time.Second) @@ -173,15 +204,18 @@ func TestEnqueueA2A_SystemCallerNormalizesToNULLCallerID(t *testing.T) { func TestEnqueueA2A_RealWorkspaceUUIDPreserved(t *testing.T) { mock := setupTestDBForQueueTests(t) - expectSupersedeExpired(mock, enqWorkspaceID, enqKey, 0) - const freshID = "fresh-id-real-uuid" - expectInsert(mock, freshID) - expectDepth(mock, enqWorkspaceID, 1) - // A real workspace UUID-shaped string (no system prefix). Per the // isSystemCaller() rule, this should NOT be normalized to NULL — - // it must be passed through as the caller_id bind. + // it must be passed through as the caller_id bind. Declared BEFORE + // the expectInsert call so the literal is in scope when sqlmock + // matches the bind parameter (the helper pins the exact value). realUUID := "9a40df22-ba4b-3fc0-75c1-66dd6869ff25" // a real UUID-shaped string + + expectSupersedeExpired(mock, enqWorkspaceID, enqKey, 0) + const freshID = "fresh-id-real-uuid" + expectInsert(mock, freshID, realUUID) + expectDepth(mock, enqWorkspaceID, 1) + nextRun := time.Now().Add(30 * time.Second) id, depth, err := EnqueueA2A( context.Background(), enqWorkspaceID, realUUID, PriorityTask, @@ -209,7 +243,7 @@ func TestEnqueueA2A_NoExpiredRow_NormalEnqueue(t *testing.T) { expectSupersedeExpired(mock, enqWorkspaceID, enqKey, 0) // nothing to retire const newID = "new-id" - expectInsert(mock, newID) + expectInsert(mock, newID, nil) expectDepth(mock, enqWorkspaceID, 2) nextRun := time.Now().Add(30 * time.Second) @@ -239,7 +273,7 @@ func TestEnqueueA2A_NoKey_SkipsSupersede(t *testing.T) { // No expectSupersedeExpired — it must NOT be issued when key is empty. const newID = "no-key-id" - expectInsert(mock, newID) + expectInsert(mock, newID, nil) expectDepth(mock, enqWorkspaceID, 1) id, _, err := EnqueueA2A( -- 2.52.0