fix(self-deleg): 3-layer defense including SQL self-filter (closes #383)
CI / Canvas Deploy Reminder (push) Blocked by required conditions
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Waiting to run
publish-workspace-server-image / build-and-push (push) Failing after 12s
publish-workspace-server-image / Production auto-deploy (push) Has been skipped
Block internal-flavored paths / Block forbidden paths (push) Successful in 4s
CI / Detect changes (push) Successful in 7s
CI / Shellcheck (E2E scripts) (push) Failing after 17s
CI / all-required (push) Failing after 2s
E2E API Smoke Test / detect-changes (push) Successful in 6s
E2E Chat / detect-changes (push) Successful in 6s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 7s
Handlers Postgres Integration / detect-changes (push) Successful in 6s
Harness Replays / detect-changes (push) Successful in 6s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 8s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 6s
Runtime PR-Built Compatibility / detect-changes (push) Successful in 8s
CI / Platform (Go) (push) Successful in 4m55s
CI / Canvas (Next.js) (push) Successful in 6m13s
CI / Python Lint & Test (push) Successful in 6m43s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 3s
Harness Replays / Harness Replays (push) Successful in 17s
E2E API Smoke Test / E2E API Smoke Test (push) Failing after 2m20s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m31s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (push) Successful in 2m43s
E2E Chat / E2E Chat (push) Failing after 7m5s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 5s
main-red-watchdog / watchdog (push) Successful in 31s
gate-check-v3 / gate-check (push) Successful in 22s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Successful in 4m34s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 4s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 13s
gitea-merge-queue / queue (push) Successful in 13s
status-reaper / reap (push) Successful in 1m11s
ci-required-drift / drift (push) Successful in 1m15s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Successful in 5m46s

2 APPROVES (core-security id=5165, core-devops id=5167). CI/all-required green. Resolves chloe-dong 小董文婷 self-delegation 400 loop.
Co-authored-by: core-be <core-be@agents.moleculesai.app>
Co-committed-by: core-be <core-be@agents.moleculesai.app>
This commit was merged in pull request #1624.
This commit is contained in:
2026-05-21 00:12:38 +00:00
parent ee9dc5b9c5
commit 2ee97c097d
4 changed files with 265 additions and 22 deletions
@@ -122,8 +122,22 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
// #548 — prevent self-delegation: a workspace delegating to itself
// acquires _run_lock twice on the same mutex, deadlocking permanently.
//
// #383 — the error message is the agent-visible string when this 400
// fires on the SDK's _delegate_sync_via_polling path. The previous
// terse "self-delegation not permitted" was correct but indistinct
// from a transient rate-limit or auth failure, so the LLM would
// re-attempt every 2-3s in a tight loop (chloe-dong tenant external
// workspace, 2026-05-20). The expanded message is explicit about
// (a) what just happened, (b) why it cannot succeed, (c) what to do
// instead — so the agent's retry heuristic recognizes the path as
// terminal and stops.
if sourceID == body.TargetID {
c.JSON(http.StatusBadRequest, gin.H{"error": "self-delegation not permitted"})
c.JSON(http.StatusBadRequest, gin.H{
"error": "self-delegation not permitted",
"reason": "the source workspace and target workspace are the same; you cannot delegate a task to yourself",
"hint": "do the work yourself, or pick a different peer via list_peers — retrying with the same target_id will fail every time",
})
return
}
@@ -256,24 +256,43 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) {
peers = append(peers, siblings...)
}
// Children
// Children — exclude self defensively. A child row whose parent_id
// equals the requesting workspaceID can never legitimately be the
// caller (a workspace can't be its own child), but a future data-
// integrity defect (e.g. self-loop introduced by a buggy register
// path) would otherwise smuggle the caller back into its own peer
// list. The agent then attempts `delegate_task(<own_id>)`, which
// either deadlocks _run_lock (sync path) or hits the platform's
// self-delegation 400 in a tight loop (#383). The `w.id != $2`
// clause makes self-delegation-via-peer-list impossible regardless
// of DB state.
children, _ := queryPeerMaps(`
SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status,
COALESCE(w.agent_card, 'null'::jsonb), COALESCE(w.url, ''),
w.parent_id, w.active_tasks
FROM workspaces w WHERE w.parent_id = $1 AND w.status != 'removed'`, workspaceID)
FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`,
workspaceID, workspaceID)
peers = append(peers, children...)
// Parent
// Parent — same defense-in-depth. A workspace whose parent_id points
// to itself is data corruption, but the peer-list endpoint must not
// propagate that corruption back to the agent as a "peer who is also
// you" entry.
if parentID.Valid {
parent, _ := queryPeerMaps(`
SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status,
COALESCE(w.agent_card, 'null'::jsonb), COALESCE(w.url, ''),
w.parent_id, w.active_tasks
FROM workspaces w WHERE w.id = $1 AND w.status != 'removed'`, parentID.String)
FROM workspaces w WHERE w.id = $1 AND w.id != $2 AND w.status != 'removed'`,
parentID.String, workspaceID)
peers = append(peers, parent...)
}
// #383 final-line defense: even if a future code path adds a query
// that doesn't filter self, strip the caller's own row before
// returning. Cheap O(n) over a peer set bounded at <50 rows.
peers = excludeSelfFromPeers(peers, workspaceID)
peers = filterPeersByQuery(peers, c.Query("q"))
if peers == nil {
@@ -282,6 +301,32 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) {
c.JSON(http.StatusOK, peers)
}
// excludeSelfFromPeers strips any peer entry whose ``id`` equals
// ``workspaceID`` (the caller's own row). Final-line defense for #383
// (self-delegation 400-loop on external workspaces): a peer-list that
// includes the requester's own row is the root mechanism by which an
// agent ends up delegating to itself. The pre-DB filters in Peers
// already enforce `w.id != $caller` on each branch; this function
// guarantees the contract holds regardless of which query path
// returned the row, including future ones added without a self-filter.
//
// O(n) over a peer set bounded at <50 rows per `Peers` comment — well
// below the hot-path overhead of the existing filterPeersByQuery.
func excludeSelfFromPeers(peers []map[string]interface{}, workspaceID string) []map[string]interface{} {
if len(peers) == 0 {
return peers
}
out := make([]map[string]interface{}, 0, len(peers))
for _, p := range peers {
id, _ := p["id"].(string)
if id == workspaceID {
continue
}
out = append(out, p)
}
return out
}
// filterPeersByQuery returns peers whose name or role case-insensitively
// contains q. Whitespace-trimmed empty q is a no-op (returns input unchanged).
func filterPeersByQuery(peers []map[string]interface{}, q string) []map[string]interface{} {
@@ -125,14 +125,14 @@ func TestPeers_WithParent(t *testing.T) {
WillReturnRows(sqlmock.NewRows(peerCols).
AddRow("ws-sibling-2", "Sibling Two", "worker", 1, "online", []byte("null"), "http://localhost:8002", "ws-parent", 0))
// Expect children query
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.status").
WithArgs("ws-sibling-1").
// Expect children query — #383 added explicit `w.id != $2` self-filter
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs("ws-sibling-1", "ws-sibling-1").
WillReturnRows(sqlmock.NewRows(peerCols))
// Expect parent query
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.id = \\$1 AND w.status").
WithArgs("ws-parent").
// Expect parent query — #383 added explicit `w.id != $2` self-filter
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs("ws-parent", "ws-sibling-1").
WillReturnRows(sqlmock.NewRows(peerCols).
AddRow("ws-parent", "Parent PM", "manager", 2, "online", []byte("null"), "http://localhost:8001", nil, 1))
@@ -228,9 +228,9 @@ func TestPeers_RootWorkspace_NoPeers(t *testing.T) {
WithArgs("ws-root-alone").
WillReturnRows(sqlmock.NewRows(peerCols))
// Children — none
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1").
WithArgs("ws-root-alone").
// Children — none. #383 added explicit `w.id != $2` self-filter.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
WithArgs("ws-root-alone", "ws-root-alone").
WillReturnRows(sqlmock.NewRows(peerCols))
// No parent query since parent_id is NULL
@@ -282,12 +282,14 @@ func peersFilterFixture(t *testing.T) (*DiscoveryHandler, sqlmock.Sqlmock) {
AddRow("ws-alpha", "Alpha Researcher", "researcher", 1, "online", []byte("null"), "http://a", "ws-pm", 0).
AddRow("ws-beta", "Beta Designer", "designer", 1, "online", []byte("null"), "http://b", "ws-pm", 0))
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.status").
WithArgs("ws-self").
// #383 — children query gained explicit `w.id != $2` self-filter.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs("ws-self", "ws-self").
WillReturnRows(sqlmock.NewRows(cols))
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.id = \\$1 AND w.status").
WithArgs("ws-pm").
// #383 — parent query gained explicit `w.id != $2` self-filter.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs("ws-pm", "ws-self").
WillReturnRows(sqlmock.NewRows(cols).
AddRow("ws-pm", "PM Workspace", "manager", 2, "online", []byte("null"), "http://pm", nil, 1))
@@ -966,8 +968,9 @@ func TestPeers_DevModeFailOpen_AllowsBearerlessRequest(t *testing.T) {
mock.ExpectQuery("SELECT w.id.+WHERE w.parent_id IS NULL AND w.id").
WithArgs("ws-dev").
WillReturnRows(sqlmock.NewRows(peerCols))
mock.ExpectQuery("SELECT w.id.+WHERE w.parent_id = \\$1 AND w.status").
WithArgs("ws-dev").
// #383 — children query gained explicit `w.id != $2` self-filter.
mock.ExpectQuery("SELECT w.id.+WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs("ws-dev", "ws-dev").
WillReturnRows(sqlmock.NewRows(peerCols))
w := httptest.NewRecorder()
@@ -1030,3 +1033,183 @@ func TestPeers_DevModeFailOpen_ClosedInProduction(t *testing.T) {
t.Fatalf("expected 401 in production, got %d: %s", w.Code, w.Body.String())
}
}
// ==================== Peers — #383 self never appears in result ====================
// TestPeers_ExcludeSelf_DefenseInDepth verifies the final-line filter in
// Peers strips any row whose id matches the caller. The pre-DB SQL filters
// already do this, but a future code path that omits the `w.id != $caller`
// clause must not be able to smuggle a self-row through. This test
// simulates that future-defect case by mocking the children query to
// (incorrectly) return a row whose id matches the caller, and asserts the
// final filter still drops it.
//
// Root cause class for #383: an agent that sees its own row in /peers
// proceeds to delegate_task to itself, hitting the platform's
// self-delegation 400 in a tight loop. The fix in discovery.go is
// defense-in-depth: even if the SQL filter regresses, this handler-level
// filter prevents the 400-loop from materializing.
func TestPeers_ExcludeSelf_DefenseInDepth(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewDiscoveryHandler()
const selfID = "ws-xiaodong"
// parent_id lookup — workspace has a parent.
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs(selfID).
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow("ws-parent"))
peerCols := []string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}
// Siblings — returns one legitimate sibling. The SQL filter excludes
// self at the source.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
WithArgs("ws-parent", selfID).
WillReturnRows(sqlmock.NewRows(peerCols).
AddRow("ws-sibling", "Sibling", "worker", 1, "online", []byte("null"), "http://localhost:8002", "ws-parent", 0))
// Children — simulates the data-integrity defect class: the DB
// (incorrectly) returns the caller's own row in the children set.
// In real production this would require a workspace whose
// parent_id points to itself — corruption only, but the handler
// must not propagate it.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs(selfID, selfID).
WillReturnRows(sqlmock.NewRows(peerCols).
AddRow(selfID, "Self As Child", "worker", 1, "online", []byte("null"), "http://localhost:8001", selfID, 0).
AddRow("ws-child", "Real Child", "worker", 1, "online", []byte("null"), "http://localhost:8003", selfID, 0))
// Parent — explicit `w.id != $2` clause so the parent path is also
// self-filtered. parentID.String = "ws-parent" != selfID, so the
// row is included.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs("ws-parent", selfID).
WillReturnRows(sqlmock.NewRows(peerCols).
AddRow("ws-parent", "Parent", "manager", 2, "online", []byte("null"), "http://localhost:8004", nil, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: selfID}}
c.Request = httptest.NewRequest("GET", "/registry/"+selfID+"/peers", nil)
handler.Peers(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var peers []map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &peers); err != nil {
t.Fatalf("failed to parse response: %v", err)
}
// The defense-in-depth filter must drop the self row even though
// the (mocked-defective) children query returned it.
for _, p := range peers {
if id, _ := p["id"].(string); id == selfID {
t.Fatalf("peer list contains caller's own id %q — self-delegation defense regressed; full list: %+v", selfID, peers)
}
}
// Sanity: the three legitimate peers (sibling, real child, parent)
// must all be present. Catches an over-aggressive filter that
// strips legitimate rows.
expectedIDs := map[string]bool{"ws-sibling": false, "ws-child": false, "ws-parent": false}
for _, p := range peers {
if id, _ := p["id"].(string); id != "" {
if _, ok := expectedIDs[id]; ok {
expectedIDs[id] = true
}
}
}
for id, found := range expectedIDs {
if !found {
t.Errorf("legitimate peer %q missing from response; got %+v", id, peers)
}
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestExcludeSelfFromPeers_Unit exercises the helper directly so the
// defense-in-depth contract is asserted independently of SQL mocking.
// Pure-function tests run in microseconds and pin the filter shape
// (empty input, no-match passthrough, single-row drop, multi-row drop,
// preserves order) so future edits to the helper can't silently
// regress to "returns input unchanged".
func TestExcludeSelfFromPeers_Unit(t *testing.T) {
t.Run("empty input returns empty slice", func(t *testing.T) {
out := excludeSelfFromPeers(nil, "ws-self")
if len(out) != 0 {
t.Errorf("expected empty, got %+v", out)
}
})
t.Run("no self in list passes through unchanged", func(t *testing.T) {
in := []map[string]interface{}{
{"id": "ws-a", "name": "A"},
{"id": "ws-b", "name": "B"},
}
out := excludeSelfFromPeers(in, "ws-self")
if len(out) != 2 {
t.Fatalf("expected 2, got %d (%+v)", len(out), out)
}
if out[0]["id"] != "ws-a" || out[1]["id"] != "ws-b" {
t.Errorf("order not preserved: %+v", out)
}
})
t.Run("self row dropped, others preserved", func(t *testing.T) {
in := []map[string]interface{}{
{"id": "ws-a", "name": "A"},
{"id": "ws-self", "name": "Me"},
{"id": "ws-b", "name": "B"},
}
out := excludeSelfFromPeers(in, "ws-self")
if len(out) != 2 {
t.Fatalf("expected 2, got %d (%+v)", len(out), out)
}
if out[0]["id"] != "ws-a" || out[1]["id"] != "ws-b" {
t.Errorf("expected [ws-a, ws-b], got %+v", out)
}
})
t.Run("multiple self rows all dropped", func(t *testing.T) {
// Pathological — should never happen, but the contract is
// "no row with id==workspaceID survives", not "at most one
// such row is dropped". Pin it.
in := []map[string]interface{}{
{"id": "ws-self", "name": "Me1"},
{"id": "ws-a", "name": "A"},
{"id": "ws-self", "name": "Me2"},
}
out := excludeSelfFromPeers(in, "ws-self")
if len(out) != 1 {
t.Fatalf("expected 1, got %d (%+v)", len(out), out)
}
if out[0]["id"] != "ws-a" {
t.Errorf("expected [ws-a], got %+v", out)
}
})
t.Run("row with missing id key is preserved (not a self-collision)", func(t *testing.T) {
// A peer row with no "id" key shouldn't be silently dropped
// by the self-filter — it's a malformed row class that
// belongs to a different defect.
in := []map[string]interface{}{
{"name": "no-id-row"},
{"id": "ws-self", "name": "Me"},
}
out := excludeSelfFromPeers(in, "ws-self")
if len(out) != 1 {
t.Fatalf("expected 1, got %d (%+v)", len(out), out)
}
if out[0]["name"] != "no-id-row" {
t.Errorf("expected no-id-row preserved, got %+v", out)
}
})
}
@@ -432,9 +432,10 @@ func TestExtended_Peers(t *testing.T) {
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}).
AddRow("ws-sibling", "Sibling Agent", "worker", 1, "online", []byte("null"), "http://localhost:9001", nil, 0))
// Expect children query (workspaces with parent_id = ws-peer)
// Expect children query (workspaces with parent_id = ws-peer, excluding self)
// Query now binds (parent_id, self_id) for the self-filter guard added in #383.
mock.ExpectQuery("SELECT w.id, w.name").
WithArgs("ws-peer").
WithArgs("ws-peer", "ws-peer").
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}))
// No parent query since workspace is root-level