fix(registry,discovery,mcp): prevent cross-tenant peer metadata leak (#1953) #1956

Closed
agent-pm wants to merge 1 commits from fix/cross-tenant-isolation-1953 into main
8 changed files with 38 additions and 54 deletions
@@ -1059,13 +1059,13 @@ func expectExecuteDelegationBase(mock sqlmock.Sqlmock) {
WillReturnResult(sqlmock.NewResult(0, 1))
// CanCommunicate: getWorkspaceRef(source) + getWorkspaceRef(target).
// Both are root-level workspaces (parent_id=NULL) → root-level siblings → allowed.
// Source is root-level, target is its child → parent→child communication allowed.
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
WithArgs(testDeliverySourceID).
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliverySourceID, nil))
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
WithArgs(testDeliveryTargetID).
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliveryTargetID, nil))
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliveryTargetID, testDeliverySourceID))
// resolveAgentURL: test callers always set the URL in Redis (mr.Set ws:{id}:url),
// so resolveAgentURL gets a cache hit and never falls back to DB.
@@ -246,15 +246,11 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) {
FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`,
parentID.String, workspaceID)
peers = append(peers, siblings...)
} else {
siblings, _ := queryPeerMaps(`
SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status,
COALESCE(w.agent_card, 'null'::jsonb), COALESCE(w.url, ''),
w.parent_id, w.active_tasks
FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`,
workspaceID)
peers = append(peers, siblings...)
}
// Root workspaces (parent_id IS NULL) have no siblings: each org has
// exactly one root, so a distinct workspace with parent_id IS NULL
// belongs to a different tenant. Querying `parent_id IS NULL` would
// leak every org root across tenants (security #1953).
// Children — exclude self defensively. A child row whose parent_id
// equals the requesting workspaceID can never legitimately be the
@@ -223,10 +223,8 @@ func TestPeers_RootWorkspace_NoPeers(t *testing.T) {
peerCols := []string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}
// Siblings (other root-level workspaces) — none
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id IS NULL AND w.id != \\$1").
WithArgs("ws-root-alone").
WillReturnRows(sqlmock.NewRows(peerCols))
// Siblings — root workspaces have none (each org has exactly one root;
// querying `parent_id IS NULL` would leak all org roots cross-tenant).
// Children — none. #383 added explicit `w.id != $2` self-filter.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
@@ -959,15 +957,12 @@ func TestPeers_DevModeFailOpen_AllowsBearerlessRequest(t *testing.T) {
peersAuthFixtureHasLiveToken(mock, "ws-dev")
// Root workspace → children+parent queries still fire but the
// parent_id lookup comes first.
// Root workspace → no sibling query (security #1953), children+parent
// queries still fire, and the parent_id lookup comes first.
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs("ws-dev").
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
peerCols := []string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}
mock.ExpectQuery("SELECT w.id.+WHERE w.parent_id IS NULL AND w.id").
WithArgs("ws-dev").
WillReturnRows(sqlmock.NewRows(peerCols))
// #383 — children query gained explicit `w.id != $2` self-filter.
mock.ExpectQuery("SELECT w.id.+WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs("ws-dev", "ws-dev").
@@ -576,13 +576,13 @@ func TestDiscover_TargetOffline(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// Both root-level, access allowed
// Same-parent siblings, access allowed
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
WithArgs("ws-caller").
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-caller", nil))
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-caller", "ws-parent"))
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
WithArgs("ws-off").
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-off", nil))
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-off", "ws-parent"))
// Name + runtime lookup (discovery now queries both)
mock.ExpectQuery("SELECT COALESCE").
@@ -622,13 +622,13 @@ func TestCheckAccess_SiblingsAllowed(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// Both root-level siblings → allowed
// Siblings under a shared parent → allowed
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
WithArgs("ws-a").
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-a", nil))
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-a", "ws-parent"))
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
WithArgs("ws-b").
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-b", nil))
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-b", "ws-parent"))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -376,14 +376,13 @@ func TestExtended_DiscoverWithCallerID(t *testing.T) {
handler := NewDiscoveryHandler()
// CanCommunicate needs to look up both workspaces
// Caller: root-level (no parent)
// Caller and target are siblings under a shared parent
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
WithArgs("ws-caller").
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-caller", nil))
// Target: also root-level (no parent) — root-level siblings are allowed
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-caller", "ws-parent"))
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
WithArgs("ws-target").
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", nil))
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", "ws-parent"))
// Discover handler looks up workspace name + runtime
mock.ExpectQuery("SELECT COALESCE").
@@ -458,14 +457,14 @@ func TestExtended_Peers(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// Expect parent_id lookup for requesting workspace (root-level, no parent)
// Expect parent_id lookup for requesting workspace (has a parent)
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs("ws-peer").
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow("ws-parent"))
// Expect root-level siblings query (parent IS NULL, excluding self)
// Expect siblings query (same parent, excluding self)
mock.ExpectQuery("SELECT w.id, w.name").
WithArgs("ws-peer").
WithArgs("ws-parent", "ws-peer").
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}).
AddRow("ws-sibling", "Sibling Agent", "worker", 1, "online", []byte("null"), "http://localhost:9001", nil, 0))
@@ -512,13 +511,13 @@ func TestExtended_CheckAccess(t *testing.T) {
handler := NewDiscoveryHandler()
// CanCommunicate will look up both workspaces
// Both root-level — should be allowed
// Siblings under a shared parent — should be allowed
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
WithArgs("ws-a").
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-a", nil))
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-a", "ws-parent"))
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
WithArgs("ws-b").
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-b", nil))
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-b", "ws-parent"))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -107,16 +107,10 @@ func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (str
log.Printf("MCP toolListPeers: sibling scan error: %v", scanErr)
}
}
} else {
rows, err := h.database.QueryContext(ctx,
cols+` FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`,
workspaceID)
if err == nil {
if scanErr := scanPeers(rows); scanErr != nil {
log.Printf("MCP toolListPeers: sibling scan error: %v", scanErr)
}
}
}
// Root workspaces (parent_id IS NULL) have no siblings: each org has
// exactly one root. Querying `parent_id IS NULL` would leak every org
// root across tenants (security #1953).
// Children
{
+3 -5
View File
@@ -94,15 +94,13 @@ func CanCommunicate(callerID, targetID string) bool {
return false
}
// Siblings — same parent (including root-level where both have no parent)
// Siblings — same parent (excluding root-level; each org has exactly one
// root workspace, so two distinct root-level workspaces are in different
// orgs and must not communicate cross-tenant).
if caller.ParentID != nil && target.ParentID != nil &&
*caller.ParentID == *target.ParentID {
return true
}
// Root-level siblings — both have no parent
if caller.ParentID == nil && target.ParentID == nil {
return true
}
// Direct parent → child (fast path; avoids the ancestor walk)
if target.ParentID != nil && caller.ID == *target.ParentID {
@@ -63,14 +63,16 @@ func TestCanCommunicate_Siblings(t *testing.T) {
}
}
func TestCanCommunicate_RootSiblings(t *testing.T) {
func TestCanCommunicate_Denied_RootCrossTenant(t *testing.T) {
mock := setupMockDB(t)
// Both at root level (no parent)
// Both at root level (no parent) but in different orgs.
// parent_id = NULL means org root; each org has exactly one root,
// so two distinct root workspaces must NOT communicate.
expectLookup(mock, "ws-a", nil)
expectLookup(mock, "ws-b", nil)
if !CanCommunicate("ws-a", "ws-b") {
t.Error("root-level siblings should communicate")
if CanCommunicate("ws-a", "ws-b") {
t.Error("distinct root-level workspaces should NOT communicate cross-tenant")
}
}