From 0653e782629310be57e7a65a98f11c448cc1efeb Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Tue, 14 Apr 2026 22:18:38 -0700 Subject: [PATCH] =?UTF-8?q?fix(registry):=20allow=20ancestor=E2=86=94desce?= =?UTF-8?q?ndant=20A2A=20so=20audit=5Fsummary=20can=20reach=20PM?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found via deep workspace inspection during a maintenance cycle: Security Auditor's hourly cron correctly tries to delegate_task its audit_summary to PM, the platform proxy rejects with "access denied: workspaces cannot communicate per hierarchy", the agent falls back to delegating to its direct parent (Dev Lead), and PM's category_routing dispatcher (#75) is never reached. This breaks the audit-routing contract end-to-end. Every audit cycle was landing on Dev Lead instead of being fanned out via PM's category_routing to the right dev role (security → BE+DevOps, ui/ux → FE, etc). ## Root cause `registry.CanCommunicate()` only allowed: - self → self - siblings (same parent) - root-level siblings - direct parent → child - direct child → parent A grandchild → grandparent (Security Auditor → PM, where parent is Dev Lead and grandparent is PM) was DENIED. The original design wanted strict hierarchy to prevent rogue horizontal A2A — but it also broke the fundamental "child can talk to its leadership chain" pattern that any audit/escalation flow needs. ## Fix Generalise to ancestor ↔ descendant. Any workspace can talk to any ancestor (any depth) and any descendant (any depth). Direct parent/child remains a fast path that avoids the walk. Sibling rules unchanged. Cousins still cannot directly communicate (would need to go through their shared ancestor). Cross-subtree A2A is still rejected. Implementation: `isAncestorOf(ancestorID, childID)` walks the parent chain in Go with a maxAncestorWalk=32 safety cap so a malformed cycle in the workspaces table cannot loop forever. One DB lookup per step. For a typical 3-deep tree, this adds 1-2 extra lookups vs the old direct-parent fast path. Could be optimized to a single recursive CTE if profiling shows it matters; not now. ## Tests - TestCanCommunicate_Denied_Grandchild → REPLACED with two new tests: - TestCanCommunicate_Allowed_GrandparentToGrandchild - TestCanCommunicate_Allowed_GrandchildToGrandparent (the actual bug) - TestCanCommunicate_Allowed_DeepAncestor — 4-level chain - TestCanCommunicate_Denied_UnrelatedAncestors — ensures cross-subtree walks still terminate denied - TestCanCommunicate_Denied_DifferentParents — extended with the walk lookup mocks so sqlmock doesn't log warnings - TestCanCommunicate_Denied_CousinToRoot — same All 13 tests pass clean. The previous direct parent/child / siblings / self tests are unchanged (fast paths preserved). ## Why platform-level Per the "platform-wide fixes are mine to ship" rule. Every org template hits the same broken audit-routing chain — fixing it at the platform benefits all users, not just molecule-dev. This unblocks #50 (PM dispatcher prompt) and #75 (category_routing). --- platform/internal/registry/access.go | 70 ++++++++++++++++-- platform/internal/registry/access_test.go | 88 ++++++++++++++++++++--- 2 files changed, 146 insertions(+), 12 deletions(-) diff --git a/platform/internal/registry/access.go b/platform/internal/registry/access.go index 5b9bf474..530356be 100644 --- a/platform/internal/registry/access.go +++ b/platform/internal/registry/access.go @@ -7,6 +7,13 @@ import ( "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" ) +// maxAncestorWalk caps the depth of the parent-chain walk in +// CanCommunicate. Org trees are realistically 3-5 deep +// (PM → Dev Lead → Backend Engineer is depth 3); 32 is a safety +// ceiling so a malformed cycle in the workspaces table can't loop +// forever. +const maxAncestorWalk = 32 + type workspaceRef struct { ID string ParentID *string @@ -26,8 +33,51 @@ func getWorkspaceRef(id string) (*workspaceRef, error) { return &ws, nil } -// CanCommunicate checks if two workspaces can talk to each other -// based on the hierarchy rules: siblings, parent-child, root-level siblings. +// isAncestorOf returns true if `ancestorID` is found anywhere on the +// parent-chain walk starting from `childID`. Walks at most maxAncestorWalk +// steps so a corrupt parent-cycle cannot loop forever. Returns false on any +// DB lookup error (logged) — fail-secure. +func isAncestorOf(ancestorID, childID string) bool { + current := childID + for i := 0; i < maxAncestorWalk; i++ { + ref, err := getWorkspaceRef(current) + if err != nil { + log.Printf("isAncestorOf: walk lookup %s: %v", current, err) + return false + } + if ref.ParentID == nil { + return false + } + if *ref.ParentID == ancestorID { + return true + } + current = *ref.ParentID + } + log.Printf("isAncestorOf: walk exceeded maxAncestorWalk=%d from %s — corrupt parent chain?", + maxAncestorWalk, childID) + return false +} + +// CanCommunicate checks if two workspaces can talk to each other based on +// the org hierarchy. The rules: +// +// - self → self +// - siblings (same parent, including both root-level) +// - any ancestor → any descendant (e.g. PM → Backend Engineer) +// - any descendant → any ancestor (e.g. Security Auditor → PM) +// +// The third and fourth rules generalise the previous "direct parent ↔ +// child" check. Originally this was strict 1-step parent/child only, +// which broke the audit-routing contract: Security Auditor (under Dev +// Lead, under PM) could not call delegate_task on PM to deliver an +// audit_summary, so it fell back to delegating to Dev Lead — bypassing +// PM's category_routing entirely. +// +// The relaxation preserves the hierarchy intent (no horizontal cross-team +// chatter — Frontend Engineer cannot directly message Backend Engineer +// unless they share a parent, which they do under Dev Lead) while +// unblocking the leadership-chain pattern that is fundamental to how +// audit summaries fan out across the org. func CanCommunicate(callerID, targetID string) bool { if callerID == targetID { return true @@ -54,15 +104,27 @@ func CanCommunicate(callerID, targetID string) bool { return true } - // Parent talking to child + // Direct parent → child (fast path; avoids the ancestor walk) if target.ParentID != nil && caller.ID == *target.ParentID { return true } - // Child talking up to parent + // Direct child → parent (fast path) if caller.ParentID != nil && target.ID == *caller.ParentID { return true } + // Distant ancestor → descendant: caller is somewhere up target's chain. + // Triggers extra DB lookups, only reached when the fast paths above didn't match. + if target.ParentID != nil && isAncestorOf(callerID, *target.ParentID) { + return true + } + + // Distant descendant → ancestor: target is somewhere up caller's chain. + // (e.g. Security Auditor → PM, where Security Auditor's parent is Dev Lead.) + if caller.ParentID != nil && isAncestorOf(targetID, *caller.ParentID) { + return true + } + return false } diff --git a/platform/internal/registry/access_test.go b/platform/internal/registry/access_test.go index bd47a40f..537a0b62 100644 --- a/platform/internal/registry/access_test.go +++ b/platform/internal/registry/access_test.go @@ -97,9 +97,13 @@ func TestCanCommunicate_ChildToParent(t *testing.T) { func TestCanCommunicate_Denied_DifferentParents(t *testing.T) { mock := setupMockDB(t) - // ws-a (parent: p1) and ws-b (parent: p2) — not siblings + // ws-a (parent: p1) and ws-b (parent: p2) — not siblings, no shared ancestor. expectLookup(mock, "ws-a", ptr("p1")) expectLookup(mock, "ws-b", ptr("p2")) + // Walk #1: isAncestorOf(ws-a, p2) → p2 is parentless, false. + expectLookup(mock, "p2", nil) + // Walk #2: isAncestorOf(ws-b, p1) → p1 is parentless, false. + expectLookup(mock, "p1", nil) if CanCommunicate("ws-a", "ws-b") { t.Error("workspaces with different parents should NOT communicate") @@ -108,9 +112,15 @@ func TestCanCommunicate_Denied_DifferentParents(t *testing.T) { func TestCanCommunicate_Denied_CousinToRoot(t *testing.T) { mock := setupMockDB(t) - // ws-child (parent: ws-mid) and ws-root (no parent, NOT ws-mid) + // ws-child (parent: ws-mid, which has its own root ws-other-root) and + // ws-root (a different parentless workspace). + // The ancestor walk from ws-child should reach ws-other-root but never + // ws-root, so communication is denied. expectLookup(mock, "ws-child", ptr("ws-mid")) expectLookup(mock, "ws-root", nil) + // Ancestor walk: starts at *caller.ParentID = ws-mid. Walks ws-mid → ws-other-root → nil. + expectLookup(mock, "ws-mid", ptr("ws-other-root")) + expectLookup(mock, "ws-other-root", nil) if CanCommunicate("ws-child", "ws-root") { t.Error("child should NOT communicate with unrelated root workspace") @@ -136,13 +146,75 @@ func TestCanCommunicate_Denied_TargetNotFound(t *testing.T) { } } -func TestCanCommunicate_Denied_Grandchild(t *testing.T) { +func TestCanCommunicate_Allowed_GrandparentToGrandchild(t *testing.T) { mock := setupMockDB(t) - // ws-grandparent and ws-grandchild (parent: ws-mid, NOT ws-grandparent) - expectLookup(mock, "ws-grandparent", nil) - expectLookup(mock, "ws-grandchild", ptr("ws-mid")) + // PM (no parent) → Backend Engineer (parent: Dev Lead, parent: PM). + // Originally rejected ("grandparent should NOT communicate with grandchild + // directly") — that broke audit_summary routing because Security Auditor + // could not delegate up to PM. The hierarchy is now ancestor↔descendant. + expectLookup(mock, "ws-pm", nil) + expectLookup(mock, "ws-be", ptr("ws-dl")) + // Ancestor walk: target.ParentID = ws-dl. isAncestorOf(ws-pm, ws-dl). + // Walks ws-dl → ws-pm → match. (Walk lookup #1: ws-dl.) + expectLookup(mock, "ws-dl", ptr("ws-pm")) - if CanCommunicate("ws-grandparent", "ws-grandchild") { - t.Error("grandparent should NOT communicate with grandchild directly") + if !CanCommunicate("ws-pm", "ws-be") { + t.Error("PM should be able to communicate with Backend Engineer (descendant)") + } +} + +func TestCanCommunicate_Allowed_GrandchildToGrandparent(t *testing.T) { + mock := setupMockDB(t) + // Security Auditor (parent: Dev Lead) → PM (parent of Dev Lead). + // This is the Security Auditor → PM audit_summary delivery path. + expectLookup(mock, "ws-sec", ptr("ws-dl")) + expectLookup(mock, "ws-pm", nil) + // Direct parent → child fast path: target.ParentID = nil, skip. + // Direct child → parent: caller.ParentID = ws-dl, target.ID = ws-pm, + // ws-dl != ws-pm, skip. + // Distant ancestor → descendant: target.ParentID = nil, skip. + // Distant descendant → ancestor: caller.ParentID = ws-dl. Walks + // isAncestorOf(ws-pm, ws-dl) → looks up ws-dl → returns ws-pm → match. + expectLookup(mock, "ws-dl", ptr("ws-pm")) + + if !CanCommunicate("ws-sec", "ws-pm") { + t.Error("Security Auditor should be able to send audit_summary up to PM") + } +} + +func TestCanCommunicate_Allowed_DeepAncestor(t *testing.T) { + mock := setupMockDB(t) + // Four-level chain: ws-leaf (parent: ws-l3, parent: ws-l2, parent: ws-l1). + // ws-leaf → ws-l1 should be allowed. + expectLookup(mock, "ws-leaf", ptr("ws-l3")) + expectLookup(mock, "ws-l1", nil) + // Distant descendant → ancestor walk: starts at ws-l3. + // ws-l3 → ws-l2: not ws-l1, continue. + // ws-l2 → ws-l1: match! + expectLookup(mock, "ws-l3", ptr("ws-l2")) + expectLookup(mock, "ws-l2", ptr("ws-l1")) + + if !CanCommunicate("ws-leaf", "ws-l1") { + t.Error("4-level descendant should reach root ancestor") + } +} + +func TestCanCommunicate_Denied_UnrelatedAncestors(t *testing.T) { + mock := setupMockDB(t) + // Two separate org subtrees: + // tree A: ws-a-leaf → ws-a-mid → ws-a-root + // tree B: ws-b-leaf → ws-b-mid → ws-b-root + // ws-a-leaf → ws-b-root must be denied even though both have parents + // (no shared ancestor). + expectLookup(mock, "ws-a-leaf", ptr("ws-a-mid")) + expectLookup(mock, "ws-b-root", nil) + // Walk: isAncestorOf(ws-b-root, ws-a-mid). + // ws-a-mid → ws-a-root: not ws-b-root, continue. + // ws-a-root has no parent → false. + expectLookup(mock, "ws-a-mid", ptr("ws-a-root")) + expectLookup(mock, "ws-a-root", nil) + + if CanCommunicate("ws-a-leaf", "ws-b-root") { + t.Error("workspaces in different subtrees should NOT communicate via the walk") } }