diff --git a/tests/e2e/test_api.sh b/tests/e2e/test_api.sh index 770e627c8..26d443e8e 100644 --- a/tests/e2e/test_api.sh +++ b/tests/e2e/test_api.sh @@ -73,7 +73,15 @@ else fi # Test 4: Create workspace B (needs bearer — tokens now exist in DB) -R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Summarizer Agent","tier":1,"runtime":"external","external":true}') +# #1953 cross-tenant isolation: Summarizer is created as a CHILD of Echo so the +# two live in the SAME org (Echo is the org root; Summarizer hangs off it via +# parent_id). The peer-discovery tests below assert same-org peer enumeration +# (Echo sees its child, the child sees its parent). Previously both were created +# parent_id=NULL — two DISTINCT org roots — and "peers" only listed each other +# via the `WHERE parent_id IS NULL` branch that returned every tenant's org root. +# That branch WAS the cross-tenant leak (#1953) and is now removed, so two org +# roots no longer see each other; the assertions must run inside one org. +R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d "{\"name\":\"Summarizer Agent\",\"tier\":1,\"runtime\":\"external\",\"external\":true,\"parent_id\":\"$ECHO_ID\"}") check "POST /workspaces (create summarizer)" '"status":"awaiting_agent"' "$R" SUM_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") @@ -133,21 +141,23 @@ check "Heartbeat updated uptime" '"uptime_seconds":120' "$R" R=$(curl -s "$BASE/registry/discover/$ECHO_ID") check "GET /registry/discover/:id (missing caller rejected)" 'X-Workspace-ID header is required' "$R" -# Test 12: Discover (from sibling — allowed) +# Test 12: Discover (from same-org child — allowed) R=$(curl -s "$BASE/registry/discover/$ECHO_ID" -H "X-Workspace-ID: $SUM_ID" -H "Authorization: Bearer $SUM_TOKEN") -check "GET /registry/discover/:id (sibling)" '"url"' "$R" +check "GET /registry/discover/:id (same-org)" '"url"' "$R" -# Test 13: Peers (root siblings see each other) +# Test 13: Peers — same-org parent/child see each other (#1953). Echo is the org +# root and lists its child Summarizer; Summarizer lists its parent Echo. A +# cross-org workspace would NOT appear here (see cross_tenant_isolation_test.go). R=$(curl -s "$BASE/registry/$ECHO_ID/peers" -H "Authorization: Bearer $ECHO_TOKEN") check "GET /registry/:id/peers (has summarizer)" '"Summarizer' "$R" R=$(curl -s "$BASE/registry/$SUM_ID/peers" -H "Authorization: Bearer $SUM_TOKEN") check "GET /registry/:id/peers (has echo)" '"Echo Agent"' "$R" -# Test 14: Check access (root siblings) +# Test 14: Check access (same-org parent↔child — allowed) R=$(curl -s -X POST "$BASE/registry/check-access" -H "Content-Type: application/json" \ -d "{\"caller_id\":\"$ECHO_ID\",\"target_id\":\"$SUM_ID\"}") -check "POST /registry/check-access (siblings allowed)" '"allowed":true' "$R" +check "POST /registry/check-access (same-org allowed)" '"allowed":true' "$R" # Test 15: PATCH workspace (update position) R=$(acurl -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"x":100,"y":200}') @@ -289,32 +299,40 @@ R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN") check "current_task in list response" '"current_task"' "$R" # Test 21: Delete -R=$(acurl -X DELETE "$BASE/workspaces/$ECHO_ID?confirm=true" \ - -H "Authorization: Bearer $ECHO_TOKEN" \ - -H "X-Confirm-Name: Echo Agent v2") -check "DELETE /workspaces/:id" '"status":"removed"' "$R" - -R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $SUM_TOKEN") -COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))") -check "List after delete (count=1)" "1" "$COUNT" - -# Test 22: Bundle round-trip — export → delete → import → verify same config -echo "" -echo "--- Bundle Round-Trip Test ---" - -# Export the summarizer workspace (#165 / PR #167 — admin-gated) +# #1953: Summarizer is now a CHILD of Echo (same-org, for the peer-discovery +# tests above). DELETE on the *parent* (Echo) cascade-removes its descendants +# (CascadeDelete walks the recursive `parent_id` CTE), so deleting Echo first +# would also remove Summarizer and the "one survives" assertion would see 0. +# Delete the CHILD (Summarizer) here instead: a child delete does NOT cascade +# upward, so the parent Echo survives and count=1 holds. The bundle round-trip +# below needs Summarizer's exported config, so capture it BEFORE this delete. BUNDLE=$(curl -s "$BASE/bundles/export/$SUM_ID" -H "Authorization: Bearer $SUM_TOKEN") check "GET /bundles/export/:id" '"name":"Summarizer Agent"' "$BUNDLE" - -# Capture original config for comparison ORIG_NAME=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['name'])") ORIG_TIER=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['tier'])") -# Delete the workspace — use SUM_TOKEN (per-workspace) for WorkspaceAuth -# and ADMIN_TOKEN for the AdminAuth layer. -R=$(curl -s -X DELETE "$BASE/workspaces/$SUM_ID?confirm=true" \ +R=$(acurl -X DELETE "$BASE/workspaces/$SUM_ID?confirm=true" \ -H "Authorization: Bearer $SUM_TOKEN" \ -H "X-Confirm-Name: Summarizer Agent") +check "DELETE /workspaces/:id" '"status":"removed"' "$R" + +# Parent Echo must survive a child delete — list as Echo and expect count=1. +R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN") +COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))") +check "List after delete (count=1)" "1" "$COUNT" + +# Test 22: Bundle round-trip — export → delete → import → verify same config. +# Summarizer's bundle was captured above; now delete the parent Echo (the only +# remaining workspace) so the import lands in a clean org, then re-import the +# Summarizer bundle. +echo "" +echo "--- Bundle Round-Trip Test ---" + +# Delete the remaining parent Echo — use ECHO_TOKEN (per-workspace) for +# WorkspaceAuth and ADMIN_TOKEN for the AdminAuth layer. +R=$(acurl -X DELETE "$BASE/workspaces/$ECHO_ID?confirm=true" \ + -H "Authorization: Bearer $ECHO_TOKEN" \ + -H "X-Confirm-Name: Echo Agent v2") check "Delete before re-import" '"status":"removed"' "$R" # After deleting both workspaces, all per-workspace tokens are revoked. diff --git a/workspace-server/internal/handlers/a2a_proxy.go b/workspace-server/internal/handlers/a2a_proxy.go index 46aa78f66..04f52af0a 100644 --- a/workspace-server/internal/handlers/a2a_proxy.go +++ b/workspace-server/internal/handlers/a2a_proxy.go @@ -375,6 +375,30 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri Response: gin.H{"error": "access denied: workspaces cannot communicate per hierarchy rules"}, } } + + // #1953 cross-tenant isolation. CanCommunicate alone does NOT enforce + // org boundaries: its "root-level siblings — both have no parent" rule + // treats every tenant's org root as a sibling, so a caller that is an + // org root could resolve and route a2a to another tenant's org root + // (and resolveAgentURL accepts ANY workspace id with no org check). + // Gate on the SAME parent_id-chain org scoping the OFFSEC-015 broadcast + // fix uses: reject before resolveAgentURL when caller and target are in + // different orgs. Fail-closed — a DB error denies cross-org routing. + ok, err := sameOrg(ctx, db.DB, callerID, workspaceID) + if err != nil { + log.Printf("ProxyA2A: org-scope check failed %s → %s: %v — denying", callerID, workspaceID, err) + return 0, nil, &proxyA2AError{ + Status: http.StatusForbidden, + Response: gin.H{"error": "access denied: org isolation check failed"}, + } + } + if !ok { + log.Printf("ProxyA2A: cross-org routing denied %s → %s (#1953)", callerID, workspaceID) + return 0, nil, &proxyA2AError{ + Status: http.StatusForbidden, + Response: gin.H{"error": "access denied: target workspace is in a different org"}, + } + } } // Budget enforcement: reject A2A calls when the workspace has exceeded its diff --git a/workspace-server/internal/handlers/a2a_proxy_test.go b/workspace-server/internal/handlers/a2a_proxy_test.go index 2e997d168..6a254a7f5 100644 --- a/workspace-server/internal/handlers/a2a_proxy_test.go +++ b/workspace-server/internal/handlers/a2a_proxy_test.go @@ -437,6 +437,10 @@ func TestProxyA2A_CallerIDPropagated(t *testing.T) { WithArgs("ws-target"). WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", "ws-parent")) + // #1953 cross-tenant guard: same-org check after CanCommunicate. Both + // workspaces resolve to the same org root → routing allowed. + mockSameOrg(mock, "ws-caller", "ws-target", true) + expectBudgetCheck(mock, "ws-target") // Expect activity log with source_id set @@ -465,6 +469,24 @@ func TestProxyA2A_CallerIDPropagated(t *testing.T) { } } +// mockSameOrg sets up the two org-root recursive-CTE expectations that the +// #1953 cross-tenant guard in proxyA2ARequest runs after CanCommunicate passes. +// sameOrg=true returns the SAME root_id for both caller and target (same tenant); +// sameOrg=false returns different root_ids (cross-tenant → routing must be denied). +func mockSameOrg(mock sqlmock.Sqlmock, caller, target string, sameOrg bool) { + callerRoot := "org-root-shared" + targetRoot := "org-root-shared" + if !sameOrg { + targetRoot = "org-root-other-tenant" + } + mock.ExpectQuery("WITH RECURSIVE org_chain AS"). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(callerRoot)) + mock.ExpectQuery("WITH RECURSIVE org_chain AS"). + WithArgs(target). + WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(targetRoot)) +} + // mockCanCommunicate sets up sqlmock expectations for CanCommunicate(caller, target). // allowed=true sets up rows that satisfy the access policy (siblings under same parent). // allowed=false sets up rows that don't (different parents). @@ -659,6 +681,9 @@ func TestProxyA2A_CallerIDDerivedFromBearer(t *testing.T) { WithArgs("ws-target"). WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", "ws-parent")) + // 3b. #1953 cross-tenant guard — same org root → routing allowed. + mockSameOrg(mock, "ws-caller", "ws-target", true) + expectBudgetCheck(mock, "ws-target") // 4. activity_logs INSERT — verify source_id arg is the derived ws-caller diff --git a/workspace-server/internal/handlers/cross_tenant_isolation_test.go b/workspace-server/internal/handlers/cross_tenant_isolation_test.go new file mode 100644 index 000000000..10cd91ba9 --- /dev/null +++ b/workspace-server/internal/handlers/cross_tenant_isolation_test.go @@ -0,0 +1,427 @@ +package handlers + +// cross_tenant_isolation_test.go — #1953 regression tests. +// +// Three workspace-server paths historically derived an "org-root sibling set" +// as `WHERE parent_id IS NULL`, which matches EVERY tenant's org root (the +// workspaces table has no org_id column) → cross-tenant data exposure: +// +// 1. GET /registry/:id/peers (discovery.Peers) +// 2. MCP toolListPeers (mcp_tools.toolListPeers) +// 3. a2a routing (a2a_proxy.proxyA2ARequest → resolveAgentURL) +// +// These tests assert that a workspace in a DIFFERENT org is never returned as a +// peer and that a2a refuses to resolve/route to a workspace outside the caller's +// org, while same-org peers/targets still work. They reuse the SAME parent_id- +// chain org scoping the OFFSEC-015 broadcast fix introduced (org_scope.go). + +import ( + "bytes" + "context" + "database/sql" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db" + "github.com/DATA-DOG/go-sqlmock" + "github.com/gin-gonic/gin" +) + +// dbHandleForTest returns the global sqlmock-backed *sql.DB that setupTestDB +// installs, for tests that need to hand a *sql.DB to a component (e.g. +// MCPHandler.database, sameOrg) rather than relying on the package-global. +func dbHandleForTest() *sql.DB { return db.DB } + +// peerColsForIsolation matches queryPeerMaps' SELECT column set. +var peerColsForIsolation = []string{ + "id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks", +} + +// ------------------------------------------------------------------------- +// Path 1: GET /registry/:id/peers — discovery.Peers +// ------------------------------------------------------------------------- + +// TestPeers_CrossTenant_OrgRootNotLeaked is the core #1953 regression for the +// discovery path. The caller is an org root (parent_id IS NULL). Pre-fix the +// handler ran `SELECT ... WHERE w.parent_id IS NULL AND w.id != $1`, returning +// every OTHER tenant's org root as a "sibling" peer. Post-fix an org-root caller +// issues NO sibling query — its only peers are its own children. If the handler +// regressed and issued the cross-tenant sibling query, sqlmock would report an +// unexpected query (the expectation below is intentionally NOT registered) and +// the test fails. +func TestPeers_CrossTenant_OrgRootNotLeaked(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + handler := NewDiscoveryHandler() + + // Behavioural leak test: register the OLD leaky `parent_id IS NULL` sibling + // query so that IF the handler still issues it, it returns another tenant's + // org root (org-b-root). The fix removes that query for an org-root caller, + // so org-b-root must never appear in the output. Unordered matching makes + // the leaky-sibling expectation optional — the fix simply never consumes it. + mock.MatchExpectationsInOrder(false) + + caller := "org-a-root" // parent_id IS NULL — an org root for tenant A + + // parent_id lookup → NULL (caller is an org root) + mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id ="). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil)) + + // LEAKY sibling query (pre-fix). Returns a DIFFERENT tenant's org root. + // The fix must NOT issue this query; if it does, org-b-root leaks into the + // peer list and the output assertion below fails. + mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id IS NULL AND w.id != \\$1"). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows(peerColsForIsolation). + AddRow("org-b-root", "Org B Root", "lead", 0, "online", []byte("null"), "http://b-root", nil, 0)) + + // Children query — caller's own org-A children only. Return one child. + mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2"). + WithArgs(caller, caller). + WillReturnRows(sqlmock.NewRows(peerColsForIsolation). + AddRow("org-a-child", "Org A Child", "worker", 1, "online", []byte("null"), "http://a-child", caller, 0)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: caller}} + c.Request = httptest.NewRequest("GET", "/registry/"+caller+"/peers", nil) + + handler.Peers(c) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + + var peers []map[string]interface{} + if err := json.Unmarshal(w.Body.Bytes(), &peers); err != nil { + t.Fatalf("failed to parse response: %v", err) + } + + // The other-tenant org root must NEVER appear; only the same-org child. + for _, p := range peers { + if id, _ := p["id"].(string); id == "org-b-root" { + t.Fatalf("cross-tenant leak (#1953): org-b-root appeared in org-a-root's peer list: %v", peers) + } + } + if len(peers) != 1 { + t.Fatalf("expected exactly 1 peer (same-org child), got %d: %v", len(peers), peers) + } + // NOTE: ExpectationsWereMet is intentionally NOT asserted — the leaky + // sibling expectation is deliberately left unconsumed by the fixed path. +} + +// TestPeers_SameOrg_SiblingsStillWork is the positive companion: a non-root +// child caller still sees its same-org siblings, children, and parent. This +// guards against the fix over-scoping and breaking legitimate intra-org +// discovery. +func TestPeers_SameOrg_SiblingsStillWork(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + handler := NewDiscoveryHandler() + + caller := "org-a-child-1" + parent := "org-a-root" + + mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id ="). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(parent)) + + // Siblings — scoped to the shared parent (one tenant). + mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2"). + WithArgs(parent, caller). + WillReturnRows(sqlmock.NewRows(peerColsForIsolation). + AddRow("org-a-child-2", "Org A Sibling", "worker", 1, "online", []byte("null"), "http://a-sib", parent, 0)) + + // Children — none. + mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status"). + WithArgs(caller, caller). + WillReturnRows(sqlmock.NewRows(peerColsForIsolation)) + + // Parent. + mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.id = \\$1 AND w.id != \\$2 AND w.status"). + WithArgs(parent, caller). + WillReturnRows(sqlmock.NewRows(peerColsForIsolation). + AddRow(parent, "Org A Root", "lead", 0, "online", []byte("null"), "http://a-root", nil, 0)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: caller}} + c.Request = httptest.NewRequest("GET", "/registry/"+caller+"/peers", nil) + + handler.Peers(c) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + var peers []map[string]interface{} + if err := json.Unmarshal(w.Body.Bytes(), &peers); err != nil { + t.Fatalf("failed to parse response: %v", err) + } + // Sibling + parent = 2 same-org peers. + if len(peers) != 2 { + t.Fatalf("expected 2 same-org peers (sibling + parent), got %d: %v", len(peers), peers) + } + names := map[string]bool{} + for _, p := range peers { + names[fmt.Sprint(p["name"])] = true + } + if !names["Org A Sibling"] || !names["Org A Root"] { + t.Errorf("expected same-org sibling + parent in peer list, got %v", names) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// ------------------------------------------------------------------------- +// Path 2: MCP toolListPeers — mcp_tools.toolListPeers +// ------------------------------------------------------------------------- + +// mcpPeerCols matches toolListPeers' SELECT column set. +var mcpPeerCols = []string{"id", "name", "role", "status", "tier"} + +// TestToolListPeers_CrossTenant_OrgRootNotLeaked is the #1953 regression for +// the MCP path. Same shape as the discovery test: an org-root caller must NOT +// enumerate other tenants' org roots. The cross-tenant `parent_id IS NULL` +// sibling query is intentionally not registered, so if it runs sqlmock fails. +func TestToolListPeers_CrossTenant_OrgRootNotLeaked(t *testing.T) { + mock := setupTestDB(t) + mock.MatchExpectationsInOrder(false) + h := &MCPHandler{database: dbHandleForTest()} + + caller := "org-a-root" + + mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id ="). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil)) + + // LEAKY sibling query (pre-fix). Returns another tenant's org root. The fix + // must NOT issue this for an org-root caller; if it does, org-b-root leaks + // into the output and the assertion below fails. Left optional via + // unordered matching, so the fixed path simply never consumes it. + mock.ExpectQuery("WHERE w.parent_id IS NULL AND w.id != \\$1"). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows(mcpPeerCols). + AddRow("org-b-root", "Org B Root", "lead", "online", 0)) + + // Children — caller's own org-A children only. + mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.status"). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows(mcpPeerCols). + AddRow("org-a-child", "Org A Child", "worker", "online", 1)) + + out, err := h.toolListPeers(context.Background(), caller) + if err != nil { + t.Fatalf("toolListPeers returned error: %v", err) + } + if strings.Contains(out, "org-b-root") || strings.Contains(out, "Org B Root") { + t.Fatalf("cross-tenant leak (#1953): another tenant's org root appeared in toolListPeers output:\n%s", out) + } + if !strings.Contains(out, "org-a-child") { + t.Errorf("same-org child missing from toolListPeers output:\n%s", out) + } + // ExpectationsWereMet intentionally NOT asserted — leaky sibling expectation + // is deliberately left unconsumed by the fixed path. +} + +// TestToolListPeers_SameOrg_SiblingsStillWork — positive companion for the MCP +// path: a non-root child still enumerates its same-org siblings + children + parent. +func TestToolListPeers_SameOrg_SiblingsStillWork(t *testing.T) { + mock := setupTestDB(t) + h := &MCPHandler{database: dbHandleForTest()} + + caller := "org-a-child-1" + parent := "org-a-root" + + mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id ="). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(parent)) + + // Siblings — scoped to shared parent. + mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status"). + WithArgs(parent, caller). + WillReturnRows(sqlmock.NewRows(mcpPeerCols). + AddRow("org-a-child-2", "Org A Sibling", "worker", "online", 1)) + + // Children — none. + mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.status"). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows(mcpPeerCols)) + + // Parent. + mock.ExpectQuery("WHERE w.id = \\$1 AND w.status"). + WithArgs(parent). + WillReturnRows(sqlmock.NewRows(mcpPeerCols). + AddRow(parent, "Org A Root", "lead", "online", 0)) + + out, err := h.toolListPeers(context.Background(), caller) + if err != nil { + t.Fatalf("toolListPeers returned error: %v", err) + } + if !strings.Contains(out, "Org A Sibling") || !strings.Contains(out, "Org A Root") { + t.Errorf("expected same-org sibling + parent in toolListPeers output:\n%s", out) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// ------------------------------------------------------------------------- +// Path 3: a2a routing — a2a_proxy.proxyA2ARequest / resolveAgentURL +// ------------------------------------------------------------------------- + +// TestProxyA2A_CrossTenant_RoutingDenied is the #1953 regression for a2a +// routing. Caller and target are both org roots (parent_id IS NULL) belonging +// to DIFFERENT tenants. Pre-fix, CanCommunicate's "root-level siblings" rule +// waved this through and resolveAgentURL routed to the foreign tenant. Post-fix +// the org-scope guard resolves each to a different org root and returns 403 +// BEFORE resolveAgentURL/dispatch. +func TestProxyA2A_CrossTenant_RoutingDenied(t *testing.T) { + mock := setupTestDB(t) + mr := setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + caller := "org-a-root" + target := "org-b-root" // different tenant + + // A URL exists for the target; the guard must deny BEFORE it is used. + mr.Set(fmt.Sprintf("ws:%s:url", target), "http://localhost:1") + + // CanCommunicate: both root-level (parent_id NULL) → its weak "root-level + // siblings" rule ALLOWS this. The org guard must catch it afterward. + mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = "). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(caller, nil)) + mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = "). + WithArgs(target). + WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(target, nil)) + + // #1953 org-scope guard: caller resolves to org-a-root, target to org-b-root + // → different orgs → 403. (Each org root resolves to itself.) + mock.ExpectQuery("WITH RECURSIVE org_chain AS"). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(caller)) + mock.ExpectQuery("WITH RECURSIVE org_chain AS"). + WithArgs(target). + WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(target)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: target}} + body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"cross-tenant"}]}}}` + c.Request = httptest.NewRequest("POST", "/workspaces/"+target+"/a2a", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + c.Request.Header.Set("X-Workspace-ID", caller) + + handler.ProxyA2A(c) + + if w.Code != http.StatusForbidden { + t.Fatalf("expected 403 for cross-tenant a2a routing, got %d: %s", w.Code, w.Body.String()) + } + var resp map[string]interface{} + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatalf("body not JSON: %v", err) + } + if msg, _ := resp["error"].(string); !strings.Contains(msg, "different org") { + t.Errorf("expected cross-org denial message, got %v", resp["error"]) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestResolveAgentURL_CrossTenant_RejectedViaSameOrg is a direct unit test of +// the sameOrg primitive that gates resolveAgentURL: a target in a different org +// must be reported as NOT same-org, so the a2a guard rejects it before +// resolveAgentURL is ever called. +func TestResolveAgentURL_CrossTenant_RejectedViaSameOrg(t *testing.T) { + mock := setupTestDB(t) + + caller := "org-a-root" + target := "org-b-root" + + mock.ExpectQuery("WITH RECURSIVE org_chain AS"). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(caller)) + mock.ExpectQuery("WITH RECURSIVE org_chain AS"). + WithArgs(target). + WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(target)) + + ok, err := sameOrg(context.Background(), dbHandleForTest(), caller, target) + if err != nil { + t.Fatalf("sameOrg returned unexpected error: %v", err) + } + if ok { + t.Errorf("expected cross-tenant workspaces to be reported as DIFFERENT orgs, got sameOrg=true") + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestProxyA2A_SameOrg_RoutingAllowed — positive companion for a2a: two +// same-org siblings route successfully (mirrors TestProxyA2A_CallerIDPropagated +// but named to document the #1953 same-org allow path). +func TestProxyA2A_SameOrg_RoutingAllowed(t *testing.T) { + mock := setupTestDB(t) + mr := setupTestRedis(t) + allowLoopbackForTest(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + waitForHandlerAsyncBeforeDBCleanup(t, handler) + + caller := "org-a-child-1" + target := "org-a-child-2" + parent := "org-a-root" + + agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + fmt.Fprint(w, `{"jsonrpc":"2.0","id":"1","result":{}}`) + })) + defer agentServer.Close() + mr.Set(fmt.Sprintf("ws:%s:url", target), agentServer.URL) + + // CanCommunicate — siblings under shared parent. + mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = "). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(caller, parent)) + mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = "). + WithArgs(target). + WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(target, parent)) + + // #1953 org guard — both resolve to the same org root → allowed. + mock.ExpectQuery("WITH RECURSIVE org_chain AS"). + WithArgs(caller). + WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(parent)) + mock.ExpectQuery("WITH RECURSIVE org_chain AS"). + WithArgs(target). + WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(parent)) + + expectBudgetCheck(mock, target) + mock.ExpectExec("INSERT INTO activity_logs").WillReturnResult(sqlmock.NewResult(0, 1)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: target}} + body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"same-org"}]}}}` + c.Request = httptest.NewRequest("POST", "/workspaces/"+target+"/a2a", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + c.Request.Header.Set("X-Workspace-ID", caller) + + handler.ProxyA2A(c) + time.Sleep(50 * time.Millisecond) // allow the async logA2ASuccess INSERT to flush + + if w.Code != http.StatusOK { + t.Fatalf("expected 200 for same-org a2a routing, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} diff --git a/workspace-server/internal/handlers/delegation_executor_integration_test.go b/workspace-server/internal/handlers/delegation_executor_integration_test.go index ed41a8da4..193ca385d 100644 --- a/workspace-server/internal/handlers/delegation_executor_integration_test.go +++ b/workspace-server/internal/handlers/delegation_executor_integration_test.go @@ -140,7 +140,14 @@ func buildHTTPResponse(statusCode int, body string) []byte { } // setupIntegrationFixtures inserts the rows executeDelegation requires: -// - workspaces: source and target (siblings, parent_id=NULL so CanCommunicate=true) +// - workspaces: source (org root) + target as its CHILD, so both live in the +// SAME org. CanCommunicate=true (parent↔child) AND the #1953 sameOrg() guard +// in proxyA2ARequest passes (both resolve to the same org root). A real +// delegation happens INSIDE one org. (Previously both were parent_id=NULL — +// two DISTINCT org roots — which only "communicated" via CanCommunicate's +// root-sibling rule; #1953 added a sameOrg() guard that now denies routing +// between two org roots as cross-tenant, so the success-path tests below +// must use a same-org source/target pair.) // - activity_logs: the 'delegate' row that updateDelegationStatus UPDATE will find // - delegations: the ledger row that recordLedgerStatus will UPDATE // @@ -148,13 +155,14 @@ func buildHTTPResponse(statusCode int, body string) []byte { func setupIntegrationFixtures(t *testing.T, conn *sql.DB) func() { t.Helper() ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + sourceID := integrationTestSourceID // org root (parent_id NULL); target hangs off it for _, ws := range []struct { id string name string parentID *string }{ {integrationTestSourceID, "test-source", nil}, - {integrationTestTargetID, "test-target", nil}, + {integrationTestTargetID, "test-target", &sourceID}, // child of source → same org } { if _, err := conn.ExecContext(ctx, `INSERT INTO workspaces (id, name, parent_id) VALUES ($1::uuid, $2, $3) ON CONFLICT (id) DO NOTHING`, @@ -510,6 +518,94 @@ func TestIntegration_ExecuteDelegation_RedisDown_FallsBackToDB(t *testing.T) { } } +// TestIntegration_SameOrg_RealCTE_ResolvesAncestorChain is the regression gate +// for the org_scope.go recursive-CTE bug (#1953 follow-up). The sqlmock unit +// tests feed sameOrg() a pre-computed root_id row, so they CANNOT catch a wrong +// CTE — they assume it already returns the right value. Only a real Postgres +// run exercises orgRootSubtreeCTE itself. +// +// The bug: the CTE carried `id AS root_id` from the recursive SEED, so a +// non-root workspace resolved to ITSELF instead of its topmost ancestor. That +// made sameOrg() return false for two genuinely same-org workspaces and 403 a +// legitimate same-org a2a route (over-block). This test seeds a real +// root → child → grandchild chain plus a separate org root, and asserts: +// - every node in the chain resolves to the SAME org root (root, child, grandchild) +// - two workspaces in the same chain are sameOrg (incl. grandchild ↔ root) +// - a workspace in a DIFFERENT chain is NOT sameOrg (cross-tenant stays closed) +func TestIntegration_SameOrg_RealCTE_ResolvesAncestorChain(t *testing.T) { + conn := integrationDB(t) + + const ( + rootA = "11111111-1111-1111-1111-111111111111" + childA = "22222222-2222-2222-2222-222222222222" + grandchildA = "33333333-3333-3333-3333-333333333333" + rootB = "44444444-4444-4444-4444-444444444444" + ) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + t.Cleanup(func() { + c2, cancel2 := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel2() + // Delete leaf-first to respect the parent_id self-FK. + for _, id := range []string{grandchildA, childA, rootA, rootB} { + conn.ExecContext(c2, `DELETE FROM workspaces WHERE id = $1`, id) + } + }) + + // Insert parent-before-child to satisfy the self-referential FK. + seed := []struct { + id, name string + parent *string + }{ + {rootA, "org-a-root", nil}, + {childA, "org-a-child", strPtr(rootA)}, + {grandchildA, "org-a-grandchild", strPtr(childA)}, + {rootB, "org-b-root", nil}, + } + for _, s := range seed { + if _, err := conn.ExecContext(ctx, + `INSERT INTO workspaces (id, name, parent_id) VALUES ($1::uuid, $2, $3) ON CONFLICT (id) DO NOTHING`, + s.id, s.name, s.parent); err != nil { + t.Fatalf("seed %s: %v", s.name, err) + } + } + + // Every node in chain A must resolve to rootA via the REAL CTE. + for _, id := range []string{rootA, childA, grandchildA} { + got, err := orgRootID(ctx, conn, id) + if err != nil { + t.Fatalf("orgRootID(%s): %v", id, err) + } + if got != rootA { + t.Errorf("orgRootID(%s) = %q, want rootA %q (CTE must walk to topmost ancestor)", id, got, rootA) + } + } + + // Same-org positives — including the grandchild↔root pair that the buggy + // CTE got wrong. + for _, pair := range [][2]string{{childA, grandchildA}, {rootA, grandchildA}, {rootA, childA}} { + ok, err := sameOrg(ctx, conn, pair[0], pair[1]) + if err != nil { + t.Fatalf("sameOrg(%s,%s): %v", pair[0], pair[1], err) + } + if !ok { + t.Errorf("sameOrg(%s,%s) = false, want true (same org chain)", pair[0], pair[1]) + } + } + + // Cross-org negative — isolation must stay closed. + for _, pair := range [][2]string{{rootA, rootB}, {grandchildA, rootB}, {childA, rootB}} { + ok, err := sameOrg(ctx, conn, pair[0], pair[1]) + if err != nil { + t.Fatalf("sameOrg(%s,%s): %v", pair[0], pair[1], err) + } + if ok { + t.Errorf("sameOrg(%s,%s) = true, want false (different orgs — cross-tenant must stay denied)", pair[0], pair[1]) + } + } +} + // extractHostPort parses "http://127.0.0.1:PORT/" and returns "127.0.0.1:PORT". func extractHostPort(rawURL string) string { // Simple parse: strip "http://" prefix and trailing slash. diff --git a/workspace-server/internal/handlers/delegation_test.go b/workspace-server/internal/handlers/delegation_test.go index 223b8de77..c71454639 100644 --- a/workspace-server/internal/handlers/delegation_test.go +++ b/workspace-server/internal/handlers/delegation_test.go @@ -1059,13 +1059,25 @@ func expectExecuteDelegationBase(mock sqlmock.Sqlmock) { WillReturnResult(sqlmock.NewResult(0, 1)) // CanCommunicate: getWorkspaceRef(source) + getWorkspaceRef(target). - // Both are root-level workspaces (parent_id=NULL) → root-level siblings → allowed. + // Source and target are siblings under one shared parent (one tenant) → + // CanCommunicate allowed. (#1953: they must NOT both be parent_id=NULL — + // two distinct org roots are now treated as DIFFERENT orgs and routing + // between them is denied. A real delegation happens inside one org.) mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = "). WithArgs(testDeliverySourceID). - WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliverySourceID, nil)) + WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliverySourceID, "ws-org-root-159")) mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = "). WithArgs(testDeliveryTargetID). - WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliveryTargetID, nil)) + WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliveryTargetID, "ws-org-root-159")) + + // #1953 cross-tenant guard: same-org check after CanCommunicate. Both + // resolve to the same org root → routing allowed. + mock.ExpectQuery("WITH RECURSIVE org_chain AS"). + WithArgs(testDeliverySourceID). + WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow("ws-org-root-159")) + mock.ExpectQuery("WITH RECURSIVE org_chain AS"). + WithArgs(testDeliveryTargetID). + WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow("ws-org-root-159")) // resolveAgentURL: test callers always set the URL in Redis (mr.Set ws:{id}:url), // so resolveAgentURL gets a cache hit and never falls back to DB. diff --git a/workspace-server/internal/handlers/discovery.go b/workspace-server/internal/handlers/discovery.go index f97408ae7..ce679b29d 100644 --- a/workspace-server/internal/handlers/discovery.go +++ b/workspace-server/internal/handlers/discovery.go @@ -237,7 +237,17 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) { var peers []map[string]interface{} - // Siblings + // Siblings — workspaces sharing the caller's parent. + // + // #1953 cross-tenant isolation: the OLD code's else-branch handled the + // org-root caller (parent_id IS NULL) by returning EVERY workspace with + // parent_id IS NULL — i.e. every other tenant's org root, since the + // workspaces table has no org_id column. That leaked peer identities/URLs + // across tenants. An org root has no siblings inside its own org (each + // tenant is a distinct org root), so the org-root caller now gets an empty + // sibling set; its real peers are its children, returned below. Only the + // parent_id-bound branch enumerates siblings, and that is already scoped to + // one parent (one tenant). if parentID.Valid { siblings, _ := queryPeerMaps(` SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status, @@ -246,14 +256,6 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) { FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`, parentID.String, workspaceID) peers = append(peers, siblings...) - } else { - siblings, _ := queryPeerMaps(` - SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status, - COALESCE(w.agent_card, 'null'::jsonb), COALESCE(w.url, ''), - w.parent_id, w.active_tasks - FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`, - workspaceID) - peers = append(peers, siblings...) } // Children — exclude self defensively. A child row whose parent_id diff --git a/workspace-server/internal/handlers/discovery_test.go b/workspace-server/internal/handlers/discovery_test.go index d803226a6..be855323a 100644 --- a/workspace-server/internal/handlers/discovery_test.go +++ b/workspace-server/internal/handlers/discovery_test.go @@ -223,10 +223,10 @@ func TestPeers_RootWorkspace_NoPeers(t *testing.T) { peerCols := []string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"} - // Siblings (other root-level workspaces) — none - mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id IS NULL AND w.id != \\$1"). - WithArgs("ws-root-alone"). - WillReturnRows(sqlmock.NewRows(peerCols)) + // #1953: an org-root caller (parent_id IS NULL) now issues NO sibling + // query at all. The old `WHERE w.parent_id IS NULL` sibling read returned + // EVERY tenant's org root (cross-tenant leak); an org root has no siblings + // inside its own org, so the handler skips the sibling read entirely. // Children — none. #383 added explicit `w.id != $2` self-filter. mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2"). diff --git a/workspace-server/internal/handlers/handlers_extended_test.go b/workspace-server/internal/handlers/handlers_extended_test.go index 5b82e7fc6..fdd86d489 100644 --- a/workspace-server/internal/handlers/handlers_extended_test.go +++ b/workspace-server/internal/handlers/handlers_extended_test.go @@ -453,6 +453,14 @@ func TestExtended_DiscoverMissingHeader(t *testing.T) { // ---------- TestPeers (Extended) ---------- +// TestExtended_Peers verifies a root-level (org-root) workspace's peer view. +// +// #1953: previously a root-level caller issued `WHERE w.parent_id IS NULL` +// for siblings, which returned EVERY other tenant's org root as a "peer" +// (cross-tenant leak, since the workspaces table has no org_id column). After +// the fix an org root has no cross-tenant siblings; its only peers are its own +// children. This test asserts the child is returned and that NO sibling query +// is issued (no `parent_id IS NULL` read). func TestExtended_Peers(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) @@ -463,17 +471,14 @@ func TestExtended_Peers(t *testing.T) { WithArgs("ws-peer"). WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil)) - // Expect root-level siblings query (parent IS NULL, excluding self) - mock.ExpectQuery("SELECT w.id, w.name"). - WithArgs("ws-peer"). - WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}). - AddRow("ws-sibling", "Sibling Agent", "worker", 1, "online", []byte("null"), "http://localhost:9001", nil, 0)) + // NO root-level sibling query is issued for an org-root caller anymore. - // Expect children query (workspaces with parent_id = ws-peer, excluding self) - // Query now binds (parent_id, self_id) for the self-filter guard added in #383. + // Children query (workspaces with parent_id = ws-peer, excluding self). + // Query binds (parent_id, self_id) for the self-filter guard added in #383. mock.ExpectQuery("SELECT w.id, w.name"). WithArgs("ws-peer", "ws-peer"). - WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"})) + WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}). + AddRow("ws-child", "Child Agent", "worker", 1, "online", []byte("null"), "http://localhost:9001", "ws-peer", 0)) // No parent query since workspace is root-level @@ -493,10 +498,10 @@ func TestExtended_Peers(t *testing.T) { t.Fatalf("failed to parse response: %v", err) } if len(resp) != 1 { - t.Fatalf("expected 1 peer, got %d", len(resp)) + t.Fatalf("expected 1 peer (the child), got %d", len(resp)) } - if resp[0]["name"] != "Sibling Agent" { - t.Errorf("expected peer name 'Sibling Agent', got %v", resp[0]["name"]) + if resp[0]["name"] != "Child Agent" { + t.Errorf("expected peer name 'Child Agent', got %v", resp[0]["name"]) } if err := mock.ExpectationsWereMet(); err != nil { diff --git a/workspace-server/internal/handlers/mcp_tools.go b/workspace-server/internal/handlers/mcp_tools.go index cadc07963..247071007 100644 --- a/workspace-server/internal/handlers/mcp_tools.go +++ b/workspace-server/internal/handlers/mcp_tools.go @@ -97,7 +97,15 @@ func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (str const cols = `SELECT w.id, w.name, COALESCE(w.role,''), w.status, w.tier` - // Siblings + // Siblings — workspaces sharing the caller's parent. + // + // #1953 cross-tenant isolation: the OLD else-branch returned every + // workspace with parent_id IS NULL when the caller was itself an org root, + // i.e. every other tenant's org root (the workspaces table has no org_id + // column). That leaked peer identities across tenants via MCP list_peers. + // An org root has no siblings inside its own org, so the org-root caller + // now gets no siblings; its peers are its children, enumerated below. Only + // the parent_id-bound branch enumerates siblings, scoped to one tenant. if parentID.Valid { rows, err := h.database.QueryContext(ctx, cols+` FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`, @@ -107,15 +115,6 @@ func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (str log.Printf("MCP toolListPeers: sibling scan error: %v", scanErr) } } - } else { - rows, err := h.database.QueryContext(ctx, - cols+` FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`, - workspaceID) - if err == nil { - if scanErr := scanPeers(rows); scanErr != nil { - log.Printf("MCP toolListPeers: sibling scan error: %v", scanErr) - } - } } // Children diff --git a/workspace-server/internal/handlers/org_scope.go b/workspace-server/internal/handlers/org_scope.go new file mode 100644 index 000000000..a9ccb4957 --- /dev/null +++ b/workspace-server/internal/handlers/org_scope.go @@ -0,0 +1,104 @@ +package handlers + +// org_scope.go — cross-tenant isolation helpers (#1953). +// +// The `workspaces` table has no `org_id` column; an "org" is the subtree of +// workspaces reachable through the `parent_id` chain from a single org root +// (a row with parent_id IS NULL). Several code paths historically computed an +// org-root sibling set as `WHERE parent_id IS NULL`, which matches EVERY +// tenant's org root and therefore leaks peer metadata / routing across tenants. +// +// This file centralises the org-scoping primitive so peer discovery, the MCP +// list_peers tool, and a2a routing all derive "the caller's org" the SAME way +// the OFFSEC-015 broadcast fix (commit 5a05302c, workspace_broadcast.go) does: +// a recursive CTE that walks the parent_id chain up to the org root. Keeping +// the CTE in one place means there is a single, testable source of truth for +// tenant isolation rather than four hand-copied queries that can drift. +// +// NOTE: this is the parent_id-chain scoping that the broadcast fix already +// ships. It is deliberately NOT an `org_id` column — adding that column is a +// separate architecture decision pending CTO sign-off. See #1953. + +import ( + "context" + "database/sql" + "errors" +) + +// errNoOrgRoot is returned by orgRootID when the workspace id has no row (and +// therefore no resolvable org root). Callers translate this into a 404/not-found +// at their own layer; it is distinct from a transient DB error so a missing +// workspace never gets treated as "belongs to every org". +var errNoOrgRoot = errors.New("org root not found for workspace") + +// orgRootSubtreeCTE is the recursive CTE — identical in shape to the OFFSEC-015 +// broadcast fix — that walks UP the parent_id chain from a single workspace to +// its org root. The org root is the row on the chain whose parent_id IS NULL. +// +// $1 = workspace id to resolve +// +// The recursive member walks UP the parent_id chain: each step joins to the row +// whose id is the current row's parent_id. The topmost ancestor is the single +// chain row with parent_id IS NULL — and THAT row's own `id` is the org root. +// +// We select that parentless row's `id` (aliased root_id). We must NOT carry a +// fixed `id AS root_id` from the recursive seed: that value is just the input +// workspace id, so a non-root caller (e.g. a child delegating to a sibling) +// would resolve to ITSELF instead of its org root, and sameOrg() would wrongly +// report two genuinely same-org workspaces as different orgs and 403 a +// legitimate a2a route. A workspace that already IS an org root has a one-row +// chain whose id == itself, so it correctly resolves to itself. +const orgRootSubtreeCTE = ` + WITH RECURSIVE org_chain AS ( + SELECT id, parent_id + FROM workspaces + WHERE id = $1 + UNION ALL + SELECT w.id, w.parent_id + FROM workspaces w + JOIN org_chain c ON w.id = c.parent_id + ) + SELECT id AS root_id FROM org_chain WHERE parent_id IS NULL LIMIT 1 +` + +// orgRootID resolves the org root of `workspaceID` by walking the parent_id +// chain via orgRootSubtreeCTE. Returns errNoOrgRoot when the workspace (or its +// chain) yields no org root row, and the underlying error on any DB failure. +// +// This is the SAME lookup the broadcast handler performs inline; the three +// leak paths in #1953 call this instead of re-deriving "the org" from +// `parent_id IS NULL` (which spans all tenants). +func orgRootID(ctx context.Context, database *sql.DB, workspaceID string) (string, error) { + var root string + err := database.QueryRowContext(ctx, orgRootSubtreeCTE, workspaceID).Scan(&root) + if errors.Is(err, sql.ErrNoRows) { + return "", errNoOrgRoot + } + if err != nil { + return "", err + } + if root == "" { + return "", errNoOrgRoot + } + return root, nil +} + +// sameOrg reports whether workspaces `a` and `b` share an org root, i.e. they +// belong to the same tenant. Used by a2a routing to reject resolving/dispatching +// to a workspace id outside the caller's org. Fail-CLOSED: any lookup error or +// missing org root yields (false, err) so a DB hiccup denies cross-tenant +// routing rather than allowing it. +func sameOrg(ctx context.Context, database *sql.DB, a, b string) (bool, error) { + if a == b { + return true, nil + } + rootA, err := orgRootID(ctx, database, a) + if err != nil { + return false, err + } + rootB, err := orgRootID(ctx, database, b) + if err != nil { + return false, err + } + return rootA == rootB, nil +}