fix(installPlatformAgent): harden org_plugin_allowlist migration + row locks + truthful status (core#2508) #2528

Merged
agent-reviewer merged 9 commits from fix/core-2508-install-platform-agent-hardening into main 2026-06-11 04:55:49 +00:00
2 changed files with 154 additions and 6 deletions
@@ -503,12 +503,34 @@ func installPlatformAgent(ctx context.Context, database *sql.DB, platformID, nam
}
defer func() { _ = tx.Rollback() }() // no-op after Commit
// 0. If a different platform root already exists, downgrade it to 'workspace'
// so it no longer blocks the partial unique index uniq_workspaces_one_platform_root.
// It will then be picked up as an old root and re-parented under the new
// platform agent (defense-in-depth: covers pathological cases where a
// platform root was created with a non-canonical id, e.g. test fixtures
// or a prior failed migration).
if _, err := tx.ExecContext(ctx, `
UPDATE workspaces SET kind = 'workspace', updated_at = now()
WHERE kind = 'platform' AND parent_id IS NULL AND id <> $1
`, platformID); err != nil {
return fmt.Errorf("downgrade existing platform root: %w", err)
}
// 1. Ensure the platform-agent row exists as a kind='platform' root.
// ON CONFLICT keeps it a platform root if it was pre-seeded; the row is
// tier 0 and never billed/provisioned as an ordinary workspace EC2.
// Status starts as 'offline' — there is no container yet; 'online' is a
// green-dot lie until the first heartbeat (core#2508).
//
// parent_name_uniq collision: the platform-agent name is deterministic
// and unique per org by construction (e.g. "Org Concierge"). A pre-
// existing root with the same name is a data inconsistency; we fail loud
// rather than silently rename/reparent, which could orphan billing or
// provisioning state. The integration tests use unique names per fixture
// to avoid cross-test collision (CR-A RC 10610).
if _, err := tx.ExecContext(ctx, `
INSERT INTO workspaces (id, name, kind, tier, status, runtime, parent_id)
VALUES ($1, $2, 'platform', 0, 'online', 'claude-code', NULL)
VALUES ($1, $2, 'platform', 0, 'offline', 'claude-code', NULL)
ON CONFLICT (id) DO UPDATE SET kind = 'platform', runtime = 'claude-code', parent_id = NULL
`, platformID, name); err != nil {
return fmt.Errorf("upsert platform agent: %w", err)
@@ -517,8 +539,10 @@ func installPlatformAgent(ctx context.Context, database *sql.DB, platformID, nam
// 2. Capture the org's other current roots (everything at parent_id IS NULL
// except the platform agent itself). In a one-org tenant DB this is the
// single team root; the query tolerates 0 (already installed) or N.
// FOR UPDATE prevents a root created mid-install from escaping re-parent
// (core#2508).
rows, err := tx.QueryContext(ctx,
`SELECT id FROM workspaces WHERE parent_id IS NULL AND id <> $1`, platformID)
`SELECT id FROM workspaces WHERE parent_id IS NULL AND id <> $1 FOR UPDATE`, platformID)
if err != nil {
return fmt.Errorf("select old roots: %w", err)
}
@@ -549,10 +573,30 @@ func installPlatformAgent(ctx context.Context, database *sql.DB, platformID, nam
`UPDATE org_api_tokens SET org_id = $1 WHERE org_id = $2`, platformID, root); err != nil {
return fmt.Errorf("migrate org_api_tokens for %s: %w", root, err)
}
if _, err := tx.ExecContext(ctx,
`UPDATE org_plugin_allowlist SET org_id = $1 WHERE org_id = $2`, platformID, root); err != nil {
// org_plugin_allowlist has UNIQUE(org_id, plugin_name). A plain UPDATE
// collides 23505 when N>1 old roots allowlisted the SAME plugin.
// INSERT…SELECT…ON CONFLICT DO NOTHING deduplicates; DELETE leftovers
// cleans the old-root rows (core#2508).
//
// Column-list matches the actual schema (026_org_plugin_allowlist.up.sql):
// id, org_id, plugin_name, enabled_by, enabled_at
// id is auto-populated by gen_random_uuid(); enabled_by is NOT NULL but
// preserved from the old root row (same admin who enabled it);
// enabled_at is preserved verbatim (no audit-time rewrite on
// re-parenting — the original "when this was enabled" stays stable).
if _, err := tx.ExecContext(ctx, `
INSERT INTO org_plugin_allowlist (org_id, plugin_name, enabled_by, enabled_at)
SELECT $1, plugin_name, enabled_by, enabled_at
FROM org_plugin_allowlist
WHERE org_id = $2
ON CONFLICT (org_id, plugin_name) DO NOTHING
`, platformID, root); err != nil {
return fmt.Errorf("migrate org_plugin_allowlist for %s: %w", root, err)
}
if _, err := tx.ExecContext(ctx,
`DELETE FROM org_plugin_allowlist WHERE org_id = $1`, root); err != nil {
return fmt.Errorf("delete old org_plugin_allowlist for %s: %w", root, err)
}
}
return tx.Commit()
@@ -68,12 +68,14 @@ func TestIntegration_PlatformAgentInstall_ReparentsRootAndMovesAnchors(t *testin
childID := uuid.New().String()
platformID := uuid.New().String()
paName := "Org Concierge " + tag
cleanup := func() {
_, _ = conn.ExecContext(ctx, `DELETE FROM org_plugin_allowlist WHERE plugin_name = $1`, prefix+"-plugin")
_, _ = conn.ExecContext(ctx, `DELETE FROM org_api_tokens WHERE prefix = $1`, tag)
// child + old root (prefixed names) first, then the platform agent by id
// (root.parent_id references it, so it must go last).
_, _ = conn.ExecContext(ctx, `DELETE FROM workspaces WHERE name LIKE $1`, prefix+"%")
_, _ = conn.ExecContext(ctx, `DELETE FROM workspaces WHERE name = $1`, paName)
_, _ = conn.ExecContext(ctx, `DELETE FROM workspaces WHERE id = $1`, platformID)
}
t.Cleanup(cleanup)
@@ -104,7 +106,7 @@ func TestIntegration_PlatformAgentInstall_ReparentsRootAndMovesAnchors(t *testin
}
// Install.
if err := installPlatformAgent(ctx, conn, platformID, "Org Concierge"); err != nil {
if err := installPlatformAgent(ctx, conn, platformID, paName); err != nil {
t.Fatalf("install: %v", err)
}
@@ -170,7 +172,7 @@ func TestIntegration_PlatformAgentInstall_ReparentsRootAndMovesAnchors(t *testin
assertState("first install")
// Idempotent: second install must not error or change state.
if err := installPlatformAgent(ctx, conn, platformID, "Org Concierge"); err != nil {
if err := installPlatformAgent(ctx, conn, platformID, paName); err != nil {
t.Fatalf("second install (idempotent): %v", err)
}
assertState("second install")
@@ -186,3 +188,105 @@ func TestIntegration_PlatformAgentInstall_ReparentsRootAndMovesAnchors(t *testin
t.Fatalf("team roots after install = %d, want 0 (old root re-parented under platform agent)", nRoots)
}
}
// TestIntegration_PlatformAgentInstall_MultiRootAllowlistDedup guards the
// UNIQUE(org_id, plugin_name) collision on org_plugin_allowlist. When N>1
// old roots have allowlisted the SAME plugin, a plain UPDATE…SET org_id
// collides 23505. The INSERT…SELECT…ON CONFLICT DO NOTHING + DELETE leftovers
// path must survive this deterministically (core#2508).
func TestIntegration_PlatformAgentInstall_MultiRootAllowlistDedup(t *testing.T) {
conn := integrationDB_PlatformAgentInstall(t)
ctx := context.Background()
tag := uuid.New().String()[:8]
prefix := fmt.Sprintf("itest-pallow-%s", tag)
rootA := uuid.New().String()
rootB := uuid.New().String()
platformID := uuid.New().String()
paName := "Org Concierge " + tag
cleanup := func() {
_, _ = conn.ExecContext(ctx, `DELETE FROM org_plugin_allowlist WHERE plugin_name = $1`, prefix+"-plugin")
_, _ = conn.ExecContext(ctx, `DELETE FROM workspaces WHERE name LIKE $1`, prefix+"%")
_, _ = conn.ExecContext(ctx, `DELETE FROM workspaces WHERE name = $1`, paName)
_, _ = conn.ExecContext(ctx, `DELETE FROM workspaces WHERE id = $1`, platformID)
}
t.Cleanup(cleanup)
cleanup()
// Seed TWO old roots with the SAME plugin allowlisted.
for _, rid := range []string{rootA, rootB} {
if _, err := conn.ExecContext(ctx, `
INSERT INTO workspaces (id, name, tier, runtime, status, parent_id)
VALUES ($1, $2, 2, 'claude-code', 'online', NULL)`, rid, prefix+"-"+rid[:8]); err != nil {
t.Fatalf("seed root %s: %v", rid, err)
}
if _, err := conn.ExecContext(ctx, `
INSERT INTO org_plugin_allowlist (org_id, plugin_name, enabled_by)
VALUES ($1, $2, $3)`, rid, prefix+"-plugin", rid); err != nil {
t.Fatalf("seed allowlist for %s: %v", rid, err)
}
}
// Install must NOT fail with 23505 unique violation.
if err := installPlatformAgent(ctx, conn, platformID, paName); err != nil {
t.Fatalf("install with duplicate allowlist: %v", err)
}
// The platform agent must end up with exactly ONE row for the plugin.
var count int
if err := conn.QueryRowContext(ctx,
`SELECT count(*) FROM org_plugin_allowlist WHERE org_id = $1`, platformID).Scan(&count); err != nil {
t.Fatalf("count platform-agent allowlist: %v", err)
}
if count != 1 {
t.Fatalf("platform-agent allowlist count = %d, want 1 (deduped from 2 old roots)", count)
}
// Old roots must have ZERO allowlist rows left.
for _, rid := range []string{rootA, rootB} {
var left int
if err := conn.QueryRowContext(ctx,
`SELECT count(*) FROM org_plugin_allowlist WHERE org_id = $1`, rid).Scan(&left); err != nil {
t.Fatalf("count old-root allowlist %s: %v", rid, err)
}
if left != 0 {
t.Fatalf("old root %s still has %d allowlist rows, want 0 (leftovers deleted)", rid, left)
}
}
}
// TestIntegration_PlatformAgentInstall_StatusNotOnline prevents the green-dot
// lie: a freshly inserted platform-agent row must NOT claim status='online'
// when there is no container yet (core#2508).
func TestIntegration_PlatformAgentInstall_StatusNotOnline(t *testing.T) {
conn := integrationDB_PlatformAgentInstall(t)
ctx := context.Background()
platformID := uuid.New().String()
paName := "Org Concierge " + platformID[:8]
cleanup := func() {
_, _ = conn.ExecContext(ctx, `DELETE FROM workspaces WHERE name = $1`, paName)
_, _ = conn.ExecContext(ctx, `DELETE FROM workspaces WHERE id = $1`, platformID)
}
t.Cleanup(cleanup)
cleanup()
if err := installPlatformAgent(ctx, conn, platformID, paName); err != nil {
t.Fatalf("install: %v", err)
}
var status string
if err := conn.QueryRowContext(ctx,
`SELECT status FROM workspaces WHERE id = $1`, platformID).Scan(&status); err != nil {
t.Fatalf("read status: %v", err)
}
if status == "online" {
t.Fatalf("fresh platform-agent status=%q, must not be 'online' (no container yet)", status)
}
if status != "offline" {
// Allow future changes, but catch the specific lie.
t.Logf("fresh platform-agent status=%q (expected 'offline', but accepting non-online)", status)
}
}