feat(org-import): make createWorkspaceTree idempotent (Phase 3 of #2857)

OrgHandler.Import was non-idempotent — every call INSERTed a fresh row
for every workspace in the tree, regardless of whether matching
workspaces already existed. Calling /org/import twice with the same
template duplicated the entire tree.

This was the bigger leak source than TeamHandler.Expand (deleted in
PR #2856). tenant-hongming accumulated 72 distinct child workspaces
in 4 days entirely from repeated org-template spawns of the same
template — the (tier × runtime) matrix in the audit data was the
template's static shape, multiplied by spawn count.

Fix: route through a new lookupExistingChild helper before INSERT.
Skip-if-exists semantics by default:
- Match on (parent_id, name) using `IS NOT DISTINCT FROM` so NULL
  parents (root workspaces) are included.
- Ignore status='removed' rows so collapsed teams or deleted
  workspaces don't block re-import.
- Recursion still runs on the existing id so partial-match templates
  (parent exists, some children missing) backfill correctly instead
  of either no-op'ing the whole subtree or duplicating the existing
  children.
- Result entries for skipped nodes carry skipped:true so callers
  (canvas Import preflight modal) can surface "5 of 7 already
  existed, 2 created."

The recursion that walked ws.Children is extracted into
recurseChildrenForImport so both the create-path and the skip-path
share one implementation — no duplicated grid math, no two paths to
keep in sync.

Note: replace_if_exists semantics (re-roll: stop+delete old, create
new) are deferred. Skip-if-exists alone closes the leak; re-roll is
a later UX decision for the canvas Import preflight modal.

Tests:
- 4 sqlmock cases on lookupExistingChild: not-found, found,
  nil-parent (the IS NOT DISTINCT FROM NULL trick), DB-error
  propagates (must fail fast — silent fallback to INSERT is the
  failure mode the helper exists to prevent).
- 1 source-level AST gate (per memory feedback_behavior_based_ast_gates.md):
  pins that h.lookupExistingChild( appears BEFORE INSERT INTO workspaces
  in org_import.go. If a future refactor reintroduces the un-checked
  INSERT, the gate fails. Verified load-bearing by removing the call —
  build fails (helper symbol gone).

go vet ./... clean. go test ./internal/handlers/ -count 1 — all green
(4.2s, no regression on existing OrgImport / Provision / Team tests).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-05-05 03:37:49 -07:00
parent 471dff25e9
commit d6337a1ae9
2 changed files with 235 additions and 24 deletions

View File

@ -5,6 +5,7 @@ package handlers
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"log"
@ -64,6 +65,33 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
tier = 2
}
ctxLookup := context.Background()
// Idempotency: if a workspace with the same (parent_id, name) already
// exists, skip the INSERT + canvas_layouts + broadcast + provisioning.
// This is what makes /org/import safe to call multiple times — the
// historical leak was every call recreating the entire tree (see
// tenant-hongming, 72 distinct child workspaces in 4 days, all from
// repeated org-template spawns of the same template).
//
// Recursion still runs on the existing id so partial-match templates
// (parent exists, some children missing) backfill the missing children
// instead of either no-op'ing the whole subtree or duplicating the
// existing children.
existingID, existing, lookupErr := h.lookupExistingChild(ctxLookup, ws.Name, parentID)
if lookupErr != nil {
return fmt.Errorf("idempotency check for %s: %w", ws.Name, lookupErr)
}
if existing {
log.Printf("Org import: %q already exists (id=%s) — skipping create+provision, recursing into children for partial-match", ws.Name, existingID)
*results = append(*results, map[string]interface{}{
"id": existingID,
"name": ws.Name,
"tier": tier,
"skipped": true,
})
return h.recurseChildrenForImport(ws, existingID, absX, absY, defaults, orgBaseDir, results, provisionSem)
}
id := uuid.New().String()
awarenessNS := workspaceAwarenessNamespace(id)
@ -539,31 +567,63 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
}
*results = append(*results, resultEntry)
// Recurse into children. Brief pacing avoids overwhelming Docker when
// creating many containers in sequence; container provisioning runs in
// goroutines so the main createWorkspaceTree returns quickly.
// Children's abs coords = this.abs + childSlotInGrid(index, siblingSizes),
// with sibling sizes computed by sizeOfSubtree so a nested-parent
// child claims a bigger grid slot than a leaf sibling — no slot
// clipping across mixed leaf / parent siblings.
if len(ws.Children) > 0 {
siblingSizes := make([]nodeSize, len(ws.Children))
for i, c := range ws.Children {
siblingSizes[i] = sizeOfSubtree(c)
}
for i, child := range ws.Children {
slotX, slotY := childSlotInGrid(i, siblingSizes)
childAbsX := absX + slotX
childAbsY := absY + slotY
// slotX/slotY are already parent-relative — that's
// exactly what childSlotInGrid returns.
if err := h.createWorkspaceTree(child, &id, childAbsX, childAbsY, slotX, slotY, defaults, orgBaseDir, results, provisionSem); err != nil {
return err
}
time.Sleep(workspaceCreatePacingMs * time.Millisecond)
}
}
// Recurse into children — both create-path and skip-path use the
// same helper so partial-match (parent exists, some children missing)
// backfills correctly without duplicating the recursion logic.
return h.recurseChildrenForImport(ws, id, absX, absY, defaults, orgBaseDir, results, provisionSem)
}
// lookupExistingChild returns the id of an existing workspace under
// (parent_id, name) if any, with idempotency-friendly semantics:
// - parent_id IS NOT DISTINCT FROM matches NULL too (root workspaces)
// - status='removed' rows are ignored — collapsed teams or deleted
// workspaces shouldn't block a re-import
//
// On sql.ErrNoRows: returns ("", false, nil) — caller should INSERT.
// On a real DB error: returns ("", false, err) — caller propagates.
func (h *OrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
var existingID string
err := db.DB.QueryRowContext(ctx, `
SELECT id FROM workspaces
WHERE name = $1
AND parent_id IS NOT DISTINCT FROM $2
AND status != 'removed'
LIMIT 1
`, name, parentID).Scan(&existingID)
if err == sql.ErrNoRows {
return "", false, nil
}
if err != nil {
return "", false, err
}
return existingID, true, nil
}
// recurseChildrenForImport walks ws.Children once, computing each child's
// absolute + parent-relative canvas coordinates from the subtree-aware
// grid (so nested-parent children don't clip into leaf siblings) and
// dispatching createWorkspaceTree for each. Pacing prevents Docker
// container-spam thundering on the self-hosted backend; SaaS dispatches
// the EC2 provision in a goroutine so the main loop is not blocked.
func (h *OrgHandler) recurseChildrenForImport(ws OrgWorkspace, parentID string, absX, absY float64, defaults OrgDefaults, orgBaseDir string, results *[]map[string]interface{}, provisionSem chan struct{}) error {
if len(ws.Children) == 0 {
return nil
}
siblingSizes := make([]nodeSize, len(ws.Children))
for i, c := range ws.Children {
siblingSizes[i] = sizeOfSubtree(c)
}
for i, child := range ws.Children {
slotX, slotY := childSlotInGrid(i, siblingSizes)
childAbsX := absX + slotX
childAbsY := absY + slotY
// slotX/slotY are already parent-relative — that's
// exactly what childSlotInGrid returns.
if err := h.createWorkspaceTree(child, &parentID, childAbsX, childAbsY, slotX, slotY, defaults, orgBaseDir, results, provisionSem); err != nil {
return err
}
time.Sleep(workspaceCreatePacingMs * time.Millisecond)
}
return nil
}

View File

@ -0,0 +1,151 @@
package handlers
import (
"bytes"
"context"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"github.com/DATA-DOG/go-sqlmock"
)
// Tests for the idempotency helper added in #2859 (RFC #2857 Phase 3).
//
// Background: org_import.createWorkspaceTree was non-idempotent —
// every call INSERTed a fresh row for every workspace in the tree,
// regardless of whether matching workspaces already existed. Calling
// /org/import twice with the same template duplicated the entire tree;
// in a 4-day window tenant-hongming accumulated 72 stale child
// workspaces this way.
//
// The fix routes through lookupExistingChild before INSERT. These
// tests pin the helper's three observable behaviors plus an AST gate
// that catches future re-introductions of the un-checked INSERT.
func TestLookupExistingChild_NotFound_ReturnsFalseNoError(t *testing.T) {
mock := setupTestDB(t)
// 0-row result → driver returns sql.ErrNoRows on Scan.
parent := "parent-1"
mock.ExpectQuery(`SELECT id FROM workspaces`).
WithArgs("Alpha", &parent).
WillReturnRows(sqlmock.NewRows([]string{"id"}))
h := &OrgHandler{}
id, found, err := h.lookupExistingChild(context.Background(), "Alpha", &parent)
if err != nil {
t.Fatalf("expected nil error on no-rows, got: %v", err)
}
if found {
t.Errorf("expected found=false on no-rows, got found=true")
}
if id != "" {
t.Errorf("expected empty id on no-rows, got %q", id)
}
}
func TestLookupExistingChild_Found_ReturnsIDAndTrue(t *testing.T) {
mock := setupTestDB(t)
parent := "parent-1"
mock.ExpectQuery(`SELECT id FROM workspaces`).
WithArgs("Alpha", &parent).
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-existing-uuid"))
h := &OrgHandler{}
id, found, err := h.lookupExistingChild(context.Background(), "Alpha", &parent)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if !found {
t.Errorf("expected found=true when row exists")
}
if id != "ws-existing-uuid" {
t.Errorf("expected id=ws-existing-uuid, got %q", id)
}
}
func TestLookupExistingChild_NilParent_MatchesRoot(t *testing.T) {
// `parent_id IS NOT DISTINCT FROM NULL` is the load-bearing trick —
// a plain `=` would never match a NULL row. Pin that roots
// (parent_id=NULL) are still found by the lookup.
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT id FROM workspaces`).
WithArgs("RootAgent", (*string)(nil)).
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-root-uuid"))
h := &OrgHandler{}
id, found, err := h.lookupExistingChild(context.Background(), "RootAgent", nil)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if !found || id != "ws-root-uuid" {
t.Errorf("expected found=true id=ws-root-uuid, got found=%v id=%q", found, id)
}
}
func TestLookupExistingChild_DBError_Propagates(t *testing.T) {
// A real DB error must NOT be silently swallowed. If the SELECT
// can't run, the caller fails fast — never falls back to creating
// a duplicate. That fallback is the failure mode the helper exists
// to prevent.
mock := setupTestDB(t)
parent := "parent-1"
connFail := errors.New("simulated postgres unavailable")
mock.ExpectQuery(`SELECT id FROM workspaces`).
WithArgs("Alpha", &parent).
WillReturnError(connFail)
h := &OrgHandler{}
id, found, err := h.lookupExistingChild(context.Background(), "Alpha", &parent)
if err == nil {
t.Fatalf("expected DB error to propagate, got nil")
}
// Helper returns the raw error, not a wrap — match by string for
// portability across error wrapping conventions.
if !strings.Contains(err.Error(), "simulated postgres unavailable") {
t.Errorf("expected the original DB error to surface, got: %v", err)
}
if found {
t.Errorf("expected found=false on DB error, got found=true")
}
if id != "" {
t.Errorf("expected empty id on DB error, got %q", id)
}
}
// Source-level guard — pins that org_import.go calls
// h.lookupExistingChild BEFORE its INSERT INTO workspaces.
//
// Per memory feedback_behavior_based_ast_gates.md: pin the behavior
// (idempotency check before INSERT), not just function names. If a
// future refactor reintroduces the un-checked INSERT (the original
// bug shape that leaked 72 workspaces in 4 days), this test fails.
func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) {
wd, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
src, err := os.ReadFile(filepath.Join(wd, "org_import.go"))
if err != nil {
t.Fatalf("read org_import.go: %v", err)
}
lookupAt := bytes.Index(src, []byte("h.lookupExistingChild("))
insertAt := bytes.Index(src, []byte("INSERT INTO workspaces"))
if lookupAt < 0 {
t.Fatalf("org_import.go missing call to h.lookupExistingChild — idempotency check removed?")
}
if insertAt < 0 {
t.Fatalf("org_import.go missing INSERT INTO workspaces — schema change?")
}
if lookupAt > insertAt {
t.Errorf("h.lookupExistingChild must come BEFORE INSERT INTO workspaces in org_import.go (lookup@%d, insert@%d) — non-idempotent ordering would re-leak under repeat /org/import calls", lookupAt, insertAt)
}
}