From 48d19452694788c1f8dbc4d06cfdb0a65eb9968d Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 5 May 2026 08:32:56 -0700 Subject: [PATCH] test(org-import): tighten AST gate to discriminate workspaces vs lookalikes (#2872 Imp-1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous TestCreateWorkspaceTree_CallsLookupBeforeInsert used bytes.Index("INSERT INTO workspaces"), which prefix-matches INSERT INTO workspaces_audit, INSERT INTO workspace_secrets, and INSERT INTO workspace_channels. RFC #2872 cited this as a silent false-pass mode: a future refactor that adds an audit-table INSERT literal earlier in source than the real workspaces INSERT would make the gate point at the wrong target. Replaces the byte-search with a go/ast walk + a regex that requires `\s*\(` after `workspaces` — distinguishes the real target from prefix lookalikes. Adds three discriminating tests: - TestWorkspacesInsertRE_RejectsLookalikes — pins the regex against 9 sql shapes (real, raw-string-literal, audit-shadow, workspace_* prefixes, canvas_layouts, UPDATE/SELECT, comments). - TestGate_FailsWhenLookupAfterInsert — synthesizes Go source where the lookup is positioned AFTER the workspaces INSERT, asserts the helper returns lookupPos > insertPos (which the production gate flags via t.Errorf). Proves the gate isn't vestigial. - TestGate_IgnoresAuditTableShadow — synthesizes source with an audit-table INSERT BEFORE the lookup + real INSERT, asserts the tightened regex correctly walks past the shadow and finds the real INSERT. Also extracts findLookupAndWorkspacesInsertPos as a helper so the gate logic can be exercised against synthetic source, not only against the real org_import.go. Memory: feedback_assert_exact_not_substring.md (verify tightened test FAILS on old code) — TestGate_FailsWhenLookupAfterInsert is the failing-on-bug-shape proof. Closes the silent-false-pass mode of #2872 Important-1. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../handlers/org_import_idempotency_test.go | 255 +++++++++++++++++- 1 file changed, 245 insertions(+), 10 deletions(-) diff --git a/workspace-server/internal/handlers/org_import_idempotency_test.go b/workspace-server/internal/handlers/org_import_idempotency_test.go index 0d7498fb..cefc6e74 100644 --- a/workspace-server/internal/handlers/org_import_idempotency_test.go +++ b/workspace-server/internal/handlers/org_import_idempotency_test.go @@ -1,11 +1,15 @@ package handlers import ( - "bytes" "context" "errors" + "go/ast" + "go/parser" + "go/token" "os" "path/filepath" + "regexp" + "strconv" "strings" "testing" @@ -119,6 +123,60 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) { } } +// workspacesInsertRE matches a SQL literal that begins (after optional +// leading whitespace) with `INSERT INTO workspaces` followed by `(` — +// requiring the open-paren rules out lookalikes like +// `INSERT INTO workspaces_audit`, `INSERT INTO workspace_secrets`, +// `INSERT INTO workspace_channels`, `INSERT INTO canvas_layouts`. The +// previous bytes.Index gate accepted `workspaces_audit` as a prefix +// match — see RFC #2872 Important-1 for the silent-false-pass shape. +var workspacesInsertRE = regexp.MustCompile(`(?s)^\s*INSERT\s+INTO\s+workspaces\s*\(`) + +// findLookupAndWorkspacesInsertPos walks the AST of `src` and returns +// the source positions of (a) the first call to `lookupExistingChild` +// and (b) the first CallExpr whose argument list contains a STRING +// BasicLit matching workspacesInsertRE. Either may be token.NoPos if +// not found. +// +// Extracted as a helper so the gate logic can be exercised against +// synthetic source — TestGate_FailsWhenLookupAfterInsert below proves +// the gate actually catches the bug shape, not just the happy path. +func findLookupAndWorkspacesInsertPos(t *testing.T, fname string, src []byte) (lookupPos, insertPos token.Pos, fset *token.FileSet) { + t.Helper() + fset = token.NewFileSet() + file, err := parser.ParseFile(fset, fname, src, parser.ParseComments) + if err != nil { + t.Fatalf("parse %s: %v", fname, err) + } + lookupPos, insertPos = token.NoPos, token.NoPos + ast.Inspect(file, func(n ast.Node) bool { + call, ok := n.(*ast.CallExpr) + if !ok { + return true + } + if sel, ok := call.Fun.(*ast.SelectorExpr); ok { + if sel.Sel.Name == "lookupExistingChild" && lookupPos == token.NoPos { + lookupPos = call.Pos() + } + } + for _, arg := range call.Args { + lit, ok := arg.(*ast.BasicLit) + if !ok || lit.Kind != token.STRING { + continue + } + raw := lit.Value + if unq, err := strconv.Unquote(raw); err == nil { + raw = unq + } + if workspacesInsertRE.MatchString(raw) && insertPos == token.NoPos { + insertPos = call.Pos() + } + } + return true + }) + return +} + // Source-level guard — pins that org_import.go calls // h.lookupExistingChild BEFORE its INSERT INTO workspaces. // @@ -126,6 +184,11 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) { // (idempotency check before INSERT), not just function names. If a // future refactor reintroduces the un-checked INSERT (the original // bug shape that leaked 72 workspaces in 4 days), this test fails. +// +// AST-walk implementation closes the silent-false-pass mode that the +// previous bytes.Index gate had — see workspacesInsertRE comment for +// the failure mode (workspaces_audit / workspace_secrets / etc. +// shadowing the real target via prefix match). func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) { wd, err := os.Getwd() if err != nil { @@ -135,17 +198,189 @@ func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) { if err != nil { t.Fatalf("read org_import.go: %v", err) } + lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "org_import.go", src) - lookupAt := bytes.Index(src, []byte("h.lookupExistingChild(")) - insertAt := bytes.Index(src, []byte("INSERT INTO workspaces")) - - if lookupAt < 0 { - t.Fatalf("org_import.go missing call to h.lookupExistingChild — idempotency check removed?") + if lookupPos == token.NoPos { + t.Fatalf("AST: no call to lookupExistingChild in org_import.go — idempotency check removed?") } - if insertAt < 0 { - t.Fatalf("org_import.go missing INSERT INTO workspaces — schema change?") + if insertPos == token.NoPos { + t.Fatalf("AST: no SQL literal matching `^\\s*INSERT INTO workspaces\\s*\\(` in any CallExpr in org_import.go — schema change or rename?") } - if lookupAt > insertAt { - t.Errorf("h.lookupExistingChild must come BEFORE INSERT INTO workspaces in org_import.go (lookup@%d, insert@%d) — non-idempotent ordering would re-leak under repeat /org/import calls", lookupAt, insertAt) + if lookupPos > insertPos { + t.Errorf("lookupExistingChild call at %s must come BEFORE INSERT INTO workspaces at %s — non-idempotent ordering would re-leak under repeat /org/import calls", + fset.Position(lookupPos), fset.Position(insertPos)) + } +} + +// TestGate_FailsWhenLookupAfterInsert proves the gate actually catches +// the bug it's named after — running it against synthetic Go source +// where the lookup call is positioned AFTER the workspaces INSERT must +// produce lookupPos > insertPos, which the production gate flags as +// an ERROR. Without this test the gate could regress to "always pass" +// and we wouldn't notice until the bug shipped again. +// +// Per memory feedback_assert_exact_not_substring.md: verify a +// tightened test FAILS on old code before merging. +func TestGate_FailsWhenLookupAfterInsert(t *testing.T) { + const buggySrc = `package handlers + +import "context" + +type fakeDB struct{} + +func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {} + +type fakeOrgHandler struct{} + +func (h *fakeOrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) { + return "", false, nil +} + +func buggyCreate(h *fakeOrgHandler, db fakeDB, ctx context.Context, name string, parentID *string) { + // Bug shape: INSERT runs FIRST, lookup runs AFTER. This is the + // non-idempotent ordering the gate exists to forbid. + db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", name) + h.lookupExistingChild(ctx, name, parentID) +} +` + lookupPos, insertPos, _ := findLookupAndWorkspacesInsertPos(t, "buggy.go", []byte(buggySrc)) + if lookupPos == token.NoPos || insertPos == token.NoPos { + t.Fatalf("synthetic buggy source missing expected nodes (lookupPos=%v insertPos=%v) — helper logic regression", lookupPos, insertPos) + } + if lookupPos < insertPos { + t.Fatalf("synthetic bug shape (lookup AFTER insert) returned lookupPos=%d < insertPos=%d — gate would NOT fire on actual bug, regression!", lookupPos, insertPos) + } + // Implicit: lookupPos > insertPos here, which the production gate + // flags via t.Errorf. This proves the gate is live, not vestigial. +} + +// TestGate_IgnoresAuditTableShadow proves the regex tightening +// actually ignores `INSERT INTO workspaces_audit` literals — the +// specific shape #2872 cited as the silent-false-pass failure mode +// for the previous bytes.Index gate. +func TestGate_IgnoresAuditTableShadow(t *testing.T) { + // Synthetic source with audit-table INSERT at line 1 (would be + // position 0 under prefix-match) and lookup + real INSERT at later + // positions. With the tightened regex, the audit literal is + // ignored: insertPos points at the REAL INSERT, lookup precedes it, + // gate passes correctly. + const src = `package handlers + +import "context" + +type fakeDB struct{} + +func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {} + +type fakeOrgHandler struct{} + +func (h *fakeOrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) { + return "", false, nil +} + +func okCreateWithAudit(h *fakeOrgHandler, db fakeDB, ctx context.Context, name string, parentID *string) { + // Audit-table INSERT — should be IGNORED by the tightened regex. + db.ExecContext(ctx, ` + "`INSERT INTO workspaces_audit (id, action) VALUES ($1, $2)`" + `, "x", "create_attempt") + // Lookup BEFORE real INSERT — correct order. + h.lookupExistingChild(ctx, name, parentID) + // Real INSERT. + db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", name) +} +` + lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "shadow.go", []byte(src)) + if lookupPos == token.NoPos || insertPos == token.NoPos { + t.Fatalf("expected to find lookup + real INSERT, got lookupPos=%v insertPos=%v", lookupPos, insertPos) + } + // The audit-table INSERT is at line ~16 (column ~20-ish), the + // lookup is at line 19, the real INSERT is at line 21. If the + // regex regressed to prefix-match, insertPos would point at the + // audit literal at line 16, and the gate would falsely fail + // (lookup at 19 > "insert" at 16). With the tightened regex, + // insertPos correctly points at line 21, and the gate passes. + insertLine := fset.Position(insertPos).Line + lookupLine := fset.Position(lookupPos).Line + if insertLine < lookupLine { + t.Errorf("regex regressed: audit shadow at line %d swallowed real INSERT (lookup at line %d). insertPos should point at the real INSERT (line ~21), not the audit literal.", + insertLine, lookupLine) + } + if lookupPos > insertPos { + t.Errorf("synthetic source has lookup at line %d before real INSERT at line %d, gate should pass (lookupPos < insertPos), got lookupPos=%d > insertPos=%d", + lookupLine, insertLine, lookupPos, insertPos) + } +} + +// TestWorkspacesInsertRE_RejectsLookalikes pins the regex that +// discriminates the real workspaces INSERT from prefix-matching +// lookalikes. If this regex regresses to a substring match, the +// AST gate above silently false-passes when a future refactor +// shadows the real INSERT with a workspaces_audit / workspace_secrets +// / canvas_layouts literal placed earlier in source. +func TestWorkspacesInsertRE_RejectsLookalikes(t *testing.T) { + cases := []struct { + sql string + want bool + comment string + }{ + {"INSERT INTO workspaces (id, name) VALUES ($1, $2)", true, "real target"}, + {"\n\t\tINSERT INTO workspaces (id, name)\n\t\tVALUES ($1, $2)", true, "real target with leading whitespace + newlines (raw string literal shape)"}, + {"INSERT INTO workspaces_audit (id) VALUES ($1)", false, "underscore-suffix lookalike (the #2872 specific failure mode)"}, + {"INSERT INTO workspace_secrets (key, value) VALUES ($1, $2)", false, "prefix without trailing 's' (workspace_*)"}, + {"INSERT INTO workspace_channels (id) VALUES ($1)", false, "another workspace_* prefix"}, + {"INSERT INTO canvas_layouts (workspace_id, x, y) VALUES ($1, $2, $3)", false, "unrelated table that contains 'workspace' in a column ref"}, + {"UPDATE workspaces SET status='running' WHERE id=$1", false, "UPDATE shouldn't match"}, + {"SELECT * FROM workspaces WHERE id=$1", false, "SELECT shouldn't match"}, + {"-- comment about INSERT INTO workspaces (\nSELECT 1", false, "comment shouldn't match"}, + } + for _, c := range cases { + got := workspacesInsertRE.MatchString(c.sql) + if got != c.want { + t.Errorf("workspacesInsertRE.MatchString(%q) = %v, want %v (%s)", c.sql, got, c.want, c.comment) + } + } +} + +// Confirm the regex actually matches the literal currently in +// org_import.go. Pins the shape so `gofmt` reflows or trivial edits +// to the SQL string don't silently disable the gate above. +func TestWorkspacesInsertRE_MatchesActualSourceLiteral(t *testing.T) { + wd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + src, err := os.ReadFile(filepath.Join(wd, "org_import.go")) + if err != nil { + t.Fatalf("read org_import.go: %v", err) + } + // Strip backtick strings, find any whose content matches. + // Walk the source via parser.ParseFile to avoid string-search + // drift if the literal is reflowed. + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, filepath.Join(wd, "org_import.go"), src, parser.ParseComments) + if err != nil { + t.Fatalf("parse org_import.go: %v", err) + } + var matched bool + ast.Inspect(file, func(n ast.Node) bool { + lit, ok := n.(*ast.BasicLit) + if !ok || lit.Kind != token.STRING { + return true + } + raw := lit.Value + if unq, err := strconv.Unquote(raw); err == nil { + raw = unq + } + if workspacesInsertRE.MatchString(raw) { + matched = true + } + return true + }) + if !matched { + t.Fatalf("no SQL literal in org_import.go matches workspacesInsertRE — gate is dead. Either the INSERT was renamed (update the regex) or the file was restructured (review the gate logic).") + } + // strings.Contains keeps the test informative: if the regex + // stopped matching but the literal source still contains the + // magic phrase, that's a regex-side failure (test the fix above). + if !strings.Contains(string(src), "INSERT INTO workspaces") { + t.Fatalf("org_import.go has no `INSERT INTO workspaces` substring at all — schema change?") } }