From 6201d12533fc71215aa54418c66a3a3f2a0770e9 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 5 May 2026 11:57:37 -0700 Subject: [PATCH] fix(memory-plugin): embed migrations into binary via go:embed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #2906 shipped the binary at /memory-plugin without the migrations directory. The plugin's runMigrations() resolved a relative path \`cmd/memory-plugin-postgres/migrations\` that exists in the build context but NOT in the runtime image. Every staging tenant boot failed with: memory-plugin-postgres: migrate: read migrations dir "cmd/memory-plugin-postgres/migrations": open cmd/memory-plugin-postgres/migrations: no such file or directory memory-plugin: ❌ /v1/health never returned 200 after 30s — aborting boot Caught on the staging redeploy fleet job after #2906 merged. Tenants stayed on the old image (CP redeploy correctly fail-fasted) but the new image was broken. Fix: \`//go:embed migrations/*.up.sql\` bundles the migrations into the binary at build time. No filesystem path dependency at runtime. * \`embed.FS\` embeds the .up.sql files alongside the binary. * runMigrations() reads from migrationsFS by default; MEMORY_PLUGIN_MIGRATIONS_DIR override path preserved for operators shipping custom migrations. * Names sorted alphabetically — pinned by a test so a future \`002_*.up.sql\` is guaranteed to run after \`001_*.up.sql\`. Tests: * TestMigrationsEmbedded_ContainsCreateTable — pins that the embed pattern matched files AND those files contain CREATE TABLE (catches both empty-pattern and wrong-files-embedded). * TestRunMigrationsFromEmbed_OrderingIsAlphabetic — pins sorted application order. Verified locally: \`go build\` succeeds, binary 9.3MB, \`strings\` shows the embedded SQL. Refs RFC #2728. Hotfix for #2906. --- .../cmd/memory-plugin-postgres/main.go | 87 +++++++++++++++---- .../migrations_embed_test.go | 72 +++++++++++++++ 2 files changed, 141 insertions(+), 18 deletions(-) create mode 100644 workspace-server/cmd/memory-plugin-postgres/migrations_embed_test.go diff --git a/workspace-server/cmd/memory-plugin-postgres/main.go b/workspace-server/cmd/memory-plugin-postgres/main.go index 148c1dd4..2a1b2dee 100644 --- a/workspace-server/cmd/memory-plugin-postgres/main.go +++ b/workspace-server/cmd/memory-plugin-postgres/main.go @@ -10,6 +10,7 @@ package main import ( "context" "database/sql" + "embed" "errors" "fmt" "log" @@ -17,6 +18,7 @@ import ( "net/http" "os" "os/signal" + "sort" "strings" "syscall" "time" @@ -26,6 +28,16 @@ import ( "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/pgplugin" ) +// migrationsFS bundles the .up.sql files into the binary at build time +// so the prebuilt image doesn't need the source tree at runtime. The +// prior `os.ReadDir("cmd/memory-plugin-postgres/migrations")` path +// only resolved during `go test` from the repo root — in the published +// image the path didn't exist and boot failed after the 30s health gate +// (caught on staging redeploy 2026-05-05 after PR #2906). +// +//go:embed migrations/*.up.sql +var migrationsFS embed.FS + const ( envDatabaseURL = "MEMORY_PLUGIN_DATABASE_URL" envListenAddr = "MEMORY_PLUGIN_LISTEN_ADDR" @@ -149,32 +161,71 @@ func openDB(databaseURL string) (*sql.DB, error) { return db, nil } -// runMigrations applies the schema migrations bundled at -// cmd/memory-plugin-postgres/migrations/. Idempotent on repeat boot. +// runMigrations applies the schema migrations bundled into the binary +// via go:embed (see migrationsFS at the top of this file). Idempotent +// on repeat boot — every migration file uses CREATE … IF NOT EXISTS. // -// Implementation note: rather than embedding the full migrate engine, -// we read the migration files at boot from a known relative path. The -// down migrations are deliberately NOT applied here — that's a manual -// operator action. This keeps the binary tiny and avoids dragging in -// golang-migrate's drivers. +// The down migrations are deliberately NOT applied here — that's a +// manual operator action. This keeps the binary tiny and avoids +// dragging in golang-migrate's drivers. +// +// MEMORY_PLUGIN_MIGRATIONS_DIR (filesystem path) is honored as an +// override for operators who need to ship custom migrations alongside +// the binary without rebuilding. When unset (the common case) we read +// from the embedded FS. func runMigrations(db *sql.DB) error { - // Find the migrations directory. In `go run` mode it's relative - // to the cmd dir; in the prebuilt binary case it's expected next - // to the binary OR via env var override. - dir := os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR") - if dir == "" { - // Best-effort: try the cwd-relative path that works for `go test`. - dir = "cmd/memory-plugin-postgres/migrations" + if dir := strings.TrimSpace(os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR")); dir != "" { + return runMigrationsFromDisk(db, dir) } - entries, err := os.ReadDir(dir) + return runMigrationsFromEmbed(db) +} + +// runMigrationsFromEmbed applies the *.up.sql files bundled into the +// binary at build time. Order is alphabetical (matches the on-disk +// behavior of os.ReadDir on Linux for the same set of names). +func runMigrationsFromEmbed(db *sql.DB) error { + entries, err := migrationsFS.ReadDir("migrations") if err != nil { - return fmt.Errorf("read migrations dir %q: %w", dir, err) + return fmt.Errorf("read embedded migrations: %w", err) } + names := make([]string, 0, len(entries)) for _, e := range entries { if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") { continue } - path := dir + "/" + e.Name() + names = append(names, e.Name()) + } + sort.Strings(names) + for _, name := range names { + data, err := migrationsFS.ReadFile("migrations/" + name) + if err != nil { + return fmt.Errorf("read embedded %q: %w", name, err) + } + if _, err := db.Exec(string(data)); err != nil { + return fmt.Errorf("apply %q: %w", name, err) + } + log.Printf("applied embedded migration %s", name) + } + return nil +} + +// runMigrationsFromDisk preserves the legacy filesystem-path mode for +// operator-supplied custom migrations. +func runMigrationsFromDisk(db *sql.DB, dir string) error { + entries, err := os.ReadDir(dir) + if err != nil { + return fmt.Errorf("read migrations dir %q: %w", dir, err) + } + names := make([]string, 0, len(entries)) + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") { + continue + } + names = append(names, e.Name()) + } + sort.Strings(names) + for _, name := range names { + path := dir + "/" + name data, err := os.ReadFile(path) if err != nil { return fmt.Errorf("read %q: %w", path, err) @@ -182,7 +233,7 @@ func runMigrations(db *sql.DB) error { if _, err := db.Exec(string(data)); err != nil { return fmt.Errorf("apply %q: %w", path, err) } - log.Printf("applied migration %s", e.Name()) + log.Printf("applied disk migration %s (from %s)", name, dir) } return nil } diff --git a/workspace-server/cmd/memory-plugin-postgres/migrations_embed_test.go b/workspace-server/cmd/memory-plugin-postgres/migrations_embed_test.go new file mode 100644 index 00000000..f2f0b785 --- /dev/null +++ b/workspace-server/cmd/memory-plugin-postgres/migrations_embed_test.go @@ -0,0 +1,72 @@ +package main + +import ( + "strings" + "testing" +) + +// TestMigrationsEmbedded_ContainsCreateTable pins that the migrations +// are bundled into the binary at build time, NOT loaded from a +// filesystem path that doesn't exist at runtime in the published image. +// +// Pre-fix: PR #2906 shipped the binary without the migrations dir; +// `os.ReadDir("cmd/memory-plugin-postgres/migrations")` errored on every +// tenant boot, the 30s health gate aborted the container, and the +// staging redeploy fleet job marked all tenants as failed. Embedding +// the migrations into the binary removes the runtime path entirely. +func TestMigrationsEmbedded_ContainsCreateTable(t *testing.T) { + entries, err := migrationsFS.ReadDir("migrations") + if err != nil { + t.Fatalf("embedded migrations dir unreadable: %v", err) + } + if len(entries) == 0 { + t.Fatal("embedded migrations dir is empty — go:embed pattern matched no files") + } + + var seenUp bool + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") { + continue + } + seenUp = true + data, err := migrationsFS.ReadFile("migrations/" + e.Name()) + if err != nil { + t.Errorf("read embedded %q: %v", e.Name(), err) + continue + } + if !strings.Contains(string(data), "CREATE TABLE") { + t.Errorf("embedded %q has no CREATE TABLE — wrong file embedded?", e.Name()) + } + } + if !seenUp { + t.Fatal("no *.up.sql in embedded migrations — runtime would have no schema to apply") + } +} + +// TestRunMigrationsFromEmbed_OrderingIsAlphabetic pins that we apply +// migrations in deterministic alphabetical order, not in whatever +// arbitrary order migrationsFS.ReadDir happens to return. With one +// migration today this is moot, but a future second migration ('002_…') +// MUST run after '001_…' or the schema is broken. +// +// We can't easily exercise db.Exec here (no test DB); instead pin the +// sort step on the directory listing itself. +func TestRunMigrationsFromEmbed_OrderingIsAlphabetic(t *testing.T) { + entries, err := migrationsFS.ReadDir("migrations") + if err != nil { + t.Fatalf("embedded migrations dir unreadable: %v", err) + } + var names []string + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") { + continue + } + names = append(names, e.Name()) + } + for i := 1; i < len(names); i++ { + if names[i-1] > names[i] { + t.Errorf("ReadDir returned non-sorted names; runMigrationsFromEmbed must sort. "+ + "Got %q before %q", names[i-1], names[i]) + } + } +}