diff --git a/workspace-server/internal/memory/wiring/wiring.go b/workspace-server/internal/memory/wiring/wiring.go index 6fdad2ba..12badfdc 100644 --- a/workspace-server/internal/memory/wiring/wiring.go +++ b/workspace-server/internal/memory/wiring/wiring.go @@ -43,15 +43,37 @@ type Bundle struct { // circuit breaker handles ongoing unavailability; we don't want to // block workspace-server boot just because the memory plugin is // briefly down. +// +// Silent-misconfig guard: if MEMORY_V2_CUTOVER=true is set without +// MEMORY_PLUGIN_URL, the cutoverActive() check in handlers silently +// returns false and the legacy SQL path serves every request. The +// operator sees no errors, no warnings, and assumes the cutover is +// live. Log a LOUD WARN at boot when the env is half-configured so +// the misconfig is visible in the boot log, not detectable only by +// observing that the legacy table is still being written to. func Build(db *sql.DB) *Bundle { - if os.Getenv("MEMORY_PLUGIN_URL") == "" { + cutover := os.Getenv("MEMORY_V2_CUTOVER") == "true" + pluginURL := os.Getenv("MEMORY_PLUGIN_URL") + + if pluginURL == "" { + if cutover { + log.Printf("memory-plugin: ⚠️ MEMORY_V2_CUTOVER=true but MEMORY_PLUGIN_URL is unset — cutover is INACTIVE, legacy SQL path is serving every request. Either unset MEMORY_V2_CUTOVER or point MEMORY_PLUGIN_URL at a reachable plugin server.") + } return nil } plugin := mclient.New(mclient.Config{}) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() if hr, err := plugin.Boot(ctx); err != nil { - log.Printf("memory-plugin: /v1/health probe failed (will retry per-request): %v", err) + // Log even louder when cutover is on — an unreachable plugin + // during cutover means writes that the operator THINKS are + // going to v2 will silently fall back to legacy via the + // circuit breaker on each request. Make it impossible to miss. + if cutover { + log.Printf("memory-plugin: ⚠️ MEMORY_V2_CUTOVER=true and MEMORY_PLUGIN_URL=%s but /v1/health probe failed (%v). Cutover writes will fall back to legacy via circuit breaker. Verify the plugin server is reachable.", pluginURL, err) + } else { + log.Printf("memory-plugin: /v1/health probe failed (will retry per-request): %v", err) + } } else { log.Printf("memory-plugin: ok, capabilities=%v", hr.Capabilities) } diff --git a/workspace-server/internal/memory/wiring/wiring_test.go b/workspace-server/internal/memory/wiring/wiring_test.go index ee7a6840..d8da1efb 100644 --- a/workspace-server/internal/memory/wiring/wiring_test.go +++ b/workspace-server/internal/memory/wiring/wiring_test.go @@ -1,9 +1,12 @@ package wiring import ( + "bytes" "context" + "log" "net/http" "net/http/httptest" + "strings" "sync" "testing" @@ -151,6 +154,84 @@ func TestNamespaceCleanupFn_PluginErrorDoesNotPanic(t *testing.T) { cleanup(context.Background(), "ws-1") } +// captureLogs runs fn with log output captured into a buffer, returns the +// captured text. Restores the prior log destination on exit. +func captureLogs(t *testing.T, fn func()) string { + t.Helper() + var buf bytes.Buffer + prev := log.Writer() + log.SetOutput(&buf) + t.Cleanup(func() { log.SetOutput(prev) }) + fn() + return buf.String() +} + +// TestBuild_WarnsWhenCutoverWithoutPluginURL pins the silent-misconfig +// guard: an operator who flips MEMORY_V2_CUTOVER=true without also +// pointing MEMORY_PLUGIN_URL at a plugin server has just disabled the +// cutover with no error visible. Without this WARN, the only signal +// is "the legacy table is still being written to" — invisible to +// every operator who doesn't explicitly check. +func TestBuild_WarnsWhenCutoverWithoutPluginURL(t *testing.T) { + t.Setenv("MEMORY_V2_CUTOVER", "true") + t.Setenv("MEMORY_PLUGIN_URL", "") + out := captureLogs(t, func() { + if got := Build(nil); got != nil { + t.Errorf("expected nil bundle, got %+v", got) + } + }) + if !strings.Contains(out, "MEMORY_V2_CUTOVER=true") || !strings.Contains(out, "MEMORY_PLUGIN_URL is unset") { + t.Errorf("expected loud WARN about half-configured cutover; got log:\n%s", out) + } +} + +// TestBuild_NoWarnWhenNeitherSet pins the happy default: an operator +// running without the v2 plugin should not see scary warnings. +func TestBuild_NoWarnWhenNeitherSet(t *testing.T) { + t.Setenv("MEMORY_V2_CUTOVER", "") + t.Setenv("MEMORY_PLUGIN_URL", "") + out := captureLogs(t, func() { _ = Build(nil) }) + if strings.Contains(out, "MEMORY_V2_CUTOVER") { + t.Errorf("expected no MEMORY_V2_CUTOVER warning when env is unset; got log:\n%s", out) + } +} + +// TestBuild_LoudWarnWhenCutoverAndProbeFails pins the second +// half-config case: cutover is on AND plugin URL is set, but the +// /v1/health probe fails (server down or wrong URL). Without this +// loud WARN, the operator sees only the generic "probe failed" line +// that gets emitted even when cutover is OFF — hiding the fact that +// real cutover writes will quietly fall back via circuit breaker. +func TestBuild_LoudWarnWhenCutoverAndProbeFails(t *testing.T) { + t.Setenv("MEMORY_V2_CUTOVER", "true") + t.Setenv("MEMORY_PLUGIN_URL", "http://127.0.0.1:1") // bogus port + db, _, _ := sqlmock.New() + defer db.Close() + out := captureLogs(t, func() { _ = Build(db) }) + if !strings.Contains(out, "MEMORY_V2_CUTOVER=true") || !strings.Contains(out, "probe failed") { + t.Errorf("expected loud WARN about cutover-with-failing-probe; got log:\n%s", out) + } +} + +// TestBuild_QuietProbeFailWhenCutoverOff: the operator is in PRE-cutover +// mode (plugin URL set, cutover off — they're warming up the plugin). +// A failing probe in this state is not a misconfig — it should log the +// generic message, NOT the loud cutover-specific one (so log noise +// doesn't drown out real cutover misconfigs in dashboards). +func TestBuild_QuietProbeFailWhenCutoverOff(t *testing.T) { + t.Setenv("MEMORY_V2_CUTOVER", "") + t.Setenv("MEMORY_PLUGIN_URL", "http://127.0.0.1:1") + db, _, _ := sqlmock.New() + defer db.Close() + out := captureLogs(t, func() { _ = Build(db) }) + if strings.Contains(out, "MEMORY_V2_CUTOVER=true") { + t.Errorf("expected no cutover-specific warning when cutover is off; got log:\n%s", out) + } + if !strings.Contains(out, "probe failed") { + t.Errorf("expected generic probe-failed log; got log:\n%s", out) + } +} + func pathsAndMethods(paths, methods []string) []string { out := make([]string, len(paths)) for i := range paths {