fix(memory v2): warn at boot when cutover env half-configured
MEMORY_V2_CUTOVER=true gates the admin export/import path on the v2
plugin, but the cutoverActive() check in admin_memories.go silently
returns false when the plugin isn't wired:
func (h *AdminMemoriesHandler) cutoverActive() bool {
if os.Getenv(envMemoryV2Cutover) != "true" {
return false
}
return h.plugin != nil && h.resolver != nil
}
Two operator misconfigs hit the silent-fallback path:
1. MEMORY_V2_CUTOVER=true set, MEMORY_PLUGIN_URL unset
→ wiring.Build returns nil → handler stays on legacy SQL path
→ operator sees no error, assumes cutover is live, but every
request still writes the legacy table.
2. MEMORY_V2_CUTOVER=true set, MEMORY_PLUGIN_URL set, but plugin
unreachable at boot
→ wiring.Build still returns the bundle (intentional — circuit
breaker handles ongoing unavailability), but every cutover
write quietly falls back via the breaker.
→ only signal: legacy table keeps growing.
Both are exactly the "structurally invisible until prod" failure
mode; the only real-world detection today is "notice the legacy
table is still being written to," which no operator will check.
Add loud, distinctive WARN log lines at Build() time for both
shapes. Boot logs are operator-visible, so a half-config is
immediately obvious without needing dashboards.
Tests:
* 4 new (cutover+no-URL → warn, neither set → silent, cutover+probe-
fail → loud warn, probe-fail-without-cutover → quiet generic)
* 6 existing (still pass; pin no-warning-on-happy-path)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
4b16c95450
commit
b5435b4732
@ -43,15 +43,37 @@ type Bundle struct {
|
||||
// circuit breaker handles ongoing unavailability; we don't want to
|
||||
// block workspace-server boot just because the memory plugin is
|
||||
// briefly down.
|
||||
//
|
||||
// Silent-misconfig guard: if MEMORY_V2_CUTOVER=true is set without
|
||||
// MEMORY_PLUGIN_URL, the cutoverActive() check in handlers silently
|
||||
// returns false and the legacy SQL path serves every request. The
|
||||
// operator sees no errors, no warnings, and assumes the cutover is
|
||||
// live. Log a LOUD WARN at boot when the env is half-configured so
|
||||
// the misconfig is visible in the boot log, not detectable only by
|
||||
// observing that the legacy table is still being written to.
|
||||
func Build(db *sql.DB) *Bundle {
|
||||
if os.Getenv("MEMORY_PLUGIN_URL") == "" {
|
||||
cutover := os.Getenv("MEMORY_V2_CUTOVER") == "true"
|
||||
pluginURL := os.Getenv("MEMORY_PLUGIN_URL")
|
||||
|
||||
if pluginURL == "" {
|
||||
if cutover {
|
||||
log.Printf("memory-plugin: ⚠️ MEMORY_V2_CUTOVER=true but MEMORY_PLUGIN_URL is unset — cutover is INACTIVE, legacy SQL path is serving every request. Either unset MEMORY_V2_CUTOVER or point MEMORY_PLUGIN_URL at a reachable plugin server.")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
plugin := mclient.New(mclient.Config{})
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
if hr, err := plugin.Boot(ctx); err != nil {
|
||||
log.Printf("memory-plugin: /v1/health probe failed (will retry per-request): %v", err)
|
||||
// Log even louder when cutover is on — an unreachable plugin
|
||||
// during cutover means writes that the operator THINKS are
|
||||
// going to v2 will silently fall back to legacy via the
|
||||
// circuit breaker on each request. Make it impossible to miss.
|
||||
if cutover {
|
||||
log.Printf("memory-plugin: ⚠️ MEMORY_V2_CUTOVER=true and MEMORY_PLUGIN_URL=%s but /v1/health probe failed (%v). Cutover writes will fall back to legacy via circuit breaker. Verify the plugin server is reachable.", pluginURL, err)
|
||||
} else {
|
||||
log.Printf("memory-plugin: /v1/health probe failed (will retry per-request): %v", err)
|
||||
}
|
||||
} else {
|
||||
log.Printf("memory-plugin: ok, capabilities=%v", hr.Capabilities)
|
||||
}
|
||||
|
||||
@ -1,9 +1,12 @@
|
||||
package wiring
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
@ -151,6 +154,84 @@ func TestNamespaceCleanupFn_PluginErrorDoesNotPanic(t *testing.T) {
|
||||
cleanup(context.Background(), "ws-1")
|
||||
}
|
||||
|
||||
// captureLogs runs fn with log output captured into a buffer, returns the
|
||||
// captured text. Restores the prior log destination on exit.
|
||||
func captureLogs(t *testing.T, fn func()) string {
|
||||
t.Helper()
|
||||
var buf bytes.Buffer
|
||||
prev := log.Writer()
|
||||
log.SetOutput(&buf)
|
||||
t.Cleanup(func() { log.SetOutput(prev) })
|
||||
fn()
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// TestBuild_WarnsWhenCutoverWithoutPluginURL pins the silent-misconfig
|
||||
// guard: an operator who flips MEMORY_V2_CUTOVER=true without also
|
||||
// pointing MEMORY_PLUGIN_URL at a plugin server has just disabled the
|
||||
// cutover with no error visible. Without this WARN, the only signal
|
||||
// is "the legacy table is still being written to" — invisible to
|
||||
// every operator who doesn't explicitly check.
|
||||
func TestBuild_WarnsWhenCutoverWithoutPluginURL(t *testing.T) {
|
||||
t.Setenv("MEMORY_V2_CUTOVER", "true")
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "")
|
||||
out := captureLogs(t, func() {
|
||||
if got := Build(nil); got != nil {
|
||||
t.Errorf("expected nil bundle, got %+v", got)
|
||||
}
|
||||
})
|
||||
if !strings.Contains(out, "MEMORY_V2_CUTOVER=true") || !strings.Contains(out, "MEMORY_PLUGIN_URL is unset") {
|
||||
t.Errorf("expected loud WARN about half-configured cutover; got log:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuild_NoWarnWhenNeitherSet pins the happy default: an operator
|
||||
// running without the v2 plugin should not see scary warnings.
|
||||
func TestBuild_NoWarnWhenNeitherSet(t *testing.T) {
|
||||
t.Setenv("MEMORY_V2_CUTOVER", "")
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "")
|
||||
out := captureLogs(t, func() { _ = Build(nil) })
|
||||
if strings.Contains(out, "MEMORY_V2_CUTOVER") {
|
||||
t.Errorf("expected no MEMORY_V2_CUTOVER warning when env is unset; got log:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuild_LoudWarnWhenCutoverAndProbeFails pins the second
|
||||
// half-config case: cutover is on AND plugin URL is set, but the
|
||||
// /v1/health probe fails (server down or wrong URL). Without this
|
||||
// loud WARN, the operator sees only the generic "probe failed" line
|
||||
// that gets emitted even when cutover is OFF — hiding the fact that
|
||||
// real cutover writes will quietly fall back via circuit breaker.
|
||||
func TestBuild_LoudWarnWhenCutoverAndProbeFails(t *testing.T) {
|
||||
t.Setenv("MEMORY_V2_CUTOVER", "true")
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "http://127.0.0.1:1") // bogus port
|
||||
db, _, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
out := captureLogs(t, func() { _ = Build(db) })
|
||||
if !strings.Contains(out, "MEMORY_V2_CUTOVER=true") || !strings.Contains(out, "probe failed") {
|
||||
t.Errorf("expected loud WARN about cutover-with-failing-probe; got log:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuild_QuietProbeFailWhenCutoverOff: the operator is in PRE-cutover
|
||||
// mode (plugin URL set, cutover off — they're warming up the plugin).
|
||||
// A failing probe in this state is not a misconfig — it should log the
|
||||
// generic message, NOT the loud cutover-specific one (so log noise
|
||||
// doesn't drown out real cutover misconfigs in dashboards).
|
||||
func TestBuild_QuietProbeFailWhenCutoverOff(t *testing.T) {
|
||||
t.Setenv("MEMORY_V2_CUTOVER", "")
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "http://127.0.0.1:1")
|
||||
db, _, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
out := captureLogs(t, func() { _ = Build(db) })
|
||||
if strings.Contains(out, "MEMORY_V2_CUTOVER=true") {
|
||||
t.Errorf("expected no cutover-specific warning when cutover is off; got log:\n%s", out)
|
||||
}
|
||||
if !strings.Contains(out, "probe failed") {
|
||||
t.Errorf("expected generic probe-failed log; got log:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
func pathsAndMethods(paths, methods []string) []string {
|
||||
out := make([]string, len(paths))
|
||||
for i := range paths {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user