Merge pull request #2803 from Molecule-AI/fix/memory-cutover-misconfig-warn
fix(memory v2): warn at boot when cutover env half-configured
This commit is contained in:
commit
461e5dcad0
@ -43,15 +43,37 @@ type Bundle struct {
|
||||
// circuit breaker handles ongoing unavailability; we don't want to
|
||||
// block workspace-server boot just because the memory plugin is
|
||||
// briefly down.
|
||||
//
|
||||
// Silent-misconfig guard: if MEMORY_V2_CUTOVER=true is set without
|
||||
// MEMORY_PLUGIN_URL, the cutoverActive() check in handlers silently
|
||||
// returns false and the legacy SQL path serves every request. The
|
||||
// operator sees no errors, no warnings, and assumes the cutover is
|
||||
// live. Log a LOUD WARN at boot when the env is half-configured so
|
||||
// the misconfig is visible in the boot log, not detectable only by
|
||||
// observing that the legacy table is still being written to.
|
||||
func Build(db *sql.DB) *Bundle {
|
||||
if os.Getenv("MEMORY_PLUGIN_URL") == "" {
|
||||
cutover := os.Getenv("MEMORY_V2_CUTOVER") == "true"
|
||||
pluginURL := os.Getenv("MEMORY_PLUGIN_URL")
|
||||
|
||||
if pluginURL == "" {
|
||||
if cutover {
|
||||
log.Printf("memory-plugin: ⚠️ MEMORY_V2_CUTOVER=true but MEMORY_PLUGIN_URL is unset — cutover is INACTIVE, legacy SQL path is serving every request. Either unset MEMORY_V2_CUTOVER or point MEMORY_PLUGIN_URL at a reachable plugin server.")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
plugin := mclient.New(mclient.Config{})
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
if hr, err := plugin.Boot(ctx); err != nil {
|
||||
log.Printf("memory-plugin: /v1/health probe failed (will retry per-request): %v", err)
|
||||
// Log even louder when cutover is on — an unreachable plugin
|
||||
// during cutover means writes that the operator THINKS are
|
||||
// going to v2 will silently fall back to legacy via the
|
||||
// circuit breaker on each request. Make it impossible to miss.
|
||||
if cutover {
|
||||
log.Printf("memory-plugin: ⚠️ MEMORY_V2_CUTOVER=true and MEMORY_PLUGIN_URL=%s but /v1/health probe failed (%v). Cutover writes will fall back to legacy via circuit breaker. Verify the plugin server is reachable.", pluginURL, err)
|
||||
} else {
|
||||
log.Printf("memory-plugin: /v1/health probe failed (will retry per-request): %v", err)
|
||||
}
|
||||
} else {
|
||||
log.Printf("memory-plugin: ok, capabilities=%v", hr.Capabilities)
|
||||
}
|
||||
|
||||
@ -1,9 +1,12 @@
|
||||
package wiring
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
@ -151,6 +154,84 @@ func TestNamespaceCleanupFn_PluginErrorDoesNotPanic(t *testing.T) {
|
||||
cleanup(context.Background(), "ws-1")
|
||||
}
|
||||
|
||||
// captureLogs runs fn with log output captured into a buffer, returns the
|
||||
// captured text. Restores the prior log destination on exit.
|
||||
func captureLogs(t *testing.T, fn func()) string {
|
||||
t.Helper()
|
||||
var buf bytes.Buffer
|
||||
prev := log.Writer()
|
||||
log.SetOutput(&buf)
|
||||
t.Cleanup(func() { log.SetOutput(prev) })
|
||||
fn()
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// TestBuild_WarnsWhenCutoverWithoutPluginURL pins the silent-misconfig
|
||||
// guard: an operator who flips MEMORY_V2_CUTOVER=true without also
|
||||
// pointing MEMORY_PLUGIN_URL at a plugin server has just disabled the
|
||||
// cutover with no error visible. Without this WARN, the only signal
|
||||
// is "the legacy table is still being written to" — invisible to
|
||||
// every operator who doesn't explicitly check.
|
||||
func TestBuild_WarnsWhenCutoverWithoutPluginURL(t *testing.T) {
|
||||
t.Setenv("MEMORY_V2_CUTOVER", "true")
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "")
|
||||
out := captureLogs(t, func() {
|
||||
if got := Build(nil); got != nil {
|
||||
t.Errorf("expected nil bundle, got %+v", got)
|
||||
}
|
||||
})
|
||||
if !strings.Contains(out, "MEMORY_V2_CUTOVER=true") || !strings.Contains(out, "MEMORY_PLUGIN_URL is unset") {
|
||||
t.Errorf("expected loud WARN about half-configured cutover; got log:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuild_NoWarnWhenNeitherSet pins the happy default: an operator
|
||||
// running without the v2 plugin should not see scary warnings.
|
||||
func TestBuild_NoWarnWhenNeitherSet(t *testing.T) {
|
||||
t.Setenv("MEMORY_V2_CUTOVER", "")
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "")
|
||||
out := captureLogs(t, func() { _ = Build(nil) })
|
||||
if strings.Contains(out, "MEMORY_V2_CUTOVER") {
|
||||
t.Errorf("expected no MEMORY_V2_CUTOVER warning when env is unset; got log:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuild_LoudWarnWhenCutoverAndProbeFails pins the second
|
||||
// half-config case: cutover is on AND plugin URL is set, but the
|
||||
// /v1/health probe fails (server down or wrong URL). Without this
|
||||
// loud WARN, the operator sees only the generic "probe failed" line
|
||||
// that gets emitted even when cutover is OFF — hiding the fact that
|
||||
// real cutover writes will quietly fall back via circuit breaker.
|
||||
func TestBuild_LoudWarnWhenCutoverAndProbeFails(t *testing.T) {
|
||||
t.Setenv("MEMORY_V2_CUTOVER", "true")
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "http://127.0.0.1:1") // bogus port
|
||||
db, _, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
out := captureLogs(t, func() { _ = Build(db) })
|
||||
if !strings.Contains(out, "MEMORY_V2_CUTOVER=true") || !strings.Contains(out, "probe failed") {
|
||||
t.Errorf("expected loud WARN about cutover-with-failing-probe; got log:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuild_QuietProbeFailWhenCutoverOff: the operator is in PRE-cutover
|
||||
// mode (plugin URL set, cutover off — they're warming up the plugin).
|
||||
// A failing probe in this state is not a misconfig — it should log the
|
||||
// generic message, NOT the loud cutover-specific one (so log noise
|
||||
// doesn't drown out real cutover misconfigs in dashboards).
|
||||
func TestBuild_QuietProbeFailWhenCutoverOff(t *testing.T) {
|
||||
t.Setenv("MEMORY_V2_CUTOVER", "")
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "http://127.0.0.1:1")
|
||||
db, _, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
out := captureLogs(t, func() { _ = Build(db) })
|
||||
if strings.Contains(out, "MEMORY_V2_CUTOVER=true") {
|
||||
t.Errorf("expected no cutover-specific warning when cutover is off; got log:\n%s", out)
|
||||
}
|
||||
if !strings.Contains(out, "probe failed") {
|
||||
t.Errorf("expected generic probe-failed log; got log:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
func pathsAndMethods(paths, methods []string) []string {
|
||||
out := make([]string, len(paths))
|
||||
for i := range paths {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user