From 412dec0d876d5511e77a237a8f33c57470ce25e5 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 5 May 2026 12:39:03 -0700 Subject: [PATCH] fix(memory-plugin): gate sidecar spawn on cutover-active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #2906 spawned the sidecar unconditionally on every tenant boot. The plugin's first migration runs `CREATE EXTENSION vector` which fails on tenant Postgres without pgvector preinstalled — every staging tenant redeploy aborted at the 30s health gate. CP fail-fast kept running tenants on the prior image (no outage), but the new image was DOA. Caught on staging redeploy 2026-05-05 19:23 with `pq: extension "vector" is not available`. Fix: only spawn the sidecar when the operator has flipped the cutover flag — `MEMORY_V2_CUTOVER=true` OR `MEMORY_PLUGIN_URL` is set. * Aligns the entrypoint to the same opt-in posture wiring.go already uses (it skips building the client when MEMORY_PLUGIN_URL is empty). * Until cutover, the sidecar isn't even running — no migration, no health gate, no boot-time pgvector dependency. * Operators activating cutover already redeploy with the new env vars set; that's when the sidecar starts. By definition they've verified pgvector is available before flipping. * MEMORY_PLUGIN_DISABLE=1 escape hatch preserved; harness fix #2915 becomes belt-and-suspenders (still respected). Both Dockerfile and entrypoint-tenant.sh updated. Behavior change for existing deployments: zero (cutover env vars still unset → sidecar still inert, but now also not running). Refs RFC #2728. Hotfix for #2906; supersedes the migration-path fragility class (the sidecar isn't doing migrations on tenants that won't use it). 
--- workspace-server/Dockerfile | 27 ++++++++++++++++++--------- workspace-server/entrypoint-tenant.sh | 21 +++++++++++++++------ 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/workspace-server/Dockerfile b/workspace-server/Dockerfile index ecf43fab..d6754312 100644 --- a/workspace-server/Dockerfile +++ b/workspace-server/Dockerfile @@ -63,21 +63,30 @@ fi # Memory v2 sidecar (built-in postgres plugin). Co-located with the # main server so operators flipping MEMORY_V2_CUTOVER=true don't need -# to provision a separate service. Stays inert at the protocol layer -# until that env var is set — the workspace-server's wiring.go skips -# building the client without MEMORY_PLUGIN_URL, so the running plugin -# is a no-op for traffic. +# to provision a separate service. # -# Env defaults: +# Spawn-gating: only start the sidecar when the operator has indicated +# they want it — either MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set. +# Without that signal, the sidecar adds zero value (the platform's +# wiring.go skips building the client too) but pays a real cost: the +# plugin's first migration runs `CREATE EXTENSION vector`, which fails +# on tenant Postgres without pgvector preinstalled and aborts container +# boot via the 30s health gate. Caught on staging redeploy 2026-05-05. +# +# Env defaults (when sidecar IS spawned): # MEMORY_PLUGIN_DATABASE_URL = $DATABASE_URL (share existing Postgres; # plugin's `memory_namespaces` / `memory_records` tables coexist # with `agent_memories` and the rest of the platform schema — # no conflicts. Operator can override with a separate URL.) -# MEMORY_PLUGIN_LISTEN_ADDR = :9100 +# MEMORY_PLUGIN_LISTEN_ADDR = :9100 # -# Set MEMORY_PLUGIN_DISABLE=1 to skip launching the sidecar entirely -# (e.g. an operator running the plugin externally on a separate host). -if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$DATABASE_URL" ]; then +# Set MEMORY_PLUGIN_DISABLE=1 to force-skip the sidecar even with +# cutover env set (e.g. 
running the plugin externally on a separate host). +memory_plugin_wanted="" +if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then + memory_plugin_wanted=1 +fi +if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then : "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}" : "${MEMORY_PLUGIN_LISTEN_ADDR:=:9100}" export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR diff --git a/workspace-server/entrypoint-tenant.sh b/workspace-server/entrypoint-tenant.sh index 8059cc1c..0f2d6dde 100644 --- a/workspace-server/entrypoint-tenant.sh +++ b/workspace-server/entrypoint-tenant.sh @@ -21,14 +21,23 @@ PORT=3000 HOSTNAME=0.0.0.0 node server.js & CANVAS_PID=$! # Memory v2 sidecar (built-in postgres plugin). See Dockerfile entrypoint -# comment for rationale. Stays inert at the protocol layer until the -# operator sets MEMORY_V2_CUTOVER=true; running it is cheap. +# comment for rationale. # -# Defaults the plugin's DATABASE_URL to the tenant's DATABASE_URL so -# operators don't need to configure two of them. Plugin tables coexist -# with the platform schema. +# Spawn-gating: only start the sidecar when the operator has indicated +# they want it (MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set). +# Without that signal, the sidecar adds zero value and risks aborting +# tenant boot via the 30s health gate when the tenant Postgres lacks +# pgvector. Caught on staging redeploy 2026-05-05: +# pq: extension "vector" is not available +# +# Defaults (when sidecar IS spawned): MEMORY_PLUGIN_DATABASE_URL +# falls back to the tenant's DATABASE_URL. 
MEMORY_PLUGIN_PID="" -if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$DATABASE_URL" ]; then +memory_plugin_wanted="" +if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then + memory_plugin_wanted=1 +fi +if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then : "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}" : "${MEMORY_PLUGIN_LISTEN_ADDR:=:9100}" export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR