Merge pull request #843 from Molecule-AI/fix/pgvector-migration-guard

fix(migrations): wrap entire pgvector migration in DO block — unblocks E2E
This commit is contained in:
Hongming Wang 2026-04-17 13:31:49 -07:00 committed by GitHub
commit 80b99ab219
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 97 additions and 17 deletions

View File

@ -3,28 +3,29 @@
-- Adds a dense-vector embedding column to agent_memories to power semantic
-- (cosine-similarity) memory recall alongside the existing FTS path.
--
-- Requires the pgvector Postgres extension. The entire migration is wrapped
-- in a single DO block so that if pgvector is unavailable, ALL statements are
-- skipped (not just CREATE EXTENSION) and a boot without pgvector installed
-- does not break the migration sweep. This prevents "type vector does not
-- exist" errors from the ALTER TABLE / CREATE INDEX statements.
--
-- Issue: #576
-- All pgvector-dependent statements run inside one DO block. If any of them
-- raises (typically because the extension cannot be created), the block's
-- implicit savepoint rolls every statement back and the handler downgrades the
-- error to a NOTICE, so the migration sweep continues.
--
-- Nothing that references the `vector` type may appear outside this block:
-- an unguarded ALTER TABLE / CREATE INDEX would fail with "type vector does
-- not exist" on instances without pgvector.
DO $migrate$
BEGIN
    CREATE EXTENSION IF NOT EXISTS vector;

    -- Nullable: rows written before pgvector is active have NULL embedding and
    -- are excluded from cosine-similarity queries automatically.
    ALTER TABLE agent_memories ADD COLUMN IF NOT EXISTS embedding vector(1536);

    -- ivfflat approximate nearest-neighbour index for cosine similarity.
    -- No WITH (lists = ...) clause is given, so the access method's default
    -- applies; lists=100 is a reasonable default for tables up to ~1M rows.
    -- Partial index (WHERE embedding IS NOT NULL) keeps it lean — unembedded
    -- rows are skipped entirely.
    CREATE INDEX IF NOT EXISTS agent_memories_embedding_idx
        ON agent_memories USING ivfflat (embedding vector_cosine_ops)
        WHERE embedding IS NOT NULL;
EXCEPTION WHEN OTHERS THEN
    -- SQLERRM carries the underlying error text so the skip reason is visible
    -- in the server log. NOTE(review): WHEN OTHERS also hides unrelated
    -- failures (e.g. agent_memories missing) — confirm that is acceptable.
    RAISE NOTICE 'pgvector not available — 031_memories_pgvector skipped: %', SQLERRM;
END $migrate$;

79
tests/e2e/test_saas_tenant.sh Executable file
View File

@ -0,0 +1,79 @@
#!/usr/bin/env bash
# test_saas_tenant.sh — smoke test a live SaaS tenant through the Cloudflare Worker
#
# Usage: TENANT_SLUG=hongming2 bash tests/e2e/test_saas_tenant.sh
# TENANT_SLUG=hongming2 DIRECT_IP=3.144.193.40 bash tests/e2e/test_saas_tenant.sh
#
# Tests both Worker-proxied routes and (optionally) direct EC2 access.
# Exits 0 if all critical tests pass, 1 otherwise.
# Strict mode: abort on command failure, on unset variables, and on a failure
# anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail

# TENANT_SLUG is mandatory; the expansion aborts with a usage hint if it is unset or empty.
SLUG="${TENANT_SLUG:?Set TENANT_SLUG=<org-slug>}"
# Base URL that every proxied check is issued against.
BASE="https://${SLUG}.moleculesai.app"
# Optional address for the direct checks; empty means they are not run.
DIRECT="${DIRECT_IP:-}"

# Result counters, reported in the summary line at the end of the run.
PASS=0 FAIL=0 SKIP=0
# check LABEL URL EXPECT
#
# Requests URL with curl (silent, TLS verification disabled, 5 s connect
# timeout) and compares the HTTP status code with EXPECT. Prints one PASS or
# FAIL line and increments the global PASS or FAIL counter accordingly.
# A transport failure (curl exits non-zero) is reported as status "000".
check() {
  local label="$1" url="$2" expect="$3"
  local code
  # curl's -w "%{http_code}" already prints 000 when no response arrives, so a
  # fallback echoed inside the substitution would be appended to it ("000000").
  # Apply the fallback to the assignment instead.
  code=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 "$url" 2>/dev/null) || code="000"
  if [ "$code" = "$expect" ]; then
    printf " PASS %-40s %s → %s\n" "$label" "$url" "$code"
    PASS=$((PASS + 1))
  else
    printf " FAIL %-40s %s → %s (expected %s)\n" "$label" "$url" "$code" "$expect"
    FAIL=$((FAIL + 1))
  fi
}
# Banner, then the routes that are expected to answer 200 through the Worker.
printf '%s\n' "=== SaaS Tenant Smoke Test: ${SLUG} ===" "" "--- Worker routing ---"

# Each entry is "label|path"; '|' is the separator because labels contain '/'.
for route in \
  "health|/health" \
  "canvas root|/" \
  "plugins|/plugins" \
  "templates|/templates" \
  "workspaces|/workspaces" \
  "org/templates|/org/templates" \
  "approvals/pending|/approvals/pending" \
  "canvas/viewport|/canvas/viewport" \
  "metrics|/metrics"
do
  check "${route%%|*}" "${BASE}${route#*|}" "200"
done

printf '%s\n' "" "--- Error handling ---"
# The all-zero workspace UUID is expected to be rejected with 401.
check "nonexistent workspace" "${BASE}/workspaces/00000000-0000-0000-0000-000000000000" "401"
# Unknown paths fall through to the canvas catch-all, hence 200.
check "bad path" "${BASE}/does-not-exist" "200"
echo ""
echo "--- WebSocket (upgrade header) ---"
# Sends a hand-built WebSocket upgrade request to $BASE/ws. Either 101 (upgrade
# accepted) or 400 (handshake rejected) counts as a pass.
#
# Timeouts are required: after a successful 101 the connection stays open and
# curl would otherwise wait on it indefinitely. When --max-time fires, curl
# exits non-zero but has already printed the status through -w, so the captured
# output is kept as-is and "000" is used only if nothing was printed.
ws_code=$(curl -sk -o /dev/null -w "%{http_code}" \
  --connect-timeout 5 --max-time 10 \
  -H "Connection: Upgrade" -H "Upgrade: websocket" \
  -H "Sec-WebSocket-Version: 13" -H "Sec-WebSocket-Key: dGhlIHNhbXBsZSBub25jZQ==" \
  "$BASE/ws" 2>/dev/null) || true
ws_code="${ws_code:-000}"
if [ "$ws_code" = "101" ] || [ "$ws_code" = "400" ]; then
  printf " PASS %-40s %s → %s\n" "websocket upgrade" "$BASE/ws" "$ws_code"
  PASS=$((PASS + 1))
else
  printf " FAIL %-40s %s → %s (expected 101 or 400)\n" "websocket upgrade" "$BASE/ws" "$ws_code"
  FAIL=$((FAIL + 1))
fi
# Optional direct checks: they run only when DIRECT_IP was supplied.
if [[ -n "${DIRECT}" ]]; then
  printf '%s\n' "" "--- Direct EC2 (port 8080) ---"
  for endpoint in health metrics; do
    check "direct ${endpoint}" "http://${DIRECT}:8080/${endpoint}" "200"
  done

  printf '%s\n' "" "--- Direct Canvas (port 3000) ---"
  check "direct canvas" "http://${DIRECT}:3000/" "200"
fi
# Summary and exit status. The script exits 0 when nothing failed and 1
# otherwise. The raw failure count is not used as the status: it would not be
# the documented 0/1, and exit statuses wrap modulo 256.
echo ""
echo "=== Results: ${PASS} passed, ${FAIL} failed, ${SKIP} skipped ==="
if [ "$FAIL" -eq 0 ]; then
  echo "ALL TESTS PASSED"
  exit 0
else
  echo "SOME TESTS FAILED"
  exit 1
fi