Some checks failed
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 28s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Has been skipped
CI / Detect changes (pull_request) Successful in 58s
Harness Replays / detect-changes (pull_request) Successful in 17s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 58s
E2E API Smoke Test / detect-changes (pull_request) Successful in 1m0s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 14s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 54s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 42s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m15s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Successful in 1m50s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 16s
qa-review / approved (pull_request) Failing after 15s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 2m0s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Successful in 1m36s
lint-mask-pr-atomicity / lint-mask-pr-atomicity (pull_request) Successful in 2m8s
gate-check-v3 / gate-check (pull_request) Successful in 32s
security-review / approved (pull_request) Failing after 18s
Runtime PR-Built Compatibility / detect-changes (pull_request) Successful in 41s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Successful in 2m11s
sop-checklist-gate / gate (pull_request) Successful in 17s
Harness Replays / Harness Replays (pull_request) Successful in 5s
sop-tier-check / tier-check (pull_request) Successful in 22s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 20s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 14s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (pull_request) Successful in 10s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m42s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 3m53s
CI / Python Lint & Test (pull_request) Successful in 7m18s
CI / Canvas (Next.js) (pull_request) Successful in 11m54s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
CI / Platform (Go) (pull_request) Successful in 12m45s
CI / all-required (pull_request) Successful in 3s
sop-checklist / all-items-acked (pull_request) acked: 7/7
audit-force-merge / audit (pull_request) Successful in 4s
308 lines
9.4 KiB
Go
308 lines
9.4 KiB
Go
// memory-backfill is a one-shot CLI that copies rows from the legacy
|
|
// agent_memories table into the v2 plugin via its HTTP API.
|
|
//
|
|
// Idempotent on re-run: the backfill passes each source row's UUID
|
|
// to the plugin's MemoryWrite.ID field, and the plugin upserts on
|
|
// conflict. Re-running the backfill (whole or partial) updates rows
|
|
// in place rather than duplicating.
|
|
//
|
|
// Usage:
|
|
//
|
|
// memory-backfill -dry-run # count + diff
|
|
// memory-backfill -apply # actually copy
|
|
// memory-backfill -apply -limit=10000 # cap rows per run
|
|
// memory-backfill -apply -workspace=<uuid> # one workspace only
|
|
//
|
|
// Required env:
|
|
//
|
|
// DATABASE_URL — workspace-server DB (read agent_memories)
|
|
// MEMORY_PLUGIN_URL — target plugin (write memory_records)
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"errors"
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"strings"
|
|
"time"
|
|
|
|
_ "github.com/lib/pq"
|
|
|
|
mclient "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/client"
|
|
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
|
|
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/namespace"
|
|
)
|
|
|
|
// defaultLimit is the -limit flag's default: effectively unlimited for
// current table sizes, while still bounding the SQL LIMIT clause so the
// query stays pageable.
const defaultLimit = 1000000 // effectively unlimited; cap keeps SQL pageable
func main() {
|
|
if err := run(os.Args[1:], os.Stdout, os.Stderr); err != nil {
|
|
log.Fatalf("memory-backfill: %v", err)
|
|
}
|
|
}
|
|
|
|
// run is extracted so tests can drive it with synthesized argv +
// captured stdout/stderr. Returns nil on success.
//
// Flow: parse flags → enforce exactly-one-mode → read required env →
// open + ping the DB → dispatch to verifyParity (-verify) or
// backfill (-dry-run / -apply).
func run(argv []string, stdout, stderr *os.File) error {
	// ContinueOnError so a bad flag returns an error to the caller
	// instead of os.Exit-ing, which would defeat the test seam.
	fs := flag.NewFlagSet("memory-backfill", flag.ContinueOnError)
	fs.SetOutput(stderr)
	dryRun := fs.Bool("dry-run", false, "count + diff only, no writes")
	// -apply has no dedicated variable use below: it selects the
	// default (writing) backfill path purely via the mode check.
	apply := fs.Bool("apply", false, "actually copy rows to the plugin")
	verify := fs.Bool("verify", false, "post-apply parity check: random-sample N workspaces, diff agent_memories vs plugin search")
	verifySample := fs.Int("verify-sample", 50, "number of workspaces to sample in -verify mode")
	workspace := fs.String("workspace", "", "limit to a single workspace UUID (empty = all)")
	limit := fs.Int("limit", defaultLimit, "max rows to process this run")
	if err := fs.Parse(argv); err != nil {
		return err
	}
	// The three modes are mutually exclusive; count how many were set
	// and refuse anything other than exactly one.
	modesPicked := 0
	if *dryRun {
		modesPicked++
	}
	if *apply {
		modesPicked++
	}
	if *verify {
		modesPicked++
	}
	if modesPicked != 1 {
		return errors.New("specify exactly one of -dry-run, -apply, or -verify")
	}

	dbURL := os.Getenv("DATABASE_URL")
	if dbURL == "" {
		return errors.New("DATABASE_URL is required")
	}
	pluginURL := os.Getenv("MEMORY_PLUGIN_URL")
	if pluginURL == "" {
		return errors.New("MEMORY_PLUGIN_URL is required")
	}

	db, err := sql.Open("postgres", dbURL)
	if err != nil {
		return fmt.Errorf("open db: %w", err)
	}
	defer db.Close()
	// The 5s timeout bounds only the connectivity ping. The actual
	// backfill/verify below runs under context.Background() — i.e.
	// deadline-free; presumably intentional for long copy runs.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	if err := db.PingContext(ctx); err != nil {
		return fmt.Errorf("ping db: %w", err)
	}

	plugin := mclient.New(mclient.Config{BaseURL: pluginURL})
	resolver := namespace.New(db)

	if *verify {
		vcfg := verifyConfig{
			DB:     db,
			Plugin: plugin,
			// Adapter converts []namespace.Namespace to the
			// []ResolvedNamespace shape verify.go expects.
			Resolver:    namespaceResolverAdapter{resolver},
			SampleSize:  *verifySample,
			WorkspaceID: *workspace,
		}
		report, err := verifyParity(context.Background(), vcfg, stdout)
		if err != nil {
			return err
		}
		fmt.Fprintf(stdout, "\nVerify complete: workspaces_sampled=%d matches=%d mismatches=%d errors=%d\n",
			report.WorkspacesSampled, report.Matches, report.Mismatches, report.Errors)
		// Any mismatch or per-workspace error makes the whole run
		// fail so CI/operators notice parity drift.
		if report.Mismatches > 0 || report.Errors > 0 {
			return fmt.Errorf("verify found %d mismatches and %d errors", report.Mismatches, report.Errors)
		}
		return nil
	}

	// Backfill path covers both -dry-run and -apply; DryRun toggles
	// whether backfill actually writes.
	cfg := backfillConfig{
		DB:          db,
		Plugin:      plugin,
		Resolver:    resolver,
		WorkspaceID: *workspace,
		Limit:       *limit,
		DryRun:      *dryRun,
	}
	stats, err := backfill(context.Background(), cfg, stdout)
	if err != nil {
		return err
	}
	fmt.Fprintf(stdout, "\nBackfill complete: scanned=%d copied=%d skipped=%d errors=%d\n",
		stats.Scanned, stats.Copied, stats.Skipped, stats.Errors)
	return nil
}
// backfillStats accumulates the counters the CLI reports.
type backfillStats struct {
	// Scanned counts every row returned by the agent_memories query.
	Scanned int
	// Copied counts rows successfully written to the plugin (or, in
	// dry-run mode, rows that would have been written).
	Copied int
	// Skipped counts rows whose scope could not be mapped to a
	// writable namespace.
	Skipped int
	// Errors counts scan failures and plugin write failures.
	Errors int
}
// backfillConfig is the typed dependency bundle. Tests inject stubs
// for Plugin and Resolver; production wires real client + resolver.
type backfillConfig struct {
	// DB is the workspace-server database holding agent_memories.
	DB *sql.DB
	// Plugin is the memory-plugin write surface (namespace + memory upserts).
	Plugin backfillPlugin
	// Resolver maps a workspace to its writable v2 namespaces.
	Resolver backfillResolver
	// WorkspaceID, when non-empty, restricts the run to one workspace.
	WorkspaceID string
	// Limit caps the number of rows processed this run.
	Limit int
	// DryRun reports what would be copied without writing anything.
	DryRun bool
}
// backfillPlugin is the slice of memory-plugin client we call.
// Defined at the consumer so tests can stub it without the real
// HTTP client.
type backfillPlugin interface {
	// UpsertNamespace creates-or-updates the named namespace.
	UpsertNamespace(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error)
	// CommitMemory writes one memory record into the namespace;
	// body.ID is the idempotency key for upsert-on-conflict.
	CommitMemory(ctx context.Context, namespace string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error)
}
// backfillResolver lets the backfill compute namespace strings the
// same way the live MCP layer does.
type backfillResolver interface {
	// WritableNamespaces returns the namespaces the workspace may
	// write to; backfill picks the one matching the row's scope kind.
	WritableNamespaces(ctx context.Context, workspaceID string) ([]namespace.Namespace, error)
}
// backfill is the workhorse. Iterates agent_memories, maps each row's
|
|
// scope to a v2 namespace via the resolver, and POSTs to the plugin.
|
|
// Returns final stats. Stops after Limit rows.
|
|
func backfill(ctx context.Context, cfg backfillConfig, stdout *os.File) (*backfillStats, error) {
|
|
stats := &backfillStats{}
|
|
|
|
query := `
|
|
SELECT id, workspace_id, content, scope, created_at
|
|
FROM agent_memories
|
|
`
|
|
args := []interface{}{}
|
|
if cfg.WorkspaceID != "" {
|
|
query += ` WHERE workspace_id = $1`
|
|
args = append(args, cfg.WorkspaceID)
|
|
}
|
|
query += ` ORDER BY created_at ASC LIMIT $` + fmt.Sprintf("%d", len(args)+1)
|
|
args = append(args, cfg.Limit)
|
|
|
|
rows, err := cfg.DB.QueryContext(ctx, query, args...)
|
|
if err != nil {
|
|
return stats, fmt.Errorf("query agent_memories: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
for rows.Next() {
|
|
stats.Scanned++
|
|
var (
|
|
id, workspaceID, content, scope string
|
|
createdAt time.Time
|
|
)
|
|
if err := rows.Scan(&id, &workspaceID, &content, &scope, &createdAt); err != nil {
|
|
fmt.Fprintf(stdout, "scan: %v\n", err)
|
|
stats.Errors++
|
|
continue
|
|
}
|
|
|
|
ns, err := mapScopeToNamespace(ctx, cfg.Resolver, workspaceID, scope)
|
|
if err != nil {
|
|
fmt.Fprintf(stdout, "[skip] id=%s workspace=%s: %v\n", id, workspaceID, err)
|
|
stats.Skipped++
|
|
continue
|
|
}
|
|
|
|
if cfg.DryRun {
|
|
fmt.Fprintf(stdout, "[dry] id=%s scope=%s → ns=%s\n", id, scope, ns)
|
|
stats.Copied++ // would-have-copied
|
|
continue
|
|
}
|
|
|
|
// Ensure the namespace exists before posting memories. Plugin's
|
|
// UpsertNamespace is idempotent so calling per-row is wasteful
|
|
// but safe; for v1 we accept the chattiness.
|
|
if _, err := cfg.Plugin.UpsertNamespace(ctx, ns, contract.NamespaceUpsert{
|
|
Kind: namespaceKindFromString(scope),
|
|
}); err != nil {
|
|
fmt.Fprintf(stdout, "[err-ns] id=%s ns=%s: %v\n", id, ns, err)
|
|
stats.Errors++
|
|
continue
|
|
}
|
|
|
|
// Pass the source row's UUID as the idempotency key so re-runs
|
|
// upsert in place. Without this, retries would duplicate every
|
|
// memory.
|
|
if _, err := cfg.Plugin.CommitMemory(ctx, ns, contract.MemoryWrite{
|
|
ID: id,
|
|
Content: content,
|
|
Kind: contract.MemoryKindFact,
|
|
Source: contract.MemorySourceAgent,
|
|
}); err != nil {
|
|
fmt.Fprintf(stdout, "[err-mem] id=%s ns=%s: %v\n", id, ns, err)
|
|
stats.Errors++
|
|
continue
|
|
}
|
|
stats.Copied++
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return stats, fmt.Errorf("iterate rows: %w", err)
|
|
}
|
|
return stats, nil
|
|
}
|
|
|
|
// mapScopeToNamespace mirrors the legacy-shim translation. The
|
|
// backfill needs the SAME mapping the runtime uses so reads work
|
|
// after cutover.
|
|
func mapScopeToNamespace(ctx context.Context, r backfillResolver, workspaceID, scope string) (string, error) {
|
|
writable, err := r.WritableNamespaces(ctx, workspaceID)
|
|
if err != nil {
|
|
return "", fmt.Errorf("resolve writable: %w", err)
|
|
}
|
|
var wantKind contract.NamespaceKind
|
|
switch scope {
|
|
case "LOCAL":
|
|
wantKind = contract.NamespaceKindWorkspace
|
|
case "TEAM":
|
|
wantKind = contract.NamespaceKindTeam
|
|
case "GLOBAL":
|
|
wantKind = contract.NamespaceKindOrg
|
|
default:
|
|
return "", fmt.Errorf("unknown scope %q", scope)
|
|
}
|
|
for _, ns := range writable {
|
|
if ns.Kind == wantKind {
|
|
return ns.Name, nil
|
|
}
|
|
}
|
|
return "", fmt.Errorf("no writable namespace of kind %s for workspace %s", wantKind, workspaceID)
|
|
}
|
|
|
|
// namespaceKindFromString returns the contract.NamespaceKind for a
|
|
// legacy scope value. Unknown scopes default to "workspace" so the
|
|
// backfill never aborts on an unexpected row.
|
|
func namespaceKindFromString(scope string) contract.NamespaceKind {
|
|
switch strings.ToUpper(scope) {
|
|
case "TEAM":
|
|
return contract.NamespaceKindTeam
|
|
case "GLOBAL":
|
|
return contract.NamespaceKindOrg
|
|
default:
|
|
return contract.NamespaceKindWorkspace
|
|
}
|
|
}
|
|
|
|
// namespaceResolverAdapter bridges *namespace.Resolver (which returns
// []namespace.Namespace) to verify.go's verifyResolver interface
// (which wants []ResolvedNamespace). Keeps verify.go independent of
// the namespace-package dependency so its tests can stub easily.
type namespaceResolverAdapter struct {
	// r is the concrete resolver shared with the backfill path.
	r *namespace.Resolver
}
func (a namespaceResolverAdapter) ReadableNamespaces(ctx context.Context, workspaceID string) ([]ResolvedNamespace, error) {
|
|
src, err := a.r.ReadableNamespaces(ctx, workspaceID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
out := make([]ResolvedNamespace, len(src))
|
|
for i, ns := range src {
|
|
out[i] = ResolvedNamespace{Name: ns.Name}
|
|
}
|
|
return out, nil
|
|
}
|