molecule-core/platform/internal/handlers/memory.go
rabbitblood 73171532a1 feat(memory): optimistic-locking via if_match_version on workspace_memory writes
Closes the silent-overwrite hole where two agents racing a read-modify-
write on the same memory key left only one agent's update. Relevant for
orchestrators (PM, Dev Lead, Marketing Lead) keeping structured running
state (delegation-result ledgers, task queues) in memory, and for the
``research-backlog:*`` keys that multiple idle loops write in parallel.

## Semantics

### Back-compat path (no if_match_version)
Unchanged: ``INSERT ... ON CONFLICT UPDATE`` last-write-wins. Every
existing agent tool, every existing ``commit_memory`` call, every
existing cron that writes memory — all continue to work with no edit.

### Optimistic-lock path (if_match_version set)
1. Client calls ``GET /memory/:key`` → ``{value, version: V}``
2. Client modifies value locally
3. Client ``POST /memory {key, value, if_match_version: V}``
4. Server: ``UPDATE ... WHERE version = V`` + RETURNING new version
5. On match → 200 + ``{version: V+1}``
6. On mismatch → 409 + ``{expected_version: V, current_version: <actual>}``
7. Client reads the actual version and retries.

### Create-only marker
``if_match_version: 0`` means "create iff the key doesn't exist yet".
Two agents simultaneously seeding a shared key will see exactly one
success + one 409 — no silent collision, no duplicate-init work.

### Schema

Migration 023 adds ``version BIGINT NOT NULL DEFAULT 1``. Existing rows
baseline at 1. New rows start at 1. Every successful write (both paths)
increments: ``version = version + 1`` on update, ``1`` on insert.

## Why version, not updated_at

``updated_at`` has second-granularity and can collide between concurrent
writers on a fast clock. A monotonic counter is collision-free and more
readable in the 409 response body ("expected 5, current is 7 — you
missed 2 writes" tells an agent exactly what to re-read).

## Why ``if_match_version`` and not an ETag header

JSON field keeps it in the request body, visible alongside the value
payload. Agents assembling requests programmatically don't have to
remember to thread a header through their HTTP client wrapper; the
existing ``commit_memory`` tool can grow one optional kwarg and match
the existing signature shape.

## Tests

11 memory-handler cases covering every path:
- GET list / get (with version in response shape)
- Set with no version (back-compat upsert, returns new version)
- Set with if_match_version match (happy path, increment)
- Set with if_match_version mismatch (409 + expected/current fields)
- Set with if_match_version=0 on absent key (create-only success)
- Set with if_match_version=N on absent key (409 — caller's mental
  model is wrong)
- Bad inputs (missing key, malformed JSON)
- Delete happy + error path

Full ``go test ./internal/handlers/`` green.

## Follow-up (not in this PR)

- Workspace-template tool update: ``commit_memory(content, *,
  if_match_version=None)`` exposes the new option and, on a 409, returns
  the ``current_version`` so agents can retry without a manual re-read.
- Named checkpoints table (``workspace_checkpoints``) for durable
  orchestrator state snapshots. This is a different concern from per-key
  locking, so it belongs in a separate PR.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 02:32:46 -07:00

253 lines
8.5 KiB
Go

package handlers
import (
"database/sql"
"encoding/json"
"log"
"net/http"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/gin-gonic/gin"
)
// MemoryEntry is the JSON shape produced by the List and Get handlers for a
// single workspace_memory row. The Version field enables optimistic
// concurrency on subsequent writes — callers echo it back as
// if_match_version to detect concurrent modification.
type MemoryEntry struct {
// Key is the row's key column.
Key string `json:"key"`
// Value holds the bytes scanned from the value column, emitted verbatim
// (json.RawMessage) rather than being decoded and re-encoded.
Value json.RawMessage `json:"value"`
// Version is the row's version column; Set reports the new version after
// every successful write.
Version int64 `json:"version"`
// ExpiresAt is nil when the expires_at column is NULL, in which case the
// field is omitted from the JSON output.
ExpiresAt *time.Time `json:"expires_at,omitempty"`
// UpdatedAt is the row's updated_at column, which Set assigns NOW() on
// every write.
UpdatedAt time.Time `json:"updated_at"`
}
// MemoryHandler groups the HTTP handlers for workspace memory (List, Get,
// Set, Delete). It has no fields; the handlers reach the database through
// the package-level db.DB handle.
type MemoryHandler struct{}

// NewMemoryHandler returns a pointer to a freshly allocated MemoryHandler.
func NewMemoryHandler() *MemoryHandler {
	return new(MemoryHandler)
}
// List handles GET /workspaces/:id/memory.
//
// It responds 200 with a JSON array of the workspace's non-expired memory
// entries (expires_at NULL or in the future), ordered by key. The array is
// empty, never null, when there are no entries.
//
// A row that fails to scan is logged and skipped so one bad row does not
// hide the rest. A failure of the query itself, or of the row iteration,
// yields 500 {"error": "query failed"}.
func (h *MemoryHandler) List(c *gin.Context) {
	workspaceID := c.Param("id")

	rows, err := db.DB.QueryContext(c.Request.Context(), `
		SELECT key, value, version, expires_at, updated_at
		FROM workspace_memory
		WHERE workspace_id = $1 AND (expires_at IS NULL OR expires_at > NOW())
		ORDER BY key
	`, workspaceID)
	if err != nil {
		log.Printf("Memory list error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}
	defer rows.Close()

	// Non-nil empty slice so an empty result encodes as [] rather than null.
	entries := make([]MemoryEntry, 0)
	for rows.Next() {
		var entry MemoryEntry
		var value []byte
		if err := rows.Scan(&entry.Key, &value, &entry.Version, &entry.ExpiresAt, &entry.UpdatedAt); err != nil {
			log.Printf("Memory list scan error: %v", err)
			continue
		}
		entry.Value = json.RawMessage(value)
		entries = append(entries, entry)
	}

	// rows.Next returns false both at end-of-results and on error. Without
	// this check a mid-stream failure (dropped connection, cancelled request
	// context) would be reported as a successful but truncated listing.
	if err := rows.Err(); err != nil {
		log.Printf("Memory list iteration error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}

	c.JSON(http.StatusOK, entries)
}
// Get handles GET /workspaces/:id/memory/:key.
//
// It looks up a single non-expired entry for the workspace and key named by
// the path parameters and responds with:
//   - 200 and the MemoryEntry when a row is found;
//   - 404 {"error": "key not found"} when no row matches (including rows
//     whose expires_at has passed);
//   - 500 {"error": "query failed"} on any other database error.
func (h *MemoryHandler) Get(c *gin.Context) {
	var (
		found MemoryEntry
		raw   []byte
	)

	row := db.DB.QueryRowContext(c.Request.Context(), `
SELECT key, value, version, expires_at, updated_at
FROM workspace_memory
WHERE workspace_id = $1 AND key = $2 AND (expires_at IS NULL OR expires_at > NOW())
`, c.Param("id"), c.Param("key"))

	switch scanErr := row.Scan(&found.Key, &raw, &found.Version, &found.ExpiresAt, &found.UpdatedAt); {
	case scanErr == sql.ErrNoRows:
		c.JSON(http.StatusNotFound, gin.H{"error": "key not found"})
	case scanErr != nil:
		log.Printf("Memory get error: %v", scanErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
	default:
		// Pass the stored JSON through untouched.
		found.Value = json.RawMessage(raw)
		c.JSON(http.StatusOK, found)
	}
}
// Set handles POST /workspaces/:id/memory with optimistic-locking support.
//
// Request body (JSON):
//   - key (required): the memory key to write.
//   - value (required): any JSON value; it is stored via a ::jsonb cast.
//   - ttl_seconds (optional): when present, expires_at is set to now plus
//     that many seconds; when absent, expires_at is written as NULL.
//   - if_match_version (optional): when present, the write only succeeds if
//     the row's current version equals this value. 0 means "create only":
//     succeed iff the key does not exist yet.
//
// Back-compat (no if_match_version): behaves exactly as before — last-
// write-wins upsert. Every existing agent tool keeps working unmodified.
//
// Optimistic-locking (if_match_version set): the write is conditional on
// the current row version. On conflict (concurrent writer incremented
// version since the caller read), returns 409 with the latest version so
// the caller can re-read + retry. This closes the silent-overwrite hole
// for orchestrators running concurrent delegation-ledger / task-queue
// state in memory.
//
// Expected call pattern for a conflict-free read-modify-write:
//
//  1. GET /memory/:key → {value, version: V}
//  2. modify value
//  3. POST /memory with {key, value, if_match_version: V}
//  4. on 200 → done; on 409 → goto 1.
//
// Responses:
//   - 200 {"status": "ok", "key": ..., "version": <new version>}
//   - 400 on malformed JSON, a missing key, or a missing value
//   - 409 {"error", "expected_version", "current_version"} when the version
//     guard fails; current_version is null when the key does not exist
//   - 500 on database errors
func (h *MemoryHandler) Set(c *gin.Context) {
	workspaceID := c.Param("id")

	var body struct {
		Key        string          `json:"key"`
		Value      json.RawMessage `json:"value"`
		TTLSeconds *int            `json:"ttl_seconds"`
		// IfMatchVersion, when non-nil, gates the write on the row's
		// current version matching this value. Mismatch → 409 + latest
		// version in the response so the caller can retry cleanly.
		IfMatchVersion *int64 `json:"if_match_version"`
	}
	if err := c.ShouldBindJSON(&body); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	if body.Key == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "key is required"})
		return
	}
	// An omitted "value" leaves the RawMessage empty; casting '' to jsonb
	// fails in the database, which would surface as a 500 for what is
	// really a client error. (An explicit JSON null is non-empty and is
	// still accepted.)
	if len(body.Value) == 0 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "value is required"})
		return
	}

	var expiresAt *time.Time
	if body.TTLSeconds != nil {
		t := time.Now().Add(time.Duration(*body.TTLSeconds) * time.Second)
		expiresAt = &t
	}

	ctx := c.Request.Context()
	value := string(body.Value)

	// Path A — no version guard: unchanged last-write-wins upsert.
	if body.IfMatchVersion == nil {
		var newVersion int64
		err := db.DB.QueryRowContext(ctx, `
			INSERT INTO workspace_memory(id, workspace_id, key, value, expires_at, updated_at, version)
			VALUES(gen_random_uuid(), $1, $2, $3::jsonb, $4, NOW(), 1)
			ON CONFLICT(workspace_id, key) DO UPDATE
			SET value = EXCLUDED.value,
			    expires_at = EXCLUDED.expires_at,
			    updated_at = NOW(),
			    version = workspace_memory.version + 1
			RETURNING version
		`, workspaceID, body.Key, value, expiresAt).Scan(&newVersion)
		if err != nil {
			log.Printf("Memory set error: %v", err)
			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to set memory"})
			return
		}
		c.JSON(http.StatusOK, gin.H{"status": "ok", "key": body.Key, "version": newVersion})
		return
	}

	// Path B — optimistic-locking guard.
	//
	// Strategy:
	//  1. Try to UPDATE the existing row with a version check. RETURNING
	//     the new version tells us whether the guard matched.
	//  2. If the UPDATE matched nothing, probe the row's current version
	//     so the 409 body can say "expected N, current is M".
	//  3. If the row is absent and the caller asked for create-only
	//     (if_match_version=0), INSERT it — atomically, see below.
	expected := *body.IfMatchVersion

	var newVersion int64
	updateErr := db.DB.QueryRowContext(ctx, `
		UPDATE workspace_memory
		SET value = $3::jsonb,
		    expires_at = $4,
		    updated_at = NOW(),
		    version = version + 1
		WHERE workspace_id = $1 AND key = $2 AND version = $5
		RETURNING version
	`, workspaceID, body.Key, value, expiresAt, expected).Scan(&newVersion)
	if updateErr == nil {
		c.JSON(http.StatusOK, gin.H{"status": "ok", "key": body.Key, "version": newVersion})
		return
	}
	if updateErr != sql.ErrNoRows {
		log.Printf("Memory set conditional update error: %v", updateErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to set memory"})
		return
	}

	// The guard did not match: either the row doesn't exist yet, or its
	// version differs. Look up the actual state.
	probeVersion := func() (sql.NullInt64, error) {
		var v sql.NullInt64
		err := db.DB.QueryRowContext(ctx, `
			SELECT version FROM workspace_memory
			WHERE workspace_id = $1 AND key = $2
		`, workspaceID, body.Key).Scan(&v)
		return v, err
	}
	currentVersion, probeErr := probeVersion()

	// lostCreateRace records that another writer created the key between
	// our probe and our INSERT.
	lostCreateRace := false
	if probeErr == sql.ErrNoRows && expected == 0 {
		// Row absent and the caller asked for create-only — honour it.
		//
		// The probe above and this INSERT are separate statements, so two
		// concurrent creators can both observe "absent". ON CONFLICT DO
		// NOTHING makes the loser's INSERT return zero rows instead of
		// failing on the unique constraint, so the loser gets a 409
		// rather than a 500.
		var createdVersion int64
		insertErr := db.DB.QueryRowContext(ctx, `
			INSERT INTO workspace_memory(id, workspace_id, key, value, expires_at, updated_at, version)
			VALUES(gen_random_uuid(), $1, $2, $3::jsonb, $4, NOW(), 1)
			ON CONFLICT(workspace_id, key) DO NOTHING
			RETURNING version
		`, workspaceID, body.Key, value, expiresAt).Scan(&createdVersion)
		switch insertErr {
		case nil:
			c.JSON(http.StatusOK, gin.H{"status": "ok", "key": body.Key, "version": createdVersion})
			return
		case sql.ErrNoRows:
			// Lost the race: re-probe so the 409 carries the winner's
			// version.
			lostCreateRace = true
			currentVersion, probeErr = probeVersion()
		default:
			log.Printf("Memory set error (create-only path): %v", insertErr)
			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to set memory"})
			return
		}
	}

	switch {
	case probeErr == sql.ErrNoRows && lostCreateRace:
		// The key was created and then removed again while we were
		// racing; there is no version to report, but the caller should
		// still retry.
		c.JSON(http.StatusConflict, gin.H{
			"error":            "if_match_version mismatch: key was modified concurrently",
			"expected_version": expected,
			"current_version":  nil,
		})
	case probeErr == sql.ErrNoRows:
		// Non-zero expected version on a key that does not exist: the
		// caller tried to update a non-existent key with a version
		// assertion.
		c.JSON(http.StatusConflict, gin.H{
			"error":            "if_match_version mismatch: key does not exist",
			"expected_version": expected,
			"current_version":  nil,
		})
	case probeErr != nil:
		log.Printf("Memory set probe error: %v", probeErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to probe current version"})
	default:
		c.JSON(http.StatusConflict, gin.H{
			"error":            "if_match_version mismatch",
			"expected_version": expected,
			"current_version":  currentVersion.Int64,
		})
	}
}
// Delete handles DELETE /workspaces/:id/memory/:key.
//
// It removes the row for the workspace and key named by the path parameters.
// The statement's result is discarded, so the handler responds 200
// {"status": "deleted"} whether or not a row actually matched; only a
// database error produces 500 {"error": "failed to delete"}.
func (h *MemoryHandler) Delete(c *gin.Context) {
	const stmt = `
DELETE FROM workspace_memory WHERE workspace_id = $1 AND key = $2
`
	if _, execErr := db.DB.ExecContext(c.Request.Context(), stmt, c.Param("id"), c.Param("key")); execErr != nil {
		log.Printf("Memory delete error: %v", execErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"status": "deleted"})
}