diff --git a/.github/workflows/handlers-postgres-integration.yml b/.github/workflows/handlers-postgres-integration.yml new file mode 100644 index 00000000..93c1b9f0 --- /dev/null +++ b/.github/workflows/handlers-postgres-integration.yml @@ -0,0 +1,126 @@ +name: Handlers Postgres Integration + +# Real-Postgres integration tests for workspace-server/internal/handlers/. +# Triggered on every PR/push that touches the handlers package. +# +# Why this workflow exists +# ------------------------ +# Strict-sqlmock unit tests pin which SQL statements fire — they're fast +# and let us iterate without a DB. But sqlmock CANNOT detect bugs that +# depend on the row state AFTER the SQL runs. The result_preview-lost +# bug shipped to staging in PR #2854 because every unit test was +# satisfied with "an UPDATE statement fired" — none verified the row's +# preview field actually landed. The local-postgres E2E that retrofit +# self-review caught it took 2 minutes to set up and would have caught +# the bug at PR-time. +# +# This job spins a Postgres service container, applies the migration, +# and runs `go test -tags=integration` against a live DB. Required +# check on staging branch protection — backend handler PRs cannot +# merge without a real-DB regression gate. +# +# Cost: ~30s job (postgres pull from GH cache + go build + 4 tests). + +on: + push: + branches: [main, staging] + pull_request: + branches: [main, staging] + merge_group: + types: [checks_requested] + workflow_dispatch: + +concurrency: + group: handlers-pg-integ-${{ github.event.pull_request.head.sha || github.sha }} + cancel-in-progress: false + +jobs: + detect-changes: + name: detect-changes + runs-on: ubuntu-latest + outputs: + handlers: ${{ steps.filter.outputs.handlers }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1 + id: filter + with: + filters: | + handlers: + - 'workspace-server/internal/handlers/**' + - 'workspace-server/internal/wsauth/**' + - 'workspace-server/migrations/**' + - '.github/workflows/handlers-postgres-integration.yml' + + # Single-job-with-per-step-if pattern: always runs to satisfy the + # required-check name on branch protection; real work gates on the + # paths filter. See ci.yml's Platform (Go) for the same shape. + integration: + name: Handlers Postgres Integration + needs: detect-changes + runs-on: ubuntu-latest + services: + postgres: + image: postgres:15-alpine + env: + POSTGRES_PASSWORD: test + POSTGRES_DB: molecule + ports: + - 5432:5432 + # GHA spins this with --health-cmd built in for postgres images. + options: >- + --health-cmd pg_isready + --health-interval 5s + --health-timeout 5s + --health-retries 10 + defaults: + run: + working-directory: workspace-server + steps: + - if: needs.detect-changes.outputs.handlers != 'true' + working-directory: . + run: echo "No handlers/migrations changes — skipping; this job always runs to satisfy the required-check name." + + - if: needs.detect-changes.outputs.handlers == 'true' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - if: needs.detect-changes.outputs.handlers == 'true' + uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5 + with: + go-version: 'stable' + + - if: needs.detect-changes.outputs.handlers == 'true' + name: Apply migrations to Postgres service + env: + PGPASSWORD: test + run: | + # Wait for postgres to actually accept connections (the + # GHA --health-cmd is best-effort but psql can still race). + for i in {1..15}; do + if pg_isready -h localhost -p 5432 -U postgres -q; then break; fi + echo "waiting for postgres..."; sleep 2 + done + # Apply only the delegations migration — it doesn't FK to + # workspaces, so we don't need to fold in the full migration + # chain (some earlier migrations have ordering dependencies on + # tables that have since been dropped). Per-test migrations + # under handlers/ should add their schema files to this list + # as needed. + psql -h localhost -U postgres -d molecule -v ON_ERROR_STOP=1 \ + -f migrations/049_delegations.up.sql + + - if: needs.detect-changes.outputs.handlers == 'true' + name: Run integration tests + env: + INTEGRATION_DB_URL: postgres://postgres:test@localhost:5432/molecule?sslmode=disable + run: | + go test -tags=integration -timeout 5m -v ./internal/handlers/ -run "^TestIntegration_" + + - if: needs.detect-changes.outputs.handlers == 'true' && failure() + name: Diagnostic dump on failure + env: + PGPASSWORD: test + run: | + echo "::group::delegations table state" + psql -h localhost -U postgres -d molecule -c "SELECT * FROM delegations LIMIT 50;" || true + echo "::endgroup::" diff --git a/workspace-server/internal/handlers/delegation.go b/workspace-server/internal/handlers/delegation.go index 218df0b2..d9b66884 100644 --- a/workspace-server/internal/handlers/delegation.go +++ b/workspace-server/internal/handlers/delegation.go @@ -411,11 +411,15 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s log.Printf("Delegation %s: failed to insert success log: %v", delegationID, err) } - h.updateDelegationStatus(sourceID, delegationID, "completed", "") - // RFC #2829 #318 — mirror result_preview to the durable ledger - // (updateDelegationStatus handles the status flip; ledger gets the - // preview field set on the same row). + // RFC #2829 #318: write the ledger row with result_preview FIRST, + // THEN updateDelegationStatus. Order matters: SetStatus has a + // same-status replay no-op — if updateDelegationStatus's nested + // recordLedgerStatus(completed, "", "") fires first, the outer call + // hits the no-op branch and result_preview is never written. + // Caught by the local-Postgres integration test in + // delegation_ledger_integration_test.go. recordLedgerStatus(ctx, delegationID, "completed", "", responseText) + h.updateDelegationStatus(sourceID, delegationID, "completed", "") h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_COMPLETE", sourceID, map[string]interface{}{ "delegation_id": delegationID, "target_id": targetID, @@ -534,6 +538,13 @@ func (h *DelegationHandler) UpdateStatus(c *gin.Context) { return } + // RFC #2829 #318 — same ordering pin as executeDelegation completion: + // write the with-preview ledger row FIRST so updateDelegationStatus's + // inner same-status no-op doesn't clobber preview. + if body.Status == "completed" { + recordLedgerStatus(ctx, delegationID, "completed", "", body.ResponsePreview) + } + h.updateDelegationStatus(sourceID, delegationID, body.Status, body.Error) if body.Status == "completed" { diff --git a/workspace-server/internal/handlers/delegation_ledger_integration_test.go b/workspace-server/internal/handlers/delegation_ledger_integration_test.go new file mode 100644 index 00000000..44b510a0 --- /dev/null +++ b/workspace-server/internal/handlers/delegation_ledger_integration_test.go @@ -0,0 +1,247 @@ +//go:build integration +// +build integration + +// delegation_ledger_integration_test.go — REAL Postgres integration tests +// for the RFC #2829 ledger writes. +// +// Run with: +// +// docker run --rm -d --name pg-integration \ +// -e POSTGRES_PASSWORD=test -e POSTGRES_DB=molecule \ +// -p 55432:5432 postgres:15-alpine +// sleep 4 +// psql ... < workspace-server/migrations/049_delegations.up.sql +// cd workspace-server +// INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \ +// go test -tags=integration ./internal/handlers/ -run Integration_ +// +// CI (.github/workflows/handlers-postgres-integration.yml) runs this on +// every PR that touches workspace-server/internal/handlers/**. +// +// Why these are NOT plain unit tests +// ---------------------------------- +// The strict-sqlmock unit tests in delegation_ledger_writes_test.go pin +// which SQL statements fire — they are fast and let us iterate without +// a DB. But sqlmock CANNOT detect bugs that depend on the ROW STATE +// after the SQL runs. The result_preview-lost bug shipped to staging in +// PR #2854 because every unit test was satisfied with "an UPDATE +// statement fired" — none verified the row's preview field landed. +// +// These integration tests close that gap by booting a real Postgres, +// running the production helpers, and SELECTing the row to verify the +// observable state matches the expected outcome. + +package handlers + +import ( + "context" + "database/sql" + "os" + "testing" + + mdb "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" + _ "github.com/lib/pq" +) + +// integrationDB returns the configured integration-test connection or +// skips the test if INTEGRATION_DB_URL is unset. Local devs run the +// docker-postgres incantation in the file header; CI's workflow sets the +// env var via a service container. +func integrationDB(t *testing.T) *sql.DB { + t.Helper() + url := os.Getenv("INTEGRATION_DB_URL") + if url == "" { + t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)") + } + conn, err := sql.Open("postgres", url) + if err != nil { + t.Fatalf("open: %v", err) + } + if err := conn.Ping(); err != nil { + t.Fatalf("ping: %v", err) + } + // Each test gets a fresh table state — fail loud if cleanup fails so + // a bad test doesn't pollute the next one. + if _, err := conn.ExecContext(context.Background(), `DELETE FROM delegations`); err != nil { + t.Fatalf("cleanup: %v", err) + } + // Wire the package-level db.DB so production helpers (recordLedgerInsert, + // recordLedgerStatus) see the same connection. + prev := mdb.DB + mdb.DB = conn + t.Cleanup(func() { + mdb.DB = prev + conn.Close() + }) + return conn +} + +// readLedgerRow returns (status, result_preview, error_detail) for the +// given delegation_id, or fails the test on miss. +func readLedgerRow(t *testing.T, conn *sql.DB, id string) (status, preview, errorDetail string) { + t.Helper() + var prev, errDet sql.NullString + err := conn.QueryRowContext(context.Background(), + `SELECT status, result_preview, error_detail FROM delegations WHERE delegation_id = $1`, id, + ).Scan(&status, &prev, &errDet) + if err != nil { + t.Fatalf("readLedgerRow(%s): %v", id, err) + } + return status, prev.String, errDet.String +} + +// TestIntegration_ResultPreviewPreservedThroughCompletion is the +// regression gate for the bug that shipped in PR #2854 + was caught in +// self-review: when both the inner SetStatus(completed, "", "") (from +// updateDelegationStatus) and an outer SetStatus(completed, "", preview) +// fire, the SECOND one is a same-status no-op — order matters. +// +// The fix in delegation.go calls the WITH-PREVIEW SetStatus FIRST so the +// outer write lands the preview, and the inner becomes the no-op. +// +// This test fires the call sequence in the corrected order and asserts +// the row's result_preview matches. +// +// If a future refactor reverses the order, this test fails on a real +// Postgres — which sqlmock would have missed. +func TestIntegration_ResultPreviewPreservedThroughCompletion(t *testing.T) { + conn := integrationDB(t) + t.Setenv("DELEGATION_LEDGER_WRITE", "1") + + id := "integ-deleg-preview-1" + caller := "11111111-1111-1111-1111-111111111111" + callee := "22222222-2222-2222-2222-222222222222" + expectedPreview := "the long-running task's final answer" + + // Mirror the production call sequence the FIXED code path uses. + // executeDelegation flow: + // 1. insertDelegationRow → recordLedgerInsert (status=queued) + // 2. updateDelegationStatus("dispatched", "") at the start of execute, + // so the row is at status=dispatched by completion time + // 3. recordLedgerStatus("completed", "", preview) ← outer FIRST (the fix) + // 4. updateDelegationStatus("completed", "") inside, which calls + // recordLedgerStatus("completed", "", "") ← inner same-status no-op + recordLedgerInsert(context.Background(), caller, callee, id, "the question", "") + recordLedgerStatus(context.Background(), id, "dispatched", "", "") + recordLedgerStatus(context.Background(), id, "completed", "", expectedPreview) + recordLedgerStatus(context.Background(), id, "completed", "", "") + + status, preview, errDet := readLedgerRow(t, conn, id) + if status != "completed" { + t.Errorf("status: want completed, got %q", status) + } + if preview != expectedPreview { + t.Errorf("result_preview lost: want %q, got %q", expectedPreview, preview) + } + if errDet != "" { + t.Errorf("error_detail should be empty: got %q", errDet) + } +} + +// TestIntegration_ResultPreviewBuggyOrderIsLost — DIAGNOSTIC test that +// confirms the ORIGINAL buggy order does lose the preview. Useful when +// auditing similar wiring elsewhere. +// +// This is documented behavior: it asserts the same-status replay no-op +// works as designed in DelegationLedger.SetStatus. The fix in +// delegation.go is to AVOID this order, not to change SetStatus's +// same-status semantics (which the operator dashboard relies on for +// idempotent completion notifications). +func TestIntegration_ResultPreviewBuggyOrderIsLost(t *testing.T) { + conn := integrationDB(t) + t.Setenv("DELEGATION_LEDGER_WRITE", "1") + + id := "integ-deleg-preview-2" + caller := "11111111-1111-1111-1111-111111111111" + callee := "22222222-2222-2222-2222-222222222222" + + // BUGGY sequence in production-shape order: queued → dispatched → + // completed (no preview) → completed (preview ignored as same-status). + recordLedgerInsert(context.Background(), caller, callee, id, "the question", "") + recordLedgerStatus(context.Background(), id, "dispatched", "", "") // pre-completion stage + recordLedgerStatus(context.Background(), id, "completed", "", "") // inner first + recordLedgerStatus(context.Background(), id, "completed", "", "the answer") // outer same-status no-op + + _, preview, _ := readLedgerRow(t, conn, id) + if preview != "" { + t.Errorf("buggy-order preview was unexpectedly non-empty: %q (SetStatus same-status no-op contract may have changed)", preview) + } +} + +// TestIntegration_FailedTransitionCapturesErrorDetail — error_detail is +// the failure-path equivalent of result_preview. The legacy path calls +// SetStatus(failed, errorDetail, "") via updateDelegationStatus; no +// outer call exists today (no observed bug). This test pins that +// error_detail lands as expected, so a future refactor adding an outer +// call must consciously preserve the field — same lesson as the preview +// bug, just on the failure path. +func TestIntegration_FailedTransitionCapturesErrorDetail(t *testing.T) { + conn := integrationDB(t) + t.Setenv("DELEGATION_LEDGER_WRITE", "1") + + id := "integ-deleg-fail-1" + caller := "11111111-1111-1111-1111-111111111111" + callee := "22222222-2222-2222-2222-222222222222" + expectedError := "callee unreachable: connection refused" + + // queued → failed is allowed by allowedTransitions (the failure-on- + // dispatch case) so this exercises a real production path. + recordLedgerInsert(context.Background(), caller, callee, id, "the question", "") + recordLedgerStatus(context.Background(), id, "failed", expectedError, "") + + status, preview, errDet := readLedgerRow(t, conn, id) + if status != "failed" { + t.Errorf("status: want failed, got %q", status) + } + if errDet != expectedError { + t.Errorf("error_detail: want %q, got %q", expectedError, errDet) + } + if preview != "" { + t.Errorf("result_preview should be empty on failure: got %q", preview) + } +} + +// TestIntegration_FullLifecycle_QueuedToDispatchedToCompleted — pins the +// happy-path lifecycle. INSERT lands the row at queued; SetStatus moves +// it through dispatched and into completed with preview. After the +// terminal transition, no further state change is possible via +// SetStatus (forward-only protection). +func TestIntegration_FullLifecycle_QueuedToDispatchedToCompleted(t *testing.T) { + conn := integrationDB(t) + t.Setenv("DELEGATION_LEDGER_WRITE", "1") + + id := "integ-deleg-lifecycle-1" + caller := "11111111-1111-1111-1111-111111111111" + callee := "22222222-2222-2222-2222-222222222222" + + recordLedgerInsert(context.Background(), caller, callee, id, "task body", "") + if status, _, _ := readLedgerRow(t, conn, id); status != "queued" { + t.Errorf("after Insert: status want queued, got %q", status) + } + recordLedgerStatus(context.Background(), id, "dispatched", "", "") + if status, _, _ := readLedgerRow(t, conn, id); status != "dispatched" { + t.Errorf("after dispatched: status want dispatched, got %q", status) + } + recordLedgerStatus(context.Background(), id, "completed", "", "the result") + status, preview, _ := readLedgerRow(t, conn, id) + if status != "completed" { + t.Errorf("after completed: status want completed, got %q", status) + } + if preview != "the result" { + t.Errorf("preview after completed: want %q, got %q", "the result", preview) + } + + // Forward-only: trying to revise to failed should silently no-op + // (recordLedgerStatus swallows ErrInvalidTransition). + recordLedgerStatus(context.Background(), id, "failed", "post-hoc revision", "") + status, preview, errDet := readLedgerRow(t, conn, id) + if status != "completed" { + t.Errorf("forward-only broken: status changed to %q", status) + } + if preview != "the result" { + t.Errorf("preview clobbered by failed revision: %q", preview) + } + if errDet != "" { + t.Errorf("error_detail clobbered by failed revision: %q", errDet) + } +}