Merge pull request #2491 from Molecule-AI/followup/provision-panic-test-hardening

test(provision): harden panic tests with re-raise guard + broadcast count
This commit is contained in:
Hongming Wang 2026-05-02 03:18:03 +00:00 committed by GitHub
commit 093e5038d2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 50 additions and 19 deletions

View File

@ -189,6 +189,22 @@ func TestProvisionWorkspaceCP_ConcurrentBurst_NoSilentDrop(t *testing.T) {
}
}
// Assertion 4: every goroutine's failure path called RecordAndBroadcast
// exactly once (via h.markProvisionFailed inside provisionWorkspaceCP's
// "start failed" arm). Cross-checks Assertion 2 from a different angle
// — if a goroutine reaches Start() but then loses its WORKSPACE_
// PROVISION_FAILED broadcast, the canvas spinner sticks on
// "provisioning" until the sweeper. That regression class is what
// drove making logProvisionPanic a method on *WorkspaceHandler — so
// it's worth pinning here too.
bcast.mu.Lock()
bcastCount := bcast.count
bcast.mu.Unlock()
if bcastCount != numWorkspaces {
t.Errorf("broadcaster saw %d RecordAndBroadcast calls, want %d. SILENT-DROP CLASS: either a goroutine reached cpProv.Start but was lost before markProvisionFailed, OR it exited via an earlier path before reaching Start (cross-check Assertion 2 above).",
bcastCount, numWorkspaces)
}
if err := mock.ExpectationsWereMet(); err != nil {
// Soft-fail: under concurrency some queries may have been
// re-ordered relative to the (non-strict) expectation set,

View File

@ -33,6 +33,10 @@ func newPanicTestHandler() (*WorkspaceHandler, *captureBroadcaster) {
// load-bearing — `log.Writer()` evaluated at defer-fire time would
// return the buffer (not the original writer) and never restore it,
// poisoning subsequent tests in the package.
//
// log.SetOutput is process-global: do NOT call this from a test that
// uses t.Parallel() or two captures will race + clobber. The panic
// tests below are intentionally non-parallel for this reason.
func captureLog(t *testing.T) *bytes.Buffer {
t.Helper()
var buf bytes.Buffer
@ -42,16 +46,35 @@ func captureLog(t *testing.T) *bytes.Buffer {
return &buf
}
// guardAgainstReraise wraps a function in a recover-arm that flips the
// returned bool to false if anything propagates past `defer
// h.logProvisionPanic(...)`. Used in every panic test (not just
// RecoversAndMarksFailed) so a future regression that re-raises from
// the recovery path surfaces as a clean test failure, not a process
// abort that crashes sibling tests.
func guardAgainstReraise(fn func()) (didNotPanic bool) {
didNotPanic = true
defer func() {
if r := recover(); r != nil {
didNotPanic = false
}
}()
fn()
return
}
func TestLogProvisionPanic_NoOpWhenNoPanic(t *testing.T) {
// Sanity: the deferred recover must be silent when nothing panicked.
// Otherwise every successful provision would emit a spurious panic log.
buf := captureLog(t)
h, cap := newPanicTestHandler()
func() {
if !guardAgainstReraise(func() {
defer h.logProvisionPanic("ws-no-panic", "cp")
// no panic
}()
}) {
t.Fatal("logProvisionPanic re-raised on the no-panic path — recover() returned non-nil for a goroutine that didn't panic")
}
if buf.Len() != 0 {
t.Fatalf("expected no log output when no panic, got: %q", buf.String())
@ -87,23 +110,13 @@ func TestLogProvisionPanic_RecoversAndMarksFailed(t *testing.T) {
h, cap := newPanicTestHandler()
// Exercise: a function that defers logProvisionPanic + then panics.
// The recover MUST swallow the panic — if it propagates, the test
// process crashes and the panic message bubbles up as a Go test
// failure rather than the assertion below.
didNotPanic := true
func() {
defer func() {
// If logProvisionPanic re-raised, this catches it for the
// test. We assert below that it did NOT re-raise.
if r := recover(); r != nil {
didNotPanic = false
}
}()
// The recover MUST swallow the panic — if it propagates,
// guardAgainstReraise catches it instead of letting the test
// process abort.
if !guardAgainstReraise(func() {
defer h.logProvisionPanic("ws-panic", "cp")
panic("simulated provision panic for #2486 regression")
}()
if !didNotPanic {
}) {
t.Fatal("logProvisionPanic re-raised the panic — the recover() arm did not swallow it")
}
@ -156,10 +169,12 @@ func TestLogProvisionPanic_PersistFailureLogged(t *testing.T) {
buf := captureLog(t)
h, _ := newPanicTestHandler()
func() {
if !guardAgainstReraise(func() {
defer h.logProvisionPanic("ws-panic-persist-fail", "docker")
panic("simulated panic with DB unavailable")
}()
}) {
t.Fatal("logProvisionPanic re-raised when the persist-failure path was exercised — recover() arm did not swallow")
}
logged := buf.String()
// markProvisionFailed logs `markProvisionFailed: db update failed for <id>: <err>`