forked from molecule-ai/molecule-core
feat(runtime): native_status_mgmt skip — primitive #4 of 6
When an adapter declares provides_native_status_mgmt=True (because its
SDK reports its own ready/degraded/failed state explicitly), the
platform's error-rate-based status inference fights the adapter's own
state machine. This PR gates the inference branches on the capability
flag — adapter-driven transitions become authoritative.
Components:
- registry.go evaluateStatus: gate the two inferred-status branches
(online → degraded when error_rate ≥ 0.5; degraded → online when
error_rate < 0.1 and runtime_state is empty) behind a check of
runtimeOverrides.HasCapability(payload.WorkspaceID, "status_mgmt").
- The wedged-branch (RuntimeState == "wedged" → degraded) is NOT
gated. That path is the adapter's OWN self-report, not platform
inference, and stays active under native_status_mgmt — adapters
can still drive transitions via runtime_state.
Python side: no change. The capability map is already serialized via
RuntimeCapabilities.to_dict() in PR #2137 and sent in the heartbeat's
runtime_metadata block via PR #2139. An adapter setting
RuntimeCapabilities(provides_native_status_mgmt=True) automatically
flows through.
Tests (3 new):
- SkipsDegradeInference: error_rate=0.8 + currentStatus=online + native
flag set → degrade UPDATE does NOT fire (sqlmock fails on unexpected
query, which is the regression cover)
- SkipsRecovery: error_rate=0.05 + currentStatus=degraded + native →
recovery UPDATE does NOT fire
- WedgedStillRespected: runtime_state="wedged" + native → wedged
branch DOES fire (adapter self-report stays active)
Verification:
- All Go handlers tests pass (3 new + existing)
- 1308/1308 Python pytest pass (unchanged — Python side unmodified)
- go build + go vet clean
Stacked on #2140 (already merged via cascade); branch is current with
staging since #2139 and #2140 merged.
See project memory `project_runtime_native_pluggable.md`.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c0a5d842b4
commit
b4b406c074
173
workspace-server/internal/handlers/native_status_mgmt_test.go
Normal file
173
workspace-server/internal/handlers/native_status_mgmt_test.go
Normal file
@ -0,0 +1,173 @@
|
||||
package handlers
|
||||
|
||||
import (
	"bytes"
	"log"
	"net/http"
	"net/http/httptest"
	"testing"

	sqlmock "github.com/DATA-DOG/go-sqlmock"
	"github.com/gin-gonic/gin"
)
|
||||
|
||||
// TestHeartbeat_NativeStatusMgmt_SkipsDegradeInference validates capability
|
||||
// primitive #4: when an adapter declares native_status_mgmt, the platform's
|
||||
// error-rate-based status inference DOES NOT fire. Adapter owns the
|
||||
// transition; platform observes only. The wedged-branch (RuntimeState ==
|
||||
// "wedged") is NOT gated — it's the adapter's own self-report, not an
|
||||
// inference, and stays active.
|
||||
//
|
||||
// Mirrors the structure of TestHeartbeatHandler_Degraded but pre-populates
|
||||
// the runtimeOverrides cache with status_mgmt=true and asserts the degrade
|
||||
// UPDATE is NOT issued (so sqlmock's expectations don't include it).
|
||||
func TestHeartbeat_NativeStatusMgmt_SkipsDegradeInference(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
// Pre-populate the override cache so the workspace under test has
|
||||
// declared native_status_mgmt. Reset after so we don't pollute
|
||||
// other tests in the package.
|
||||
runtimeOverrides.SetCapabilities("ws-native-status", map[string]bool{"status_mgmt": true})
|
||||
defer runtimeOverrides.Reset()
|
||||
|
||||
// prevTask SELECT (before UPDATE)
|
||||
mock.ExpectQuery("SELECT COALESCE\\(current_task").
|
||||
WithArgs("ws-native-status").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
|
||||
|
||||
// heartbeat UPDATE — same as the non-native path
|
||||
mock.ExpectExec("UPDATE workspaces SET").
|
||||
WithArgs("ws-native-status", 0.8, "connection timeout", 0, 7200, "").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// evaluateStatus SELECT — currently online, error_rate=0.8 would
|
||||
// normally fire the degrade UPDATE. Under native_status_mgmt, it
|
||||
// MUST NOT. We deliberately don't ExpectExec the degrade UPDATE
|
||||
// — sqlmock fails the test if any UPDATE happens that wasn't
|
||||
// expected, which is the regression cover.
|
||||
mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
|
||||
WithArgs("ws-native-status").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"workspace_id":"ws-native-status","error_rate":0.8,"sample_error":"connection timeout","active_tasks":0,"uptime_seconds":7200}`
|
||||
c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Heartbeat(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
// CRITICAL: ExpectationsWereMet fails if the degrade UPDATE
|
||||
// happened (since we didn't expect it). This is the load-bearing
|
||||
// assertion for primitive #4.
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations (or unexpected query — likely the degrade UPDATE fired despite native_status_mgmt): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHeartbeat_NativeStatusMgmt_SkipsRecovery validates the recovery
|
||||
// branch is also gated. Without this, an adapter using native_status_mgmt
|
||||
// would see the platform flip its workspace back to online whenever
|
||||
// heartbeat error_rate dropped — even if the adapter's own state
|
||||
// machine is currently reporting degraded for a non-error reason
|
||||
// (paused, hibernating, awaiting upstream, etc.).
|
||||
func TestHeartbeat_NativeStatusMgmt_SkipsRecovery(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
runtimeOverrides.SetCapabilities("ws-native-recovery", map[string]bool{"status_mgmt": true})
|
||||
defer runtimeOverrides.Reset()
|
||||
|
||||
mock.ExpectQuery("SELECT COALESCE\\(current_task").
|
||||
WithArgs("ws-native-recovery").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
|
||||
|
||||
// heartbeat UPDATE — error_rate=0.05 would fire recovery
|
||||
mock.ExpectExec("UPDATE workspaces SET").
|
||||
WithArgs("ws-native-recovery", 0.05, "", 0, 7200, "").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// evaluateStatus SELECT — currently degraded; recovery branch
|
||||
// would normally fire UPDATE → online + WORKSPACE_ONLINE broadcast.
|
||||
// Under native_status_mgmt, neither should run.
|
||||
mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
|
||||
WithArgs("ws-native-recovery").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("degraded"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"workspace_id":"ws-native-recovery","error_rate":0.05,"sample_error":"","active_tasks":0,"uptime_seconds":7200}`
|
||||
c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Heartbeat(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("recovery branch fired despite native_status_mgmt: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHeartbeat_NativeStatusMgmt_WedgedStillRespected confirms the
|
||||
// adapter's own self-reported wedge IS still honored even when
|
||||
// native_status_mgmt is declared. The wedged path is the adapter's
|
||||
// own signal, not platform inference — switching ownership doesn't
|
||||
// silence it.
|
||||
func TestHeartbeat_NativeStatusMgmt_WedgedStillRespected(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
runtimeOverrides.SetCapabilities("ws-wedged", map[string]bool{"status_mgmt": true})
|
||||
defer runtimeOverrides.Reset()
|
||||
|
||||
mock.ExpectQuery("SELECT COALESCE\\(current_task").
|
||||
WithArgs("ws-wedged").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
|
||||
|
||||
// heartbeat UPDATE — RuntimeState="wedged" means sample_error
|
||||
// reflects the wedge reason, error_rate stays 0
|
||||
mock.ExpectExec("UPDATE workspaces SET").
|
||||
WithArgs("ws-wedged", 0.0, "SDK init timeout — restart workspace", 0, 7200, "").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// evaluateStatus SELECT — currently online, wedged branch SHOULD fire
|
||||
mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
|
||||
WithArgs("ws-wedged").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))
|
||||
|
||||
// Wedged degrade UPDATE — must still happen even with native_status_mgmt
|
||||
mock.ExpectExec("UPDATE workspaces SET status = 'degraded'").
|
||||
WithArgs("ws-wedged").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// WORKSPACE_DEGRADED broadcast still fires
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"workspace_id":"ws-wedged","error_rate":0.0,"sample_error":"SDK init timeout — restart workspace","active_tasks":0,"uptime_seconds":7200,"runtime_state":"wedged"}`
|
||||
c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Heartbeat(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("wedged path didn't fire as expected: %v", err)
|
||||
}
|
||||
}
|
||||
@ -520,7 +520,18 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
|
||||
})
|
||||
}
|
||||
|
||||
if currentStatus == "online" && payload.ErrorRate >= 0.5 {
|
||||
// Skip the inferred-status branches when the adapter has declared
|
||||
// native_status_mgmt — its SDK reports its own ready/degraded/failed
|
||||
// state explicitly (typically via runtime_state above), and inferring
|
||||
// status from error_rate would fight that. Capability primitive #4
|
||||
// (task #117) — see project memory `project_runtime_native_pluggable.md`.
|
||||
//
|
||||
// The wedged-branch above (RuntimeState == "wedged") is NOT skipped:
|
||||
// it's the adapter's own self-report, not an inference. Adapters with
|
||||
// native_status_mgmt can keep using runtime_state to drive transitions.
|
||||
nativeStatus := runtimeOverrides.HasCapability(payload.WorkspaceID, "status_mgmt")
|
||||
|
||||
if !nativeStatus && currentStatus == "online" && payload.ErrorRate >= 0.5 {
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = 'degraded', updated_at = now() WHERE id = $1`, payload.WorkspaceID); err != nil {
|
||||
log.Printf("Heartbeat: failed to mark %s degraded: %v", payload.WorkspaceID, err)
|
||||
}
|
||||
@ -536,7 +547,10 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
|
||||
// (claude_sdk_executor only clears it on restart), so when the
|
||||
// container restarts and starts heartbeating fresh — RuntimeState
|
||||
// is empty, error_rate is 0 — this branch flips us back to online.
|
||||
if currentStatus == "degraded" && payload.ErrorRate < 0.1 && payload.RuntimeState == "" {
|
||||
//
|
||||
// Skipped under native_status_mgmt for the same reason as the
|
||||
// degrade branch above: the adapter owns the transition.
|
||||
if !nativeStatus && currentStatus == "degraded" && payload.ErrorRate < 0.1 && payload.RuntimeState == "" {
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = 'online', updated_at = now() WHERE id = $1`, payload.WorkspaceID); err != nil {
|
||||
log.Printf("Heartbeat: failed to recover %s to online: %v", payload.WorkspaceID, err)
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user