feat(runtime): native_status_mgmt skip — primitive #4 of 6

When an adapter declares provides_native_status_mgmt=True (because its
SDK reports its own ready/degraded/failed state explicitly), the
platform's error-rate-based status inference fights the adapter's own
state machine. This PR gates the inference branches on the capability
flag — adapter-driven transitions become authoritative.

Components:

  - registry.go evaluateStatus: gate the two inferred-status branches
    (online → degraded when error_rate ≥ 0.5; degraded → online when
    error_rate < 0.1 and runtime_state is empty) behind a check of
    runtimeOverrides.HasCapability(workspaceID, "status_mgmt").

  - The wedged-branch (RuntimeState == "wedged" → degraded) is NOT
    gated. That path is the adapter's OWN self-report, not platform
    inference, and stays active under native_status_mgmt — adapters
    can still drive transitions via runtime_state.

Python side: no change. The capability map is already serialized via
RuntimeCapabilities.to_dict() in PR #2137 and sent in the heartbeat's
runtime_metadata block via PR #2139. An adapter setting
RuntimeCapabilities(provides_native_status_mgmt=True) automatically
flows through.

Tests (3 new):
  - SkipsDegradeInference: error_rate=0.8 + currentStatus=online + native
    flag set → degrade UPDATE does NOT fire (sqlmock fails on unexpected
    query, which is the regression cover)
  - SkipsRecovery: error_rate=0.05 + currentStatus=degraded + native →
    recovery UPDATE does NOT fire
  - WedgedStillRespected: runtime_state="wedged" + native → wedged
    branch DOES fire (adapter self-report stays active)

Verification:
  - All Go handlers tests pass (3 new + existing)
  - 1308/1308 Python pytest pass (unchanged — Python side unmodified)
  - go build + go vet clean

Stacked on #2140 (already merged via cascade); branch is current with
staging since #2139 and #2140 merged.

See project memory `project_runtime_native_pluggable.md`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-04-26 23:13:13 -07:00
parent c0a5d842b4
commit b4b406c074
2 changed files with 189 additions and 2 deletions

View File

@ -0,0 +1,173 @@
package handlers
import (
	"bytes"
	"log"
	"net/http"
	"net/http/httptest"
	"testing"

	sqlmock "github.com/DATA-DOG/go-sqlmock"
	"github.com/gin-gonic/gin"
)
// TestHeartbeat_NativeStatusMgmt_SkipsDegradeInference validates capability
// primitive #4: when an adapter declares native_status_mgmt, the platform's
// error-rate-based status inference DOES NOT fire. Adapter owns the
// transition; platform observes only. The wedged-branch (RuntimeState ==
// "wedged") is NOT gated — it's the adapter's own self-report, not an
// inference, and stays active.
//
// Mirrors the structure of TestHeartbeatHandler_Degraded but pre-populates
// the runtimeOverrides cache with status_mgmt=true and asserts the degrade
// UPDATE is NOT issued.
//
// How the negative assertion works: sqlmock rejects an unexpected Exec by
// returning an error to the caller, but ExpectationsWereMet only reports
// expectations that were never matched — it does not report unexpected
// calls. evaluateStatus swallows that error with a log.Printf, so the only
// observable trace of a stray degrade UPDATE is the log line. The test
// therefore captures the standard logger and asserts the line is absent.
func TestHeartbeat_NativeStatusMgmt_SkipsDegradeInference(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewRegistryHandler(broadcaster)

	// Pre-populate the override cache so the workspace under test has
	// declared native_status_mgmt. Reset after so we don't pollute
	// other tests in the package.
	runtimeOverrides.SetCapabilities("ws-native-status", map[string]bool{"status_mgmt": true})
	defer runtimeOverrides.Reset()

	// Redirect the standard logger into a buffer for the duration of the
	// test so a swallowed "unexpected Exec" error becomes assertable.
	// NOTE(review): assumes the handler logs via the standard library
	// logger (log.Printf) — confirm against registry.go.
	var logBuf bytes.Buffer
	prevLogOut := log.Writer()
	log.SetOutput(&logBuf)
	defer log.SetOutput(prevLogOut)

	// prevTask SELECT (before UPDATE)
	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs("ws-native-status").
		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))

	// heartbeat UPDATE — same as the non-native path
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs("ws-native-status", 0.8, "connection timeout", 0, 7200, "").
		WillReturnResult(sqlmock.NewResult(0, 1))

	// evaluateStatus SELECT — currently online, error_rate=0.8 would
	// normally fire the degrade UPDATE. Under native_status_mgmt, it
	// MUST NOT. We deliberately don't ExpectExec the degrade UPDATE:
	// if it fires anyway, sqlmock returns an error to the handler and
	// the handler logs it, which the log assertion below catches.
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
		WithArgs("ws-native-status").
		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"workspace_id":"ws-native-status","error_rate":0.8,"sample_error":"connection timeout","active_tasks":0,"uptime_seconds":7200}`
	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")

	handler.Heartbeat(c)

	if w.Code != http.StatusOK {
		t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
	}

	// Positive half: every query we DID expect was issued.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}

	// CRITICAL negative half — the load-bearing assertion for primitive
	// #4. The degrade branch logs this message when its (unexpected)
	// UPDATE is rejected by sqlmock; its presence means the gate failed.
	if bytes.Contains(logBuf.Bytes(), []byte("failed to mark ws-native-status degraded")) {
		t.Errorf("degrade UPDATE fired despite native_status_mgmt; handler log:\n%s", logBuf.String())
	}
}
// TestHeartbeat_NativeStatusMgmt_SkipsRecovery validates the recovery
// branch is also gated. Without this, an adapter using native_status_mgmt
// would see the platform flip its workspace back to online whenever
// heartbeat error_rate dropped — even if the adapter's own state
// machine is currently reporting degraded for a non-error reason
// (paused, hibernating, awaiting upstream, etc.).
//
// How the negative assertion works: sqlmock returns an error to the caller
// on an unexpected Exec, but ExpectationsWereMet only reports expectations
// that were never matched. evaluateStatus swallows the error with a
// log.Printf, so the test captures the standard logger and asserts the
// recovery-failure line is absent.
func TestHeartbeat_NativeStatusMgmt_SkipsRecovery(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewRegistryHandler(broadcaster)

	runtimeOverrides.SetCapabilities("ws-native-recovery", map[string]bool{"status_mgmt": true})
	defer runtimeOverrides.Reset()

	// Redirect the standard logger into a buffer so a swallowed
	// "unexpected Exec" error becomes assertable.
	// NOTE(review): assumes the handler logs via the standard library
	// logger (log.Printf) — confirm against registry.go.
	var logBuf bytes.Buffer
	prevLogOut := log.Writer()
	log.SetOutput(&logBuf)
	defer log.SetOutput(prevLogOut)

	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs("ws-native-recovery").
		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))

	// heartbeat UPDATE — error_rate=0.05 would fire recovery
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs("ws-native-recovery", 0.05, "", 0, 7200, "").
		WillReturnResult(sqlmock.NewResult(0, 1))

	// evaluateStatus SELECT — currently degraded; recovery branch
	// would normally fire UPDATE → online + WORKSPACE_ONLINE broadcast.
	// Under native_status_mgmt, neither should run.
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
		WithArgs("ws-native-recovery").
		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("degraded"))

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	body := `{"workspace_id":"ws-native-recovery","error_rate":0.05,"sample_error":"","active_tasks":0,"uptime_seconds":7200}`
	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")

	handler.Heartbeat(c)

	if w.Code != http.StatusOK {
		t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
	}

	// Positive half: every query we DID expect was issued.
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}

	// Negative half: the recovery branch logs this message when its
	// (unexpected) UPDATE is rejected by sqlmock; its presence means the
	// gate failed.
	if bytes.Contains(logBuf.Bytes(), []byte("failed to recover ws-native-recovery to online")) {
		t.Errorf("recovery branch fired despite native_status_mgmt; handler log:\n%s", logBuf.String())
	}
}
// TestHeartbeat_NativeStatusMgmt_WedgedStillRespected checks that a
// heartbeat carrying runtime_state="wedged" still moves an online workspace
// to degraded when the workspace has declared the status_mgmt capability.
// The wedge is the adapter's own report rather than something the platform
// inferred, so handing status ownership to the adapter must not mute it.
func TestHeartbeat_NativeStatusMgmt_WedgedStillRespected(t *testing.T) {
	const (
		wsID      = "ws-wedged"
		wedgeNote = "SDK init timeout — restart workspace"
	)

	dbMock := setupTestDB(t)
	setupTestRedis(t)
	h := NewRegistryHandler(newTestBroadcaster())

	// Declare native status management for this workspace only; the
	// deferred Reset clears the override cache again when the test ends.
	runtimeOverrides.SetCapabilities(wsID, map[string]bool{"status_mgmt": true})
	defer runtimeOverrides.Reset()

	// Step 1: the handler reads the previous task before updating.
	taskRows := sqlmock.NewRows([]string{"current_task"}).AddRow("")
	dbMock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs(wsID).
		WillReturnRows(taskRows)

	// Step 2: the heartbeat row update. A wedged runtime reports the
	// wedge reason as sample_error while error_rate stays at zero.
	dbMock.ExpectExec("UPDATE workspaces SET").
		WithArgs(wsID, 0.0, wedgeNote, 0, 7200, "").
		WillReturnResult(sqlmock.NewResult(0, 1))

	// Step 3: evaluateStatus looks up the current status (online here).
	statusRows := sqlmock.NewRows([]string{"status"}).AddRow("online")
	dbMock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
		WithArgs(wsID).
		WillReturnRows(statusRows)

	// Step 4: the wedged branch must run regardless of the capability.
	dbMock.ExpectExec("UPDATE workspaces SET status = 'degraded'").
		WithArgs(wsID).
		WillReturnResult(sqlmock.NewResult(0, 1))

	// Step 5: the WORKSPACE_DEGRADED event is still recorded.
	dbMock.ExpectExec("INSERT INTO structure_events").
		WillReturnResult(sqlmock.NewResult(0, 1))

	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	payload := `{"workspace_id":"ws-wedged","error_rate":0.0,"sample_error":"SDK init timeout — restart workspace","active_tasks":0,"uptime_seconds":7200,"runtime_state":"wedged"}`
	req := httptest.NewRequest(http.MethodPost, "/registry/heartbeat", bytes.NewBufferString(payload))
	req.Header.Set("Content-Type", "application/json")
	ctx.Request = req

	h.Heartbeat(ctx)

	if got := rec.Code; got != http.StatusOK {
		t.Errorf("expected status 200, got %d: %s", got, rec.Body.String())
	}

	// Every expectation above is a positive one, so an unmet expectation
	// here means the wedged path did not issue its UPDATE or event insert.
	err := dbMock.ExpectationsWereMet()
	if err != nil {
		t.Errorf("wedged path didn't fire as expected: %v", err)
	}
}

View File

@ -520,7 +520,18 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
})
}
if currentStatus == "online" && payload.ErrorRate >= 0.5 {
// Skip the inferred-status branches when the adapter has declared
// native_status_mgmt — its SDK reports its own ready/degraded/failed
// state explicitly (typically via runtime_state above), and inferring
// status from error_rate would fight that. Capability primitive #4
// (task #117) — see project memory `project_runtime_native_pluggable.md`.
//
// The wedged-branch above (RuntimeState == "wedged") is NOT skipped:
// it's the adapter's own self-report, not an inference. Adapters with
// native_status_mgmt can keep using runtime_state to drive transitions.
nativeStatus := runtimeOverrides.HasCapability(payload.WorkspaceID, "status_mgmt")
if !nativeStatus && currentStatus == "online" && payload.ErrorRate >= 0.5 {
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = 'degraded', updated_at = now() WHERE id = $1`, payload.WorkspaceID); err != nil {
log.Printf("Heartbeat: failed to mark %s degraded: %v", payload.WorkspaceID, err)
}
@ -536,7 +547,10 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
// (claude_sdk_executor only clears it on restart), so when the
// container restarts and starts heartbeating fresh — RuntimeState
// is empty, error_rate is 0 — this branch flips us back to online.
if currentStatus == "degraded" && payload.ErrorRate < 0.1 && payload.RuntimeState == "" {
//
// Skipped under native_status_mgmt for the same reason as the
// degrade branch above: the adapter owns the transition.
if !nativeStatus && currentStatus == "degraded" && payload.ErrorRate < 0.1 && payload.RuntimeState == "" {
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = 'online', updated_at = now() WHERE id = $1`, payload.WorkspaceID); err != nil {
log.Printf("Heartbeat: failed to recover %s to online: %v", payload.WorkspaceID, err)
}