fix(registry): heartbeat backfills agent_card when NULL (#2421) #2428

Merged
devops-engineer merged 4 commits from fix/2421-heartbeat-backfill-agent-card into main 2026-06-08 22:46:52 +00:00
3 changed files with 107 additions and 0 deletions
@@ -724,6 +724,25 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) {
return
}
// #2421: backfill agent_card when the initial register failed and the
// heartbeat carries it. Only writes when NULL — never overwrites a
// reconciled or updated card. This is the recovery path for fast-cloud
// workspaces whose DNS wasn't ready at first register.
if len(payload.AgentCard) > 0 {
res, err := db.DB.ExecContext(ctx, `
UPDATE workspaces
SET agent_card = $2
WHERE id = $1 AND agent_card IS NULL
`, payload.WorkspaceID, payload.AgentCard)
if err != nil {
log.Printf("Registry heartbeat: agent_card backfill failed for %s: %v", payload.WorkspaceID, err)
} else {
if rows, _ := res.RowsAffected(); rows > 0 {
log.Printf("Registry heartbeat: backfilled agent_card for %s (initial register had failed)", payload.WorkspaceID)
}
}
}
// Refresh Redis TTL
if err := db.RefreshTTL(ctx, payload.WorkspaceID); err != nil {
log.Printf("Heartbeat redis error: %v", err)
@@ -755,6 +755,88 @@ func TestHeartbeat_SkipsRemovedRows(t *testing.T) {
}
}
// ==================== Heartbeat — agent_card backfill (#2421) ====================
// TestHeartbeatHandler_BackfillsAgentCard_WhenNull drives Heartbeat with a
// payload that carries an agent_card and checks that the agent_card backfill
// UPDATE is issued; the mock reports one affected row for it, and the handler
// is expected to answer 200 (#2421).
func TestHeartbeatHandler_BackfillsAgentCard_WhenNull(t *testing.T) {
	const wsID = "ws-nocard"

	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewRegistryHandler(newTestBroadcaster())

	// Build the heartbeat request up front; the body includes an agent_card.
	req := httptest.NewRequest(
		"POST",
		"/registry/heartbeat",
		bytes.NewBufferString(`{"workspace_id":"ws-nocard","agent_card":{"name":"backfilled"}}`),
	)
	req.Header.Set("Content-Type", "application/json")

	// Read of current_task / monthly_spend for the workspace.
	priorState := sqlmock.NewRows([]string{"current_task", "monthly_spend"}).AddRow("", 0)
	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs(wsID).
		WillReturnRows(priorState)

	// Main heartbeat UPDATE on the workspaces row.
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs(wsID, 0.0, "", 0, 0, "").
		WillReturnResult(sqlmock.NewResult(0, 1))

	// #2421: the backfill UPDATE; one row affected simulates a row whose
	// agent_card was NULL and has now been filled in.
	mock.ExpectExec("UPDATE workspaces SET agent_card =").
		WithArgs(wsID, sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(0, 1))

	// Status lookup that follows the writes.
	statusRows := sqlmock.NewRows([]string{"status"}).AddRow(models.StatusOnline)
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
		WithArgs(wsID).
		WillReturnRows(statusRows)

	rec := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(rec)
	c.Request = req

	handler.Heartbeat(c)

	if got := rec.Code; got != http.StatusOK {
		t.Errorf("expected status 200, got %d: %s", got, rec.Body.String())
	}
	if unmet := mock.ExpectationsWereMet(); unmet != nil {
		t.Errorf("unmet sqlmock expectations: %v", unmet)
	}
}
// TestHeartbeatHandler_SkipsAgentCardBackfill_WhenAlreadySet drives Heartbeat
// with a payload that carries an agent_card while the mocked backfill UPDATE
// reports zero affected rows (the stored card already exists). The handler is
// expected to treat that as a no-op and still answer 200 (#2421).
func TestHeartbeatHandler_SkipsAgentCardBackfill_WhenAlreadySet(t *testing.T) {
	const wsID = "ws-hascard"

	mock := setupTestDB(t)
	setupTestRedis(t)
	handler := NewRegistryHandler(newTestBroadcaster())

	// Build the heartbeat request up front; the body includes an agent_card.
	req := httptest.NewRequest(
		"POST",
		"/registry/heartbeat",
		bytes.NewBufferString(`{"workspace_id":"ws-hascard","agent_card":{"name":"ignored"}}`),
	)
	req.Header.Set("Content-Type", "application/json")

	// Read of current_task / monthly_spend for the workspace.
	priorState := sqlmock.NewRows([]string{"current_task", "monthly_spend"}).AddRow("", 0)
	mock.ExpectQuery("SELECT COALESCE\\(current_task").
		WithArgs(wsID).
		WillReturnRows(priorState)

	// Main heartbeat UPDATE on the workspaces row.
	mock.ExpectExec("UPDATE workspaces SET").
		WithArgs(wsID, 0.0, "", 0, 0, "").
		WillReturnResult(sqlmock.NewResult(0, 1))

	// #2421: the backfill UPDATE is still issued, but zero rows affected
	// simulates a row whose agent_card is already populated.
	mock.ExpectExec("UPDATE workspaces SET agent_card =").
		WithArgs(wsID, sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(0, 0))

	// Status lookup that follows the writes.
	statusRows := sqlmock.NewRows([]string{"status"}).AddRow(models.StatusOnline)
	mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
		WithArgs(wsID).
		WillReturnRows(statusRows)

	rec := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(rec)
	c.Request = req

	handler.Heartbeat(c)

	if got := rec.Code; got != http.StatusOK {
		t.Errorf("expected status 200, got %d: %s", got, rec.Body.String())
	}
	if unmet := mock.ExpectationsWereMet(); unmet != nil {
		t.Errorf("unmet sqlmock expectations: %v", unmet)
	}
}
// ------------------------------------------------------------
// validateAgentURL (C6 SSRF fix)
// ------------------------------------------------------------
@@ -143,6 +143,12 @@ type HeartbeatPayload struct {
// false declared explicitly". Lets the platform distinguish "adapter
// said no native ownership" from "old runtime version, didn't say".
RuntimeMetadata *RuntimeMetadata `json:"runtime_metadata,omitempty"`
// AgentCard is sent by the runtime on heartbeat when the initial
// /registry/register failed and the workspace has no persisted agent_card.
// The heartbeat handler backfills NULL agent_card rows so the workspace
// can come online without requiring a full re-register. (#2421)
AgentCard json.RawMessage `json:"agent_card,omitempty"`
}
// RuntimeMetadata is the adapter-declared capability + override block