From 008ddb994272461c5194409224bf829cb2707404 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Mon, 8 Jun 2026 05:32:09 +0000 Subject: [PATCH 1/3] fix(registry): heartbeat backfills agent_card when NULL (#2421) When a workspace's initial /registry/register fails (e.g. DNS propagation race on fast-cloud provisioners), the agent_card never lands and the agent stays offline. The runtime already sends agent_card in later heartbeats, but the heartbeat handler ignored it. - Add AgentCard to HeartbeatPayload (optional, omitempty). - In Heartbeat handler, UPDATE agent_card ONLY when the DB row has NULL agent_card. Never overwrites an existing reconciled card. - Add tests for backfill-when-null and skip-when-already-set. Fixes #2421 (option a) --- .../internal/handlers/registry.go | 19 +++++ .../internal/handlers/registry_test.go | 82 +++++++++++++++++++ workspace-server/internal/models/workspace.go | 6 ++ 3 files changed, 107 insertions(+) diff --git a/workspace-server/internal/handlers/registry.go b/workspace-server/internal/handlers/registry.go index 2964f9e1f..1ee8a202e 100644 --- a/workspace-server/internal/handlers/registry.go +++ b/workspace-server/internal/handlers/registry.go @@ -690,6 +690,25 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) { return } + // #2421: backfill agent_card when the initial register failed and the + // heartbeat carries it. Only writes when NULL — never overwrites a + // reconciled or updated card. This is the recovery path for fast-cloud + // workspaces whose DNS wasn't ready at first register. + if len(payload.AgentCard) > 0 { + res, err := db.DB.ExecContext(ctx, ` + UPDATE workspaces + SET agent_card = $2 + WHERE id = $1 AND agent_card IS NULL + `, payload.WorkspaceID, payload.AgentCard) + if err != nil { + log.Printf("Registry heartbeat: agent_card backfill failed for %s: %v", payload.WorkspaceID, err) + } else { + if rows, _ := res.RowsAffected(); rows > 0 { + log.Printf("Registry heartbeat: backfilled agent_card for %s (initial register had failed)", payload.WorkspaceID) + } + } + } + // Refresh Redis TTL if err := db.RefreshTTL(ctx, payload.WorkspaceID); err != nil { log.Printf("Heartbeat redis error: %v", err) diff --git a/workspace-server/internal/handlers/registry_test.go b/workspace-server/internal/handlers/registry_test.go index a4058241d..b09bd3b39 100644 --- a/workspace-server/internal/handlers/registry_test.go +++ b/workspace-server/internal/handlers/registry_test.go @@ -755,6 +755,88 @@ func TestHeartbeat_SkipsRemovedRows(t *testing.T) { } } +// ==================== Heartbeat — agent_card backfill (#2421) ==================== + +func TestHeartbeatHandler_BackfillsAgentCard_WhenNull(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewRegistryHandler(broadcaster) + + mock.ExpectQuery("SELECT COALESCE\\(current_task"). + WithArgs("ws-nocard"). + WillReturnRows(sqlmock.NewRows([]string{"current_task", "monthly_spend"}).AddRow("", 0)) + + mock.ExpectExec("UPDATE workspaces SET"). + WithArgs("ws-nocard", 0.0, "", 0, 0, ""). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // #2421: backfill agent_card when heartbeat carries it and DB row is NULL + mock.ExpectExec("UPDATE workspaces SET agent_card ="). + WithArgs("ws-nocard", sqlmock.AnyArg()). + WillReturnResult(sqlmock.NewResult(0, 1)) + + mock.ExpectQuery("SELECT status FROM workspaces WHERE id ="). + WithArgs("ws-nocard"). + WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow(models.StatusOnline)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + + body := `{"workspace_id":"ws-nocard","agent_card":{"name":"backfilled"}}` + c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.Heartbeat(c) + + if w.Code != http.StatusOK { + t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +func TestHeartbeatHandler_SkipsAgentCardBackfill_WhenAlreadySet(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewRegistryHandler(broadcaster) + + mock.ExpectQuery("SELECT COALESCE\\(current_task"). + WithArgs("ws-hascard"). + WillReturnRows(sqlmock.NewRows([]string{"current_task", "monthly_spend"}).AddRow("", 0)) + + mock.ExpectExec("UPDATE workspaces SET"). + WithArgs("ws-hascard", 0.0, "", 0, 0, ""). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // #2421: backfill must be a no-op when agent_card already exists (0 rows affected) + mock.ExpectExec("UPDATE workspaces SET agent_card ="). + WithArgs("ws-hascard", sqlmock.AnyArg()). + WillReturnResult(sqlmock.NewResult(0, 0)) + + mock.ExpectQuery("SELECT status FROM workspaces WHERE id ="). + WithArgs("ws-hascard"). + WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow(models.StatusOnline)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + + body := `{"workspace_id":"ws-hascard","agent_card":{"name":"ignored"}}` + c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.Heartbeat(c) + + if w.Code != http.StatusOK { + t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + // ------------------------------------------------------------ // validateAgentURL (C6 SSRF fix) // ------------------------------------------------------------ diff --git a/workspace-server/internal/models/workspace.go b/workspace-server/internal/models/workspace.go index ca899c765..43c079fc9 100644 --- a/workspace-server/internal/models/workspace.go +++ b/workspace-server/internal/models/workspace.go @@ -143,6 +143,12 @@ type HeartbeatPayload struct { // false declared explicitly". Lets the platform distinguish "adapter // said no native ownership" from "old runtime version, didn't say". RuntimeMetadata *RuntimeMetadata `json:"runtime_metadata,omitempty"` + + // AgentCard is sent by the runtime on heartbeat when the initial + // /registry/register failed and the workspace has no persisted agent_card. + // The heartbeat handler backfills NULL agent_card rows so the workspace + // can come online without requiring a full re-register. (#2421) + AgentCard json.RawMessage `json:"agent_card,omitempty"` } // RuntimeMetadata is the adapter-declared capability + override block -- 2.52.0 From e55e641d185fa27c0319a5c8fbc65ed8857354e4 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Mon, 8 Jun 2026 09:43:28 +0000 Subject: [PATCH 2/3] trigger: re-run sop-checklist pull_request -- 2.52.0 From 6e98e08b0a74f2532d87872392378702670315e2 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Mon, 8 Jun 2026 22:30:22 +0000 Subject: [PATCH 3/3] ci: re-trigger required E2E API Smoke + Handlers PG checks -- 2.52.0