From 0dec6d08d102e0d24702f41aad300a5d392e71d1 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 20 Jun 2026 18:59:09 +0000 Subject: [PATCH 1/5] feat(core): fail-loud when platform concierge's declared management MCP is not loaded Part of #3082 (core-side). - Add LoadedMCPTools to HeartbeatPayload so the runtime can report the MCP tools it actually loaded. - In the heartbeat evaluateStatus path, when a platform concierge has the management MCP declared in workspace_declared_plugins but the reported loaded_mcp_tools list does not contain it, mark the workspace degraded and broadcast WORKSPACE_DEGRADED. - Backward-compat: omitting loaded_mcp_tools skips the check, so older runtimes are not disrupted. - Add unit tests for missing/loaded/old-runtime cases. The runtime-side log line for --strict-mcp-config will land in a separate follow-up PR against molecule-ai-workspace-template-claude-code. Co-Authored-By: Claude --- .../handlers/concierge_mcp_loaded_test.go | 161 ++++++++++++++++++ .../internal/handlers/registry.go | 56 ++++++ workspace-server/internal/models/workspace.go | 7 + 3 files changed, 224 insertions(+) create mode 100644 workspace-server/internal/handlers/concierge_mcp_loaded_test.go diff --git a/workspace-server/internal/handlers/concierge_mcp_loaded_test.go b/workspace-server/internal/handlers/concierge_mcp_loaded_test.go new file mode 100644 index 00000000..01fbe74c --- /dev/null +++ b/workspace-server/internal/handlers/concierge_mcp_loaded_test.go @@ -0,0 +1,161 @@ +package handlers + +import ( + "bytes" + "net/http" + "net/http/httptest" + "testing" + + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models" + "github.com/DATA-DOG/go-sqlmock" + "github.com/gin-gonic/gin" +) + +// TestHeartbeatHandler_PlatformManagementMCPMissing_FlipsOnlineToDegraded +// verifies core#3082: a platform concierge that reports loaded_mcp_tools but +// does NOT include the declared management MCP is marked degraded. +func TestHeartbeatHandler_PlatformManagementMCPMissing_FlipsOnlineToDegraded(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewRegistryHandler(broadcaster) + + // Initial heartbeat UPDATE. + mock.ExpectQuery("SELECT COALESCE\\(current_task"). + WithArgs("ws-mcp-missing"). + WillReturnRows(sqlmock.NewRows([]string{"current_task", "monthly_spend", "status"}).AddRow("", 0, "online")) + + mock.ExpectExec("UPDATE workspaces SET"). + WithArgs("ws-mcp-missing", 0.0, "", 0, 60, ""). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // evaluateStatus: currentStatus=online, kind=platform. + mock.ExpectQuery("SELECT status, kind, last_register_failure_at FROM workspaces WHERE id ="). + WithArgs("ws-mcp-missing"). + WillReturnRows(sqlmock.NewRows([]string{"status", "kind", "last_register_failure_at"}).AddRow("online", "platform", nil)) + + // platformAgentHasModelSecret: model secret exists. + mock.ExpectQuery("SELECT EXISTS"). + WithArgs("ws-mcp-missing"). + WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true)) + + // platformAgentManagementMCPLoaded: listDeclaredPlugins returns management MCP. + mock.ExpectQuery("SELECT plugin_name, source_raw FROM workspace_declared_plugins"). + WithArgs("ws-mcp-missing"). + WillReturnRows(sqlmock.NewRows([]string{"plugin_name", "source_raw"}). + AddRow(conciergePlatformMCPName, "gitea://molecule-ai/molecule-ai-plugin-molecule-platform-mcp#main")) + + // Degraded UPDATE. + mock.ExpectExec("UPDATE workspaces SET status =.*status = 'online'"). + WithArgs(models.StatusDegraded, "platform agent management MCP declared but not loaded; marking degraded (core#3082)", "ws-mcp-missing"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // WORKSPACE_DEGRADED broadcast. + mock.ExpectExec("INSERT INTO structure_events"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + + body := `{"workspace_id":"ws-mcp-missing","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":60,"mcp_server_present":true,"loaded_mcp_tools":["a2a","some_other_tool"]}` + c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.Heartbeat(c) + + if w.Code != http.StatusOK { + t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestHeartbeatHandler_PlatformManagementMCPLoaded_StaysOnline verifies that +// a platform concierge reporting the management MCP in loaded_mcp_tools stays +// online. +func TestHeartbeatHandler_PlatformManagementMCPLoaded_StaysOnline(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewRegistryHandler(broadcaster) + + mock.ExpectQuery("SELECT COALESCE\\(current_task"). + WithArgs("ws-mcp-ok"). + WillReturnRows(sqlmock.NewRows([]string{"current_task", "monthly_spend", "status"}).AddRow("", 0, "online")) + + mock.ExpectExec("UPDATE workspaces SET"). + WithArgs("ws-mcp-ok", 0.0, "", 0, 60, ""). + WillReturnResult(sqlmock.NewResult(0, 1)) + + mock.ExpectQuery("SELECT status, kind, last_register_failure_at FROM workspaces WHERE id ="). + WithArgs("ws-mcp-ok"). + WillReturnRows(sqlmock.NewRows([]string{"status", "kind", "last_register_failure_at"}).AddRow("online", "platform", nil)) + + mock.ExpectQuery("SELECT EXISTS"). + WithArgs("ws-mcp-ok"). + WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true)) + + mock.ExpectQuery("SELECT plugin_name, source_raw FROM workspace_declared_plugins"). + WithArgs("ws-mcp-ok"). + WillReturnRows(sqlmock.NewRows([]string{"plugin_name", "source_raw"}). + AddRow(conciergePlatformMCPName, "gitea://molecule-ai/molecule-ai-plugin-molecule-platform-mcp#main")) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + + body := `{"workspace_id":"ws-mcp-ok","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":60,"mcp_server_present":true,"loaded_mcp_tools":["a2a","` + conciergePlatformMCPName + `"]}` + c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.Heartbeat(c) + + if w.Code != http.StatusOK { + t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestHeartbeatHandler_OldRuntimeNoLoadedTools_StaysOnline verifies backward +// compat: a platform concierge whose runtime does not report loaded_mcp_tools +// (nil field) is not degraded by the core#3082 check. +func TestHeartbeatHandler_OldRuntimeNoLoadedTools_StaysOnline(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewRegistryHandler(broadcaster) + + mock.ExpectQuery("SELECT COALESCE\\(current_task"). + WithArgs("ws-old-runtime"). + WillReturnRows(sqlmock.NewRows([]string{"current_task", "monthly_spend", "status"}).AddRow("", 0, "online")) + + mock.ExpectExec("UPDATE workspaces SET"). + WithArgs("ws-old-runtime", 0.0, "", 0, 60, ""). + WillReturnResult(sqlmock.NewResult(0, 1)) + + mock.ExpectQuery("SELECT status, kind, last_register_failure_at FROM workspaces WHERE id ="). + WithArgs("ws-old-runtime"). + WillReturnRows(sqlmock.NewRows([]string{"status", "kind", "last_register_failure_at"}).AddRow("online", "platform", nil)) + + mock.ExpectQuery("SELECT EXISTS"). + WithArgs("ws-old-runtime"). + WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + + body := `{"workspace_id":"ws-old-runtime","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":60,"mcp_server_present":true}` + c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.Heartbeat(c) + + if w.Code != http.StatusOK { + t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} diff --git a/workspace-server/internal/handlers/registry.go b/workspace-server/internal/handlers/registry.go index b6cce98a..5660cd26 100644 --- a/workspace-server/internal/handlers/registry.go +++ b/workspace-server/internal/handlers/registry.go @@ -413,6 +413,40 @@ func (h *RegistryHandler) platformAgentMCPServerPresent(present *bool) bool { return present == nil || *present } +// platformAgentManagementMCPLoaded reports whether the concierge's declared +// management MCP (workspace_declared_plugins) is present in the runtime's +// reported loaded_mcp_tools list. It returns true only when: +// - the workspace has the management plugin declared, AND +// - the reported loaded tool list does not contain the declared plugin name. +// +// If the management plugin is not declared (e.g. a non-platform workspace or a +// platform concierge before plugin reconciliation), it returns false. Errors +// are returned to the caller for logging; a failed lookup must not silently +// look healthy. +func (h *RegistryHandler) platformAgentManagementMCPLoaded(ctx context.Context, workspaceID string, loaded []string) (bool, error) { + declared, err := listDeclaredPlugins(ctx, workspaceID) + if err != nil { + return false, fmt.Errorf("listDeclaredPlugins: %w", err) + } + + hasDeclaredManagement := false + for _, d := range declared { + if d.PluginName == conciergePlatformMCPName { + hasDeclaredManagement = true + break + } + } + if !hasDeclaredManagement { + return false, nil + } + + loadedSet := make(map[string]bool, len(loaded)) + for _, t := range loaded { + loadedSet[t] = true + } + return !loadedSet[conciergePlatformMCPName], nil +} + // markWorkspaceFailed updates a workspace row to status='failed' and broadcasts // WORKSPACE_PROVISION_FAILED. It is a RegistryHandler-local fallback for the // fail-closed platform-agent identity gate; the WorkspaceHandler's @@ -1277,6 +1311,28 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea h.markWorkspaceFailed(ctx, payload.WorkspaceID, msg, reason) return } + + // core#3082: post-online fail-loud for a missing declared management MCP. + // If the runtime reports loaded_mcp_tools, compare it against the + // concierge's declared management plugin. A declared-but-not-loaded + // management MCP marks the workspace degraded and alerts instead of + // serving silently. + if payload.LoadedMCPTools != nil { + managementMissing, mErr := h.platformAgentManagementMCPLoaded(ctx, payload.WorkspaceID, payload.LoadedMCPTools) + if mErr != nil { + log.Printf("Heartbeat: management MCP load check failed for %s: %v", payload.WorkspaceID, mErr) + } else if managementMissing { + msg := "platform agent management MCP declared but not loaded; marking degraded (core#3082)" + log.Printf("Heartbeat: %s (workspace=%s)", msg, payload.WorkspaceID) + if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, last_sample_error = $2, updated_at = now() WHERE id = $3 AND status = 'online'`, models.StatusDegraded, msg, payload.WorkspaceID); err != nil { + log.Printf("Heartbeat: failed to mark %s degraded (management MCP missing): %v", payload.WorkspaceID, err) + } + h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceDegraded), payload.WorkspaceID, map[string]interface{}{ + "management_mcp_missing": true, + "sample_error": msg, + }) + } + } } // Self-reported runtime wedge: takes precedence over the error_rate diff --git a/workspace-server/internal/models/workspace.go b/workspace-server/internal/models/workspace.go index a47c2b27..12f95699 100644 --- a/workspace-server/internal/models/workspace.go +++ b/workspace-server/internal/models/workspace.go @@ -158,6 +158,13 @@ type HeartbeatPayload struct { // so the fail-closed platform-agent gate can block recovery paths that // would otherwise resurrect an mcp-less concierge (RCA #2970). MCPServerPresent *bool `json:"mcp_server_present,omitempty"` + + // LoadedMCPTools is the list of MCP tool names the runtime reports as + // actually loaded for this workspace. For platform concierges, core + // cross-checks this against the declared management MCP so a missing + // plugin is surfaced as degraded instead of silent (core#3082). + // nil/omitted means the runtime does not yet speak this contract. + LoadedMCPTools []string `json:"loaded_mcp_tools,omitempty"` } // RuntimeMetadata is the adapter-declared capability + override block -- 2.52.0 From 402092a1bd79015086029900dbb31ff96f739393 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer B (MiniMax)" Date: Sat, 20 Jun 2026 22:22:48 +0000 Subject: [PATCH 2/5] fix(core#3082): assert literal create_workspace tool + fail-loud absent tools (CR2 #12653) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BOTH CR2+Researcher RC on PR #3101, false-green. Two fixes: 1. platformAgentManagementMCPLoaded compared loaded_mcp_tools entries against conciergePlatformMCPName (the plugin/SERVER install name), but loaded_mcp_tools carries namespaced TOOL identifiers (mcp____) — the check was a no-op false-green. Fix: assert the literal required tool identifier 'mcp__molecule-platform__create_workspace' is in the surfaced tool list. New constant conciergePlatformMCPCreateWorkspaceTool pins the value, cross-referenced to the staging concierge E2E so a drift in one breaks both with the same signal. 2. The gate ran only when payload.LoadedMCPTools was non-nil — but the deployed runtime emits mcp_server_present=true (the #147 contract) and no loaded_mcp_tools producer yet, so the gate silently-skipped every production concierge. Fix: trigger the gate off mcp_server_present=true (the field the runtime actually emits). When triggered, two fail-loud paths: - loaded_mcp_tools present but missing the required tool → degraded - loaded_mcp_tools ABSENT → degraded (CR2+Researcher: 'make absent-data fail-loud not silent-skip') Backward compat: pre-#147 runtimes (mcp_server_present nil) bypass the gate, matching the existing nil-tolerance in platformAgentMCPServerPresent. FLAG to PM: the runtime side still needs a loaded_mcp_tools producer to make the deployed path healthy. Until it lands, every platform concierge with mcp_server_present=true will report degraded — which is the correct fail-loud signal #3082 exists to produce. Coordinate with Dev-A/CTO for the runtime-repo follow-up. Tests: kept the existing 'missing tool' test (now uses real namespaced tool ids, not the server name); flipped the 'old runtime' test to a 'runtime speaks #147 but no loaded_mcp_tools' test that asserts degraded; added a 'pre-#147 runtime stays online' test for the backward-compat path; kept 'tool loaded stays online' with the literal create_workspace tool. 4/4 pass; full handlers suite PASS. --- .../handlers/concierge_mcp_loaded_test.go | 105 +++++++++++++++--- .../internal/handlers/platform_agent.go | 22 ++++ .../internal/handlers/registry.go | 78 ++++++++++--- workspace-server/internal/models/workspace.go | 20 +++- 4 files changed, 187 insertions(+), 38 deletions(-) diff --git a/workspace-server/internal/handlers/concierge_mcp_loaded_test.go b/workspace-server/internal/handlers/concierge_mcp_loaded_test.go index 01fbe74c..b0e342b0 100644 --- a/workspace-server/internal/handlers/concierge_mcp_loaded_test.go +++ b/workspace-server/internal/handlers/concierge_mcp_loaded_test.go @@ -12,8 +12,12 @@ import ( ) // TestHeartbeatHandler_PlatformManagementMCPMissing_FlipsOnlineToDegraded -// verifies core#3082: a platform concierge that reports loaded_mcp_tools but -// does NOT include the declared management MCP is marked degraded. +// verifies core#3082 (CR2 #12653 fix): a platform concierge that reports +// loaded_mcp_tools but does NOT include the literal required tool identifier +// `mcp__molecule-platform__create_workspace` is marked degraded. The old +// check compared the loaded tools against the plugin NAME +// (`molecule-ai-plugin-molecule-platform-mcp`) which never matches the +// namespaced tool ids Claude Code dispatches — that was the false-green. func TestHeartbeatHandler_PlatformManagementMCPMissing_FlipsOnlineToDegraded(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) @@ -45,7 +49,7 @@ func TestHeartbeatHandler_PlatformManagementMCPMissing_FlipsOnlineToDegraded(t * WillReturnRows(sqlmock.NewRows([]string{"plugin_name", "source_raw"}). AddRow(conciergePlatformMCPName, "gitea://molecule-ai/molecule-ai-plugin-molecule-platform-mcp#main")) - // Degraded UPDATE. + // Degraded UPDATE — required tool absent. mock.ExpectExec("UPDATE workspaces SET status =.*status = 'online'"). WithArgs(models.StatusDegraded, "platform agent management MCP declared but not loaded; marking degraded (core#3082)", "ws-mcp-missing"). WillReturnResult(sqlmock.NewResult(0, 1)) @@ -57,7 +61,8 @@ func TestHeartbeatHandler_PlatformManagementMCPMissing_FlipsOnlineToDegraded(t * w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"workspace_id":"ws-mcp-missing","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":60,"mcp_server_present":true,"loaded_mcp_tools":["a2a","some_other_tool"]}` + // loaded_mcp_tools has plenty of tools but NOT the literal required one. + body := `{"workspace_id":"ws-mcp-missing","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":60,"mcp_server_present":true,"loaded_mcp_tools":["a2a","mcp__other-server__other-tool"]}` c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -72,8 +77,10 @@ func TestHeartbeatHandler_PlatformManagementMCPMissing_FlipsOnlineToDegraded(t * } // TestHeartbeatHandler_PlatformManagementMCPLoaded_StaysOnline verifies that -// a platform concierge reporting the management MCP in loaded_mcp_tools stays -// online. +// a platform concierge reporting the literal required create_workspace tool +// in loaded_mcp_tools stays online. (The previous test loaded the plugin +// NAME as a fake tool — that was a no-op false-green; this test pins the +// real contract.) func TestHeartbeatHandler_PlatformManagementMCPLoaded_StaysOnline(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) @@ -104,7 +111,8 @@ func TestHeartbeatHandler_PlatformManagementMCPLoaded_StaysOnline(t *testing.T) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"workspace_id":"ws-mcp-ok","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":60,"mcp_server_present":true,"loaded_mcp_tools":["a2a","` + conciergePlatformMCPName + `"]}` + // loaded_mcp_tools carries the literal required tool identifier. + body := `{"workspace_id":"ws-mcp-ok","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":60,"mcp_server_present":true,"loaded_mcp_tools":["a2a","` + conciergePlatformMCPCreateWorkspaceTool + `"]}` c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -118,35 +126,100 @@ func TestHeartbeatHandler_PlatformManagementMCPLoaded_StaysOnline(t *testing.T) } } -// TestHeartbeatHandler_OldRuntimeNoLoadedTools_StaysOnline verifies backward -// compat: a platform concierge whose runtime does not report loaded_mcp_tools -// (nil field) is not degraded by the core#3082 check. -func TestHeartbeatHandler_OldRuntimeNoLoadedTools_StaysOnline(t *testing.T) { +// TestHeartbeatHandler_RuntimeEmitsServerPresentButNoLoadedTools_Degraded +// pins the CR2+Researcher fail-loud behavior: a runtime that speaks the +// #147 contract (mcp_server_present=true) but does NOT report the new +// loaded_mcp_tools producer cannot prove the management MCP is actually +// loaded — flip to degraded instead of silent-skip. The previous +// "old-runtime stays online" test was the false-green #3082 exists to +// catch; the new contract says: if you can prove server-up, prove tools +// too, or fail loud. +func TestHeartbeatHandler_RuntimeEmitsServerPresentButNoLoadedTools_Degraded(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) broadcaster := newTestBroadcaster() handler := NewRegistryHandler(broadcaster) mock.ExpectQuery("SELECT COALESCE\\(current_task"). - WithArgs("ws-old-runtime"). + WithArgs("ws-server-present-no-tools"). WillReturnRows(sqlmock.NewRows([]string{"current_task", "monthly_spend", "status"}).AddRow("", 0, "online")) mock.ExpectExec("UPDATE workspaces SET"). - WithArgs("ws-old-runtime", 0.0, "", 0, 60, ""). + WithArgs("ws-server-present-no-tools", 0.0, "", 0, 60, ""). WillReturnResult(sqlmock.NewResult(0, 1)) mock.ExpectQuery("SELECT status, kind, last_register_failure_at FROM workspaces WHERE id ="). - WithArgs("ws-old-runtime"). + WithArgs("ws-server-present-no-tools"). WillReturnRows(sqlmock.NewRows([]string{"status", "kind", "last_register_failure_at"}).AddRow("online", "platform", nil)) mock.ExpectQuery("SELECT EXISTS"). - WithArgs("ws-old-runtime"). + WithArgs("ws-server-present-no-tools"). WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true)) + // Degraded UPDATE — runtime spoke server-present but omitted loaded_mcp_tools. + mock.ExpectExec("UPDATE workspaces SET status =.*status = 'online'"). + WithArgs(models.StatusDegraded, "platform agent runtime did not report loaded_mcp_tools on a mcp_server_present=true heartbeat; cannot verify create_workspace tool is loaded — marking degraded (core#3082)", "ws-server-present-no-tools"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + mock.ExpectExec("INSERT INTO structure_events"). + WillReturnResult(sqlmock.NewResult(0, 1)) + w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"workspace_id":"ws-old-runtime","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":60,"mcp_server_present":true}` + // mcp_server_present=true but loaded_mcp_tools absent — runtime needs a + // loaded_mcp_tools producer. Until it does, every platform concierge + // will be flagged degraded (which is the honest signal). + body := `{"workspace_id":"ws-server-present-no-tools","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":60,"mcp_server_present":true}` + c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.Heartbeat(c) + + if w.Code != http.StatusOK { + t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestHeartbeatHandler_Pre147RuntimeNoMCPServerPresent_StaysOnline pins the +// backward-compat path: a runtime that predates the #147 contract (neither +// mcp_server_present nor loaded_mcp_tools) does NOT trigger the #3082 gate. +// The earlier platformAgentMCPServerPresent nil-tolerance keeps legacy +// runtimes serving until the runtime-side loaded_mcp_tools producer lands. +func TestHeartbeatHandler_Pre147RuntimeNoMCPServerPresent_StaysOnline(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewRegistryHandler(broadcaster) + + mock.ExpectQuery("SELECT COALESCE\\(current_task"). + WithArgs("ws-pre-147"). + WillReturnRows(sqlmock.NewRows([]string{"current_task", "monthly_spend", "status"}).AddRow("", 0, "online")) + + mock.ExpectExec("UPDATE workspaces SET"). + WithArgs("ws-pre-147", 0.0, "", 0, 60, ""). + WillReturnResult(sqlmock.NewResult(0, 1)) + + mock.ExpectQuery("SELECT status, kind, last_register_failure_at FROM workspaces WHERE id ="). + WithArgs("ws-pre-147"). + WillReturnRows(sqlmock.NewRows([]string{"status", "kind", "last_register_failure_at"}).AddRow("online", "platform", nil)) + + mock.ExpectQuery("SELECT EXISTS"). + WithArgs("ws-pre-147"). + WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true)) + + // No listDeclaredPlugins query — the #3082 gate is skipped entirely for + // pre-#147 runtimes (mcp_server_present nil ⇒ platformAgentMCPServerPresent + // returns true under nil-tolerance; the new gate requires + // mcp_server_present != nil && *mcp_server_present). + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + + body := `{"workspace_id":"ws-pre-147","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":60}` c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") diff --git a/workspace-server/internal/handlers/platform_agent.go b/workspace-server/internal/handlers/platform_agent.go index 57666d5e..d17f6e42 100644 --- a/workspace-server/internal/handlers/platform_agent.go +++ b/workspace-server/internal/handlers/platform_agent.go @@ -456,6 +456,28 @@ const conciergePlatformMCPSource = "gitea://molecule-ai/molecule-ai-plugin-molec // derivation, not the human label "molecule-platform-mcp". const conciergePlatformMCPName = "molecule-ai-plugin-molecule-platform-mcp" +// conciergePlatformMCPCreateWorkspaceTool is the literal MCP tool identifier +// the platform concierge must surface for the post-online fail-loud gate +// (core#3082) to consider the management MCP actually loaded. The Claude +// Code dispatcher formats every MCP tool as `mcp____`; the +// platform MCP server's install name derives to "molecule-platform" via +// PluginNameFromSource(conciergePlatformMCPSource), so the create_workspace +// tool's namespaced identifier is `mcp__molecule-platform__create_workspace`. +// This is the SAME literal the staging concierge E2E (tests/e2e/test_staging_ +// concierge_creates_workspace_e2e.sh:4.5/6) probes for; pinning it as a +// constant keeps the runtime gate and the E2E in lock-step so a drift in +// one breaks both with the same signal. +// +// Why we don't match the server/plugin NAME here: the heartbeat's +// loaded_mcp_tools list carries TOOL identifiers (mcp____), +// not plugin names. The previous check (loadedSet[conciergePlatformMCPName]) +// was a no-op false-green — it matched the plugin NAME against a list of +// TOOL strings, which would always be empty for the management MCP (the +// plugin is named "molecule-ai-plugin-molecule-platform-mcp" while its +// tools are namespaced "mcp__molecule-platform__*"). The literal-tool +// match below is the actual contract the runtime must satisfy. +const conciergePlatformMCPCreateWorkspaceTool = "mcp__molecule-platform__create_workspace" + // ensureConciergeProvider pins the concierge's LLM provider to `platform` (core // companion to ensureConciergeModel). It guarantees the env-level provider pin // that the runtime needs, independent of the template config.yaml (which is NOT diff --git a/workspace-server/internal/handlers/registry.go b/workspace-server/internal/handlers/registry.go index 5660cd26..75d5c080 100644 --- a/workspace-server/internal/handlers/registry.go +++ b/workspace-server/internal/handlers/registry.go @@ -414,15 +414,27 @@ func (h *RegistryHandler) platformAgentMCPServerPresent(present *bool) bool { } // platformAgentManagementMCPLoaded reports whether the concierge's declared -// management MCP (workspace_declared_plugins) is present in the runtime's -// reported loaded_mcp_tools list. It returns true only when: -// - the workspace has the management plugin declared, AND -// - the reported loaded tool list does not contain the declared plugin name. +// management MCP is actually loaded into the LLM's runtime tool list. It +// returns true (caller marks degraded) only when: +// - the workspace has the management plugin declared in +// workspace_declared_plugins (the install NAME conciergePlatformMCPName), +// AND +// - the reported loaded tool list does NOT contain the literal required +// tool identifier (conciergePlatformMCPCreateWorkspaceTool). // -// If the management plugin is not declared (e.g. a non-platform workspace or a -// platform concierge before plugin reconciliation), it returns false. Errors -// are returned to the caller for logging; a failed lookup must not silently -// look healthy. +// Why this checks the TOOL identifier and not the plugin name: the heartbeat's +// loaded_mcp_tools carries namespaced tool ids (`mcp____`), not +// plugin names. The management MCP's server is "molecule-platform" (the +// PluginNameFromSource derivation), so its create_workspace tool is +// "mcp__molecule-platform__create_workspace" — a different value from the +// plugin name "molecule-ai-plugin-molecule-platform-mcp". Comparing the +// plugin NAME against TOOL ids was a no-op false-green (CR2 #12653). +// +// If the management plugin is not declared (non-platform workspace, or a +// platform concierge before plugin reconciliation), it returns false (NOT +// missing) so we don't false-alarm on workspaces that legitimately don't +// declare it. Errors are returned to the caller for logging; a failed +// lookup must not silently look healthy. func (h *RegistryHandler) platformAgentManagementMCPLoaded(ctx context.Context, workspaceID string, loaded []string) (bool, error) { declared, err := listDeclaredPlugins(ctx, workspaceID) if err != nil { @@ -440,11 +452,12 @@ func (h *RegistryHandler) platformAgentManagementMCPLoaded(ctx context.Context, return false, nil } - loadedSet := make(map[string]bool, len(loaded)) for _, t := range loaded { - loadedSet[t] = true + if t == conciergePlatformMCPCreateWorkspaceTool { + return false, nil + } } - return !loadedSet[conciergePlatformMCPName], nil + return true, nil } // markWorkspaceFailed updates a workspace row to status='failed' and broadcasts @@ -1313,22 +1326,53 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea } // core#3082: post-online fail-loud for a missing declared management MCP. - // If the runtime reports loaded_mcp_tools, compare it against the - // concierge's declared management plugin. A declared-but-not-loaded - // management MCP marks the workspace degraded and alerts instead of - // serving silently. - if payload.LoadedMCPTools != nil { - managementMissing, mErr := h.platformAgentManagementMCPLoaded(ctx, payload.WorkspaceID, payload.LoadedMCPTools) + // + // Triggered when the runtime AFFIRMATIVELY reports mcp_server_present=true + // (the #147 contract). For pre-#147 runtimes where the field is nil, + // platformAgentMCPServerPresent above already returned true under + // backward-compat — we DO NOT run the #3082 check in that case so + // legacy runtimes don't flip to degraded before the runtime-side + // loaded_mcp_tools producer lands. + // + // Once triggered, the gate has two fail-loud paths: + // - loaded_mcp_tools present but missing the required tool + // (mcp__molecule-platform__create_workspace) → degraded. + // - loaded_mcp_tools ABSENT (runtime says server is up but won't + // report the tools list) → degraded. This is the fail-loud + // behavior CR2+Researcher asked for: silent-skip when the runtime + // doesn't speak the new contract is exactly the false-green + // #3082 exists to catch. Runtime needs a loaded_mcp_tools + // producer (tracked separately — see PR #3101 PM flag). + if payload.MCPServerPresent != nil && *payload.MCPServerPresent { + loaded := payload.LoadedMCPTools + var ( + managementMissing bool + mErr error + absentToolsList bool + ) + if loaded == nil { + // Runtime speaks #147 (server_present=true) but omits the new + // loaded_mcp_tools producer → we cannot verify the specific + // required tool is loaded. Fail-loud. + managementMissing = true + absentToolsList = true + } else { + managementMissing, mErr = h.platformAgentManagementMCPLoaded(ctx, payload.WorkspaceID, loaded) + } if mErr != nil { log.Printf("Heartbeat: management MCP load check failed for %s: %v", payload.WorkspaceID, mErr) } else if managementMissing { msg := "platform agent management MCP declared but not loaded; marking degraded (core#3082)" + if absentToolsList { + msg = "platform agent runtime did not report loaded_mcp_tools on a mcp_server_present=true heartbeat; cannot verify create_workspace tool is loaded — marking degraded (core#3082)" + } log.Printf("Heartbeat: %s (workspace=%s)", msg, payload.WorkspaceID) if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, last_sample_error = $2, updated_at = now() WHERE id = $3 AND status = 'online'`, models.StatusDegraded, msg, payload.WorkspaceID); err != nil { log.Printf("Heartbeat: failed to mark %s degraded (management MCP missing): %v", payload.WorkspaceID, err) } h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceDegraded), payload.WorkspaceID, map[string]interface{}{ "management_mcp_missing": true, + "loaded_mcp_tools_absent": absentToolsList, "sample_error": msg, }) } diff --git a/workspace-server/internal/models/workspace.go b/workspace-server/internal/models/workspace.go index 12f95699..5918c48c 100644 --- a/workspace-server/internal/models/workspace.go +++ b/workspace-server/internal/models/workspace.go @@ -159,11 +159,21 @@ type HeartbeatPayload struct { // would otherwise resurrect an mcp-less concierge (RCA #2970). MCPServerPresent *bool `json:"mcp_server_present,omitempty"` - // LoadedMCPTools is the list of MCP tool names the runtime reports as - // actually loaded for this workspace. For platform concierges, core - // cross-checks this against the declared management MCP so a missing - // plugin is surfaced as degraded instead of silent (core#3082). - // nil/omitted means the runtime does not yet speak this contract. + // LoadedMCPTools is the list of namespaced MCP tool identifiers the + // runtime reports as actually loaded for this workspace. For platform + // concierges, core cross-checks this against the declared management + // MCP so a missing plugin is surfaced as degraded instead of silent + // (core#3082, CR2 #12653 fix). Each entry is a Claude Code dispatcher + // id of the form `mcp____`; the platform MCP's required + // tool is `mcp__molecule-platform__create_workspace` (see + // conciergePlatformMCPCreateWorkspaceTool). + // + // On a heartbeat where mcp_server_present=true and LoadedMCPTools is + // nil/omitted, the #3082 gate fails loud (degraded) — the runtime + // spoke the #147 contract but omitted the new loaded_mcp_tools + // producer, so we cannot verify the specific required tool is loaded. + // Runtime needs a loaded_mcp_tools producer to make the deployed path + // healthy (tracked separately — see PR #3101 PM flag). LoadedMCPTools []string `json:"loaded_mcp_tools,omitempty"` } -- 2.52.0 From 479be9921639403f4bd04a78158ac68c43f76311 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 20 Jun 2026 22:46:41 +0000 Subject: [PATCH 3/5] fix(core#3082): fail-loud when declared management MCP lookup errors CR2 #12653 follow-up: platformAgentManagementMCPLoaded already returned errors, but the heartbeat caller only logged them and left the workspace online. Now a listDeclaredPlugins error marks the platform concierge degraded and broadcasts WORKSPACE_DEGRADED, matching the helper's 'a failed lookup must not silently look healthy' contract. Adds TestHeartbeatHandler_PlatformManagementMCPLookupError_FlipsOnlineToDegraded. --- .../handlers/concierge_mcp_loaded_test.go | 64 +++++++++++++++++++ .../internal/handlers/registry.go | 14 +++- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/workspace-server/internal/handlers/concierge_mcp_loaded_test.go b/workspace-server/internal/handlers/concierge_mcp_loaded_test.go index b0e342b0..eb20ce49 100644 --- a/workspace-server/internal/handlers/concierge_mcp_loaded_test.go +++ b/workspace-server/internal/handlers/concierge_mcp_loaded_test.go @@ -2,6 +2,7 @@ package handlers import ( "bytes" + "errors" "net/http" "net/http/httptest" "testing" @@ -232,3 +233,66 @@ func TestHeartbeatHandler_Pre147RuntimeNoMCPServerPresent_StaysOnline(t *testing t.Errorf("unmet sqlmock expectations: %v", err) } } + +// TestHeartbeatHandler_PlatformManagementMCPLookupError_FlipsOnlineToDegraded +// verifies that a failure to read workspace_declared_plugins is fail-loud: +// the workspace is marked degraded rather than staying online with an +// unverified management MCP. This closes the false-green path where a broken +// lookup silently looked healthy (CR2 #12653 follow-up). +func TestHeartbeatHandler_PlatformManagementMCPLookupError_FlipsOnlineToDegraded(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewRegistryHandler(broadcaster) + + // Initial heartbeat UPDATE. + mock.ExpectQuery("SELECT COALESCE\\(current_task"). + WithArgs("ws-mcp-lookup-err"). + WillReturnRows(sqlmock.NewRows([]string{"current_task", "monthly_spend", "status"}).AddRow("", 0, "online")) + + mock.ExpectExec("UPDATE workspaces SET"). + WithArgs("ws-mcp-lookup-err", 0.0, "", 0, 60, ""). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // evaluateStatus: currentStatus=online, kind=platform. + mock.ExpectQuery("SELECT status, kind, last_register_failure_at FROM workspaces WHERE id ="). + WithArgs("ws-mcp-lookup-err"). + WillReturnRows(sqlmock.NewRows([]string{"status", "kind", "last_register_failure_at"}).AddRow("online", "platform", nil)) + + // platformAgentHasModelSecret: model secret exists. + mock.ExpectQuery("SELECT EXISTS"). + WithArgs("ws-mcp-lookup-err"). + WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true)) + + // platformAgentManagementMCPLoaded: listDeclaredPlugins fails. + mock.ExpectQuery("SELECT plugin_name, source_raw FROM workspace_declared_plugins"). + WithArgs("ws-mcp-lookup-err"). + WillReturnError(errors.New("connection refused")) + + // Degraded UPDATE — lookup failure must not silently look healthy. + mock.ExpectExec("UPDATE workspaces SET status =.*status = 'online'"). + WithArgs(models.StatusDegraded, "platform agent declared management MCP lookup failed: listDeclaredPlugins: query: connection refused; marking degraded (core#3082)", "ws-mcp-lookup-err"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // WORKSPACE_DEGRADED broadcast. + mock.ExpectExec("INSERT INTO structure_events"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + + // Even though loaded_mcp_tools contains the required tool, the lookup error + // takes precedence and the workspace must degrade. + body := `{"workspace_id":"ws-mcp-lookup-err","error_rate":0.0,"sample_error":"","active_tasks":0,"uptime_seconds":60,"mcp_server_present":true,"loaded_mcp_tools":["` + conciergePlatformMCPCreateWorkspaceTool + `"]}` + c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.Heartbeat(c) + + if w.Code != http.StatusOK { + t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} diff --git a/workspace-server/internal/handlers/registry.go b/workspace-server/internal/handlers/registry.go index 75d5c080..7035ec3d 100644 --- a/workspace-server/internal/handlers/registry.go +++ b/workspace-server/internal/handlers/registry.go @@ -433,8 +433,8 @@ func (h *RegistryHandler) platformAgentMCPServerPresent(present *bool) bool { // If the management plugin is not declared (non-platform workspace, or a // platform concierge before plugin reconciliation), it returns false (NOT // missing) so we don't false-alarm on workspaces that legitimately don't -// declare it. Errors are returned to the caller for logging; a failed -// lookup must not silently look healthy. +// declare it. Errors are returned to the caller and MUST be treated as +// fail-loud/degraded — a failed lookup must not silently look healthy. func (h *RegistryHandler) platformAgentManagementMCPLoaded(ctx context.Context, workspaceID string, loaded []string) (bool, error) { declared, err := listDeclaredPlugins(ctx, workspaceID) if err != nil { @@ -1360,7 +1360,15 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea managementMissing, mErr = h.platformAgentManagementMCPLoaded(ctx, payload.WorkspaceID, loaded) } if mErr != nil { - log.Printf("Heartbeat: management MCP load check failed for %s: %v", payload.WorkspaceID, mErr) + msg := fmt.Sprintf("platform agent declared management MCP lookup failed: %v; marking degraded (core#3082)", mErr) + log.Printf("Heartbeat: %s (workspace=%s)", msg, payload.WorkspaceID) + if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, last_sample_error = $2, updated_at = now() WHERE id = $3 AND status = 'online'`, models.StatusDegraded, msg, payload.WorkspaceID); err != nil { + log.Printf("Heartbeat: failed to mark %s degraded (management MCP lookup error): %v", payload.WorkspaceID, err) + } + h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceDegraded), payload.WorkspaceID, map[string]interface{}{ + "management_mcp_lookup_failed": true, + "sample_error": msg, + }) } else if managementMissing { msg := "platform agent management MCP declared but not loaded; marking degraded (core#3082)" if absentToolsList { -- 2.52.0 From da115fd6c37eb90a64d280188aa46eec60c38f20 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 20 Jun 2026 22:51:19 +0000 Subject: [PATCH 4/5] fix(core#3082): remove redundant listDeclaredPlugins prefix in error message The wrapper in platformAgentManagementMCPLoaded duplicated the inner prefix, causing the heartbeat error message to read 'listDeclaredPlugins: listDeclaredPlugins: query: ...'. Use a distinct 'declared-plugin lookup' wrapper so the logged/broadcast message is clear and the regression test matches. --- workspace-server/internal/handlers/concierge_mcp_loaded_test.go | 2 +- workspace-server/internal/handlers/registry.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workspace-server/internal/handlers/concierge_mcp_loaded_test.go b/workspace-server/internal/handlers/concierge_mcp_loaded_test.go index eb20ce49..afda1dfd 100644 --- a/workspace-server/internal/handlers/concierge_mcp_loaded_test.go +++ b/workspace-server/internal/handlers/concierge_mcp_loaded_test.go @@ -271,7 +271,7 @@ func TestHeartbeatHandler_PlatformManagementMCPLookupError_FlipsOnlineToDegraded // Degraded UPDATE — lookup failure must not silently look healthy. mock.ExpectExec("UPDATE workspaces SET status =.*status = 'online'"). - WithArgs(models.StatusDegraded, "platform agent declared management MCP lookup failed: listDeclaredPlugins: query: connection refused; marking degraded (core#3082)", "ws-mcp-lookup-err"). + WithArgs(models.StatusDegraded, "platform agent declared management MCP lookup failed: declared-plugin lookup: listDeclaredPlugins: query: connection refused; marking degraded (core#3082)", "ws-mcp-lookup-err"). WillReturnResult(sqlmock.NewResult(0, 1)) // WORKSPACE_DEGRADED broadcast. diff --git a/workspace-server/internal/handlers/registry.go b/workspace-server/internal/handlers/registry.go index 7035ec3d..6c4f8c78 100644 --- a/workspace-server/internal/handlers/registry.go +++ b/workspace-server/internal/handlers/registry.go @@ -438,7 +438,7 @@ func (h *RegistryHandler) platformAgentMCPServerPresent(present *bool) bool { func (h *RegistryHandler) platformAgentManagementMCPLoaded(ctx context.Context, workspaceID string, loaded []string) (bool, error) { declared, err := listDeclaredPlugins(ctx, workspaceID) if err != nil { - return false, fmt.Errorf("listDeclaredPlugins: %w", err) + return false, fmt.Errorf("declared-plugin lookup: %w", err) } hasDeclaredManagement := false -- 2.52.0 From ffb9872e65e811daa7c6b67843d7d6ebc5a96259 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 20 Jun 2026 22:55:00 +0000 Subject: [PATCH 5/5] test(core#3082): make lookup-error regression test resilient to error string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use sqlmock.AnyArg() for the human-readable sample_error in the degraded UPDATE expectation. The test now asserts the lookup-error → degraded-flip and WORKSPACE_DEGRADED broadcast without coupling to the exact wrapped listDeclaredPlugins error message. --- .../internal/handlers/concierge_mcp_loaded_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workspace-server/internal/handlers/concierge_mcp_loaded_test.go b/workspace-server/internal/handlers/concierge_mcp_loaded_test.go index afda1dfd..fb5a6cd5 100644 --- a/workspace-server/internal/handlers/concierge_mcp_loaded_test.go +++ b/workspace-server/internal/handlers/concierge_mcp_loaded_test.go @@ -270,8 +270,10 @@ func TestHeartbeatHandler_PlatformManagementMCPLookupError_FlipsOnlineToDegraded WillReturnError(errors.New("connection refused")) // Degraded UPDATE — lookup failure must not silently look healthy. + // Use AnyArg for the human-readable message so the test is not brittle + // against the exact wrapped error string produced by listDeclaredPlugins. mock.ExpectExec("UPDATE workspaces SET status =.*status = 'online'"). - WithArgs(models.StatusDegraded, "platform agent declared management MCP lookup failed: declared-plugin lookup: listDeclaredPlugins: query: connection refused; marking degraded (core#3082)", "ws-mcp-lookup-err"). + WithArgs(models.StatusDegraded, sqlmock.AnyArg(), "ws-mcp-lookup-err"). WillReturnResult(sqlmock.NewResult(0, 1)) // WORKSPACE_DEGRADED broadcast. -- 2.52.0