diff --git a/workspace-server/internal/handlers/concierge_mcp_present_test.go b/workspace-server/internal/handlers/concierge_mcp_present_test.go new file mode 100644 index 00000000..9ccdbb5f --- /dev/null +++ b/workspace-server/internal/handlers/concierge_mcp_present_test.go @@ -0,0 +1,21 @@ +package handlers + +import "testing" + +// #2989 rollout-safety (2026-06-18): nil (runtime predating #147, field absent) +// must be treated as ALLOW, not fail-closed — else the gate takes every +// pre-#147 concierge offline. Only an explicit false fail-closes. +func TestPlatformAgentMCPServerPresent_NilTolerance(t *testing.T) { + h := &RegistryHandler{} + tt := true + ff := false + if !h.platformAgentMCPServerPresent(nil) { + t.Error("nil (old runtime, field absent) must be ALLOW, got block") + } + if !h.platformAgentMCPServerPresent(&tt) { + t.Error("&true must be allow") + } + if h.platformAgentMCPServerPresent(&ff) { + t.Error("&false (runtime reports MCP absent) must fail-closed") + } +} diff --git a/workspace-server/internal/handlers/registry.go b/workspace-server/internal/handlers/registry.go index 583896c6..b6cce98a 100644 --- a/workspace-server/internal/handlers/registry.go +++ b/workspace-server/internal/handlers/registry.go @@ -393,12 +393,24 @@ func (h *RegistryHandler) platformAgentHasModelSecret(ctx context.Context, works return exists, err } -// platformAgentMCPServerPresent reports whether the runtime declared the -// platform-agent image's /opt/molecule-mcp-server binary present. The payload -// field is a pointer so an absent declaration (nil) is treated as false — -// fail-closed: an old/generic runtime cannot prove it has the concierge MCP. +// platformAgentMCPServerPresent reports whether the concierge's MCP server +// should be considered present for the fail-closed online gate (#2970). +// +// The payload field is a pointer to distinguish three states: +// - nil → the runtime did NOT report the field at all. This is a runtime +// PREDATING the #147 contract (mcp_server_present on register/heartbeat). +// The concierge image bakes /opt/molecule-mcp-server unconditionally, so an +// old runtime that simply can't SPEAK the contract must NOT be fail-closed — +// that would take every concierge offline the moment this gate deploys ahead +// of the runtime release that adds the field (the exact rollout-order hazard +// that fail-closed #2989 + a pre-#147 concierge image hit on 2026-06-18: +// test3 + the fleet would be marked failed despite a present MCP binary). +// Treat nil as ALLOW (unknown ⇒ don't block) for backward-compat. +// - &true → runtime affirmatively reports the binary present → allow. +// - &false → a #147-aware runtime affirmatively reports it ABSENT → fail-closed +// (the real signal this gate exists to catch). func (h *RegistryHandler) platformAgentMCPServerPresent(present *bool) bool { - return present != nil && *present + return present == nil || *present } // markWorkspaceFailed updates a workspace row to status='failed' and broadcasts