auto-heal: re-declare molecule-platform-mcp before restarting vanilla concierge #3051

Merged
devops-engineer merged 1 commit from fix/3047-redeclare-platform-mcp-on-boot into main 2026-06-19 11:49:49 +00:00
2 changed files with 19 additions and 6 deletions
@@ -613,18 +613,22 @@ func MaybeProvisionPlatformAgentOnBoot(ctx context.Context, database *sql.DB, pr
// Already online AND a live container? Then it's running — but it may be a
// concierge that pre-dates the identity overlay (booted as a vanilla
// claude-code agent with no system-prompt.md). Probe for the concierge
// identity; if it's missing, restart ONCE so the provision path re-seeds the
// overlay. This is what makes the seed idempotent + self-applying on the
// EXISTING concierge (the deterministic self-hosted id), not just new
// installs. IsRunning is the authoritative liveness check; status is the
// cheap one.
// identity; if it's missing, re-declare the management MCP plugin in the DB
// BEFORE restarting so the post-restart reconcile + boot-install see it, then
// restart ONCE so the provision path re-seeds the overlay. This is what makes
// the seed idempotent + self-applying on the EXISTING concierge (the
// deterministic self-hosted id), not just new installs. IsRunning is the
// authoritative liveness check; status is the cheap one.
running, _ := prov.IsRunning(ctx, id)
if running {
if conciergeIdentityPresent(ctx, prov, id) {
log.Printf("boot: platform-agent %s already running with concierge identity — skipping", id)
return
}
log.Printf("boot: platform-agent %s running but MISSING concierge identity — restarting once to apply the system prompt + platform MCP", id)
log.Printf("boot: platform-agent %s running but MISSING concierge identity — re-declaring management MCP and restarting once to apply the system prompt + platform MCP", id)
if rec, skip := seedTemplatePlugins(ctx, id, []string{conciergePlatformMCPPlugin}); skip > 0 {
log.Printf("boot: concierge %s could not re-declare %q plugin (recorded=%d skipped=%d) — management MCP may be absent until next provision", id, conciergePlatformMCPPlugin, rec, skip)
}
go restartByID(id)
return
}
@@ -312,6 +312,15 @@ func TestMaybeProvisionPlatformAgentOnBoot_RestartsRunningButVanilla(t *testing.
mock.ExpectQuery(`SELECT id, status FROM workspaces WHERE kind = 'platform'`).
WillReturnRows(sqlmock.NewRows([]string{"id", "status"}).AddRow(bootPlatformID, "online"))
// The running-but-vanilla path re-declares the management MCP plugin before
// restarting so the post-restart boot-install sees the declaration.
const kindQuery = `SELECT COALESCE\(kind, 'workspace'\) FROM workspaces WHERE id =`
mock.ExpectQuery(kindQuery).WithArgs(bootPlatformID).
WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("platform"))
mock.ExpectExec(`INSERT INTO workspace_declared_plugins`).
WithArgs(bootPlatformID, conciergePlatformMCPPlugin, sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(0, 1))
// Running, but ExecRead of system-prompt.md returns vanilla content (no
// "Org Concierge") → identity absent → restart.
prov := &stubBootProvExec{stubBootProv: stubBootProv{running: true}, systemPrompt: "generic coding assistant"}