fix(concierge): correct platform-MCP declaration + ship it base-independently #2522

Merged
agent-reviewer merged 2 commits from fix/concierge-mcp-declaration into main 2026-06-10 14:17:24 +00:00
2 changed files with 86 additions and 12 deletions
@@ -103,25 +103,51 @@ behalf, and keep them in the loop.
// conciergeMCPServersBlock is the YAML appended to the concierge's config.yaml
// so the runtime loads the org-admin platform MCP alongside the always-on a2a
// server. The Phase-2 extra-MCP merge (claude_sdk_executor.py
// _apply_extra_mcp_servers) reads this `mcp_servers:` list. The platform MCP
// authenticates purely from the container env (MOLECULE_API_KEY /
// MOLECULE_API_URL / MOLECULE_ORG_ID — wired by conciergePlatformMCPEnv), so no
// per-server env block is needed here.
// _apply_extra_mcp_servers) reads this `mcp_servers:` list.
//
// Entry shape pins the REAL image contract (agents-team pilot RCA,
// 2026-06-10 — the previous block pointed at a /opt/molecule-mcp-server
// path the image never shipped):
// - command `molecule-platform-mcp` — Dockerfile.platform-agent symlinks
// the npm-installed @molecule-ai/mcp-server bin under this UNAMBIGUOUS
// name. The package's own bin name (`molecule-mcp`) COLLIDES with the
// runtime wheel's Python a2a inbox bridge at /usr/local/bin/molecule-mcp,
// which wins on PATH — the pilot's second-stage failure (2026-06-10):
// the config resolved to the Python bridge and the agent got a duplicate
// a2a server instead of the management registry.
// - env MOLECULE_MCP_MODE=management — the SAME binary serves the
// 21-tool workspace a2a registry by default; only management mode
// registers the org-admin tools (list_workspaces et al). Without it
// the concierge gets a duplicate a2a server and zero admin tools.
//
// Auth comes from the container env (MOLECULE_API_KEY / MOLECULE_API_URL /
// MOLECULE_ORG_ID — wired by conciergePlatformMCPEnv); MCP-host env merges
// over process env, so the mode flag composes with those.
//
// SELF-HOST CAVEAT: the local stack provisions the concierge on the ordinary
// `claude-code` image, which does NOT ship /opt/molecule-mcp-server. The
// dedicated `platform-agent` image (Dockerfile.platform-agent) does. The
// executor's _apply_extra_mcp_servers skips an entry whose command/script is
// `claude-code` image, which does NOT ship the molecule-platform-mcp bin. The
// executor's _apply_extra_mcp_servers skips an entry whose command is
// absent, so declaring this block can never crash the agent or wedge the SDK
// init locally — the identity (system prompt) works everywhere; the org-admin
// MCP tools only light up on the platform-agent image.
const conciergeMCPServersBlock = `mcp_servers:
- name: platform
command: node
args:
- /opt/molecule-mcp-server/dist/index.js
command: molecule-platform-mcp
env:
MOLECULE_MCP_MODE: management
`
// conciergeMCPFragmentFile names the standalone overlay fragment that carries
// the SAME declaration as conciergeMCPServersBlock.
//
// conciergeIdentityFiles writes it UNCONDITIONALLY: unlike the config.yaml
// append, the fragment does not depend on resolving a base config. On the SaaS
// restart-provision path all three base resolutions miss (no in-memory
// configFiles, no templatePath, no exec-readable container), so the appended
// block silently never shipped and the concierge booted without its admin MCP
// (the pilot's TOOLS-FAIL).
//
// The runtime executor merges /configs/mcp_servers.yaml after config.yaml;
// older runtimes ignore the extra file, so shipping it is strictly additive.
const conciergeMCPFragmentFile = "mcp_servers.yaml"
// SelfHostedPlatformAgentID is the deterministic platform-agent id used when no
// control plane is present to derive a per-org id (self-hosted / local). There
// is one platform agent per self-hosted tenant, so a fixed namespaced uuidv5 is
@@ -159,6 +185,11 @@ func defaultPlatformAgentName() string {
func conciergeIdentityFiles(name string, baseConfigYAML []byte) map[string][]byte {
files := map[string][]byte{
"system-prompt.md": []byte(fmt.Sprintf(conciergeSystemPromptTmpl, name)),
// Always-shipped fragment: declares the platform MCP regardless of
// whether a base config.yaml was resolvable (see
// conciergeMCPFragmentFile). Idempotent — fixed content, re-seeded
// every provision cycle, never touches config.yaml.
conciergeMCPFragmentFile: []byte(conciergeMCPServersBlock),
}
if len(baseConfigYAML) > 0 && !strings.Contains(string(baseConfigYAML), "\nmcp_servers:") &&
!strings.HasPrefix(string(baseConfigYAML), "mcp_servers:") {
@@ -184,6 +215,15 @@ func conciergePlatformMCPEnv(env map[string]string) {
}
}
setIfAbsent("MOLECULE_API_KEY", os.Getenv("ADMIN_TOKEN"))
// The management-mode tool registry (mcp-server >=1.5.0,
// src/tools/management/client.ts) authenticates with
// MOLECULE_ORG_API_KEY — a distinct env from the connectivity-preflight
// MOLECULE_API_KEY. The tenant ADMIN_TOKEN is a valid bearer for the
// tenant-admin surface those tools call (same header shape as the
// install/restart curls), so wire it under both names. Verified live on
// the agents-team pilot: with only MOLECULE_API_KEY set, every
// management tool returns AUTH_ERROR.
setIfAbsent("MOLECULE_ORG_API_KEY", os.Getenv("ADMIN_TOKEN"))
// MOLECULE_API_URL: prefer an explicit env, else the in-cluster platform URL.
apiURL := os.Getenv("MOLECULE_API_URL")
if apiURL == "" {
@@ -369,11 +369,30 @@ func TestConciergeIdentityFiles(t *testing.T) {
if !ok {
t.Fatal("overlay missing config.yaml (mcp_servers should have been appended)")
}
for _, want := range []string{"mcp_servers:", "name: platform", "command: node", "/opt/molecule-mcp-server/dist/index.js", "runtime: claude-code"} {
// Pins the REAL image contract (pilot RCA 2026-06-10): the bin on PATH
// + management mode — NOT the /opt node path the image never shipped,
// and NOT default (a2a) mode which has zero admin tools.
for _, want := range []string{"mcp_servers:", "name: platform", "command: molecule-platform-mcp", "MOLECULE_MCP_MODE: management", "runtime: claude-code"} {
if !strings.Contains(string(cfg), want) {
t.Errorf("config.yaml missing %q\n--- got ---\n%s", want, cfg)
}
}
// Regression guard: the old node-script path must never reappear in the
// generated config. The message names the bin the config must reference —
// molecule-platform-mcp — not the package's own `molecule-mcp` bin name,
// which collides with the Python a2a bridge on PATH.
if strings.Contains(string(cfg), "/opt/molecule-mcp-server") {
	t.Error("stale /opt path resurfaced — the image ships the molecule-platform-mcp bin, not /opt/molecule-mcp-server")
}
// The standalone fragment ships ALWAYS, carrying the same declaration —
// the base-independent path that survives the SaaS restart-provision
// (where no base config is resolvable).
frag, ok := files[conciergeMCPFragmentFile]
if !ok {
t.Fatalf("overlay missing %s (the base-independent MCP declaration)", conciergeMCPFragmentFile)
}
for _, want := range []string{"name: platform", "command: molecule-platform-mcp", "MOLECULE_MCP_MODE: management"} {
if !strings.Contains(string(frag), want) {
t.Errorf("%s missing %q", conciergeMCPFragmentFile, want)
}
}
// Idempotent: re-applying onto an already-patched config does NOT add a
// second mcp_servers block and does NOT emit a config.yaml overlay (nothing
@@ -386,7 +405,10 @@ func TestConciergeIdentityFiles(t *testing.T) {
t.Errorf("mcp_servers: appears %d times, want exactly 1", n)
}
// No base config (couldn't read one): identity still lands; no config.yaml.
// No base config (couldn't read one): identity still lands; no config.yaml
// — but the fragment STILL ships, so the MCP declaration reaches the
// container even when every base resolution misses (the exact SaaS
// restart-provision gap that booted the pilot concierge toolless).
only := conciergeIdentityFiles("Org Concierge", nil)
if _, present := only["system-prompt.md"]; !present {
t.Error("system prompt must land even with no base config")
@@ -394,6 +416,9 @@ func TestConciergeIdentityFiles(t *testing.T) {
if _, present := only["config.yaml"]; present {
t.Error("no config.yaml overlay when there is no base to append onto")
}
if _, present := only[conciergeMCPFragmentFile]; !present {
t.Errorf("%s must ship even with no base config", conciergeMCPFragmentFile)
}
}
// TestConciergePlatformMCPEnv asserts the platform-MCP env wiring: ADMIN_TOKEN →
@@ -461,12 +486,18 @@ func TestApplyConciergeProvisionConfig_OnlyPlatformGetsOrgMCP(t *testing.T) {
if _, ok := env["MOLECULE_API_KEY"]; ok {
t.Errorf("SECURITY: ordinary workspace leaked MOLECULE_API_KEY (org-admin token): %v", env)
}
if _, ok := env["MOLECULE_ORG_API_KEY"]; ok {
t.Errorf("SECURITY: ordinary workspace leaked MOLECULE_ORG_API_KEY: %v", env)
}
if _, ok := out["system-prompt.md"]; ok {
t.Error("ordinary workspace was given the concierge system prompt")
}
if strings.Contains(string(out["config.yaml"]), "mcp_servers") {
t.Error("SECURITY: ordinary workspace was given the platform mcp_servers config")
}
if _, ok := out[conciergeMCPFragmentFile]; ok {
t.Errorf("SECURITY: ordinary workspace was given %s", conciergeMCPFragmentFile)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
@@ -482,6 +513,9 @@ func TestApplyConciergeProvisionConfig_OnlyPlatformGetsOrgMCP(t *testing.T) {
if env["MOLECULE_API_KEY"] != "secret-org-admin" {
t.Errorf("concierge did not receive the org-admin token; env=%v", env)
}
if env["MOLECULE_ORG_API_KEY"] != "secret-org-admin" {
t.Errorf("management tools auth env (MOLECULE_ORG_API_KEY) missing; env=%v", env)
}
if _, ok := out["system-prompt.md"]; !ok {
t.Error("concierge did not receive the system prompt")
}