diff --git a/workspace-server/internal/handlers/platform_agent.go b/workspace-server/internal/handlers/platform_agent.go index 23ea4cefd..8e6da8e63 100644 --- a/workspace-server/internal/handlers/platform_agent.go +++ b/workspace-server/internal/handlers/platform_agent.go @@ -103,25 +103,51 @@ behalf, and keep them in the loop. // conciergeMCPServersBlock is the YAML appended to the concierge's config.yaml // so the runtime loads the org-admin platform MCP alongside the always-on a2a // server. The Phase-2 extra-MCP merge (claude_sdk_executor.py -// _apply_extra_mcp_servers) reads this `mcp_servers:` list. The platform MCP -// authenticates purely from the container env (MOLECULE_API_KEY / -// MOLECULE_API_URL / MOLECULE_ORG_ID — wired by conciergePlatformMCPEnv), so no -// per-server env block is needed here. +// _apply_extra_mcp_servers) reads this `mcp_servers:` list. +// +// Entry shape pins the REAL image contract (agents-team pilot RCA, +// 2026-06-10 — the previous block pointed at a /opt/molecule-mcp-server +// path the image never shipped): +// - command `molecule-platform-mcp` — Dockerfile.platform-agent symlinks +// the npm-installed @molecule-ai/mcp-server bin under this UNAMBIGUOUS +// name. The package's own bin name (`molecule-mcp`) COLLIDES with the +// runtime wheel's Python a2a inbox bridge at /usr/local/bin/molecule-mcp, +// which wins on PATH — the pilot's second-stage failure (2026-06-10): +// the config resolved to the Python bridge and the agent got a duplicate +// a2a server instead of the management registry. +// - env MOLECULE_MCP_MODE=management — the SAME binary serves the +// 21-tool workspace a2a registry by default; only management mode +// registers the org-admin tools (list_workspaces et al). Without it +// the concierge gets a duplicate a2a server and zero admin tools. +// +// Auth comes from the container env (MOLECULE_API_KEY / MOLECULE_API_URL / +// MOLECULE_ORG_ID — wired by conciergePlatformMCPEnv); MCP-host env merges +// over process env, so the mode flag composes with those. // // SELF-HOST CAVEAT: the local stack provisions the concierge on the ordinary -// `claude-code` image, which does NOT ship /opt/molecule-mcp-server. The -// dedicated `platform-agent` image (Dockerfile.platform-agent) does. The -// executor's _apply_extra_mcp_servers skips an entry whose command/script is +// `claude-code` image, which does NOT ship the molecule-platform-mcp bin. The +// executor's _apply_extra_mcp_servers skips an entry whose command is // absent, so declaring this block can never crash the agent or wedge the SDK // init locally — the identity (system prompt) works everywhere; the org-admin // MCP tools only light up on the platform-agent image. const conciergeMCPServersBlock = `mcp_servers: - name: platform - command: node - args: - - /opt/molecule-mcp-server/dist/index.js + command: molecule-platform-mcp + env: + MOLECULE_MCP_MODE: management ` +// conciergeMCPFragmentFile is the standalone overlay fragment carrying the +// SAME declaration as conciergeMCPServersBlock. Written UNCONDITIONALLY by +// conciergeIdentityFiles — unlike the config.yaml append, it does not depend +// on resolving a base config. On the SaaS restart-provision path all three +// base resolutions miss (no in-memory configFiles, no templatePath, no +// exec-readable container), so the appended block silently never shipped and +// the concierge booted without its admin MCP (the pilot's TOOLS-FAIL). +// The runtime executor merges /configs/mcp_servers.yaml after config.yaml; +// older runtimes ignore the extra file — strictly additive. +const conciergeMCPFragmentFile = "mcp_servers.yaml" + // SelfHostedPlatformAgentID is the deterministic platform-agent id used when no // control plane is present to derive a per-org id (self-hosted / local). There // is one platform agent per self-hosted tenant, so a fixed namespaced uuidv5 is @@ -159,6 +185,11 @@ func defaultPlatformAgentName() string { func conciergeIdentityFiles(name string, baseConfigYAML []byte) map[string][]byte { files := map[string][]byte{ "system-prompt.md": []byte(fmt.Sprintf(conciergeSystemPromptTmpl, name)), + // Always-shipped fragment: declares the platform MCP regardless of + // whether a base config.yaml was resolvable (see + // conciergeMCPFragmentFile). Idempotent — fixed content, re-seeded + // every provision cycle, never touches config.yaml. + conciergeMCPFragmentFile: []byte(conciergeMCPServersBlock), } if len(baseConfigYAML) > 0 && !strings.Contains(string(baseConfigYAML), "\nmcp_servers:") && !strings.HasPrefix(string(baseConfigYAML), "mcp_servers:") { @@ -184,6 +215,15 @@ func conciergePlatformMCPEnv(env map[string]string) { } } setIfAbsent("MOLECULE_API_KEY", os.Getenv("ADMIN_TOKEN")) + // The management-mode tool registry (mcp-server >=1.5.0, + // src/tools/management/client.ts) authenticates with + // MOLECULE_ORG_API_KEY — a distinct env from the connectivity-preflight + // MOLECULE_API_KEY. The tenant ADMIN_TOKEN is a valid bearer for the + // tenant-admin surface those tools call (same header shape as the + // install/restart curls), so wire it under both names. Verified live on + // the agents-team pilot: with only MOLECULE_API_KEY set, every + // management tool returns AUTH_ERROR. + setIfAbsent("MOLECULE_ORG_API_KEY", os.Getenv("ADMIN_TOKEN")) // MOLECULE_API_URL: prefer an explicit env, else the in-cluster platform URL. apiURL := os.Getenv("MOLECULE_API_URL") if apiURL == "" { diff --git a/workspace-server/internal/handlers/platform_agent_test.go b/workspace-server/internal/handlers/platform_agent_test.go index 11afa8ef6..3e7827063 100644 --- a/workspace-server/internal/handlers/platform_agent_test.go +++ b/workspace-server/internal/handlers/platform_agent_test.go @@ -369,11 +369,30 @@ func TestConciergeIdentityFiles(t *testing.T) { if !ok { t.Fatal("overlay missing config.yaml (mcp_servers should have been appended)") } - for _, want := range []string{"mcp_servers:", "name: platform", "command: node", "/opt/molecule-mcp-server/dist/index.js", "runtime: claude-code"} { + // Pins the REAL image contract (pilot RCA 2026-06-10): the bin on PATH + // + management mode — NOT the /opt node path the image never shipped, + // and NOT default (a2a) mode which has zero admin tools. + for _, want := range []string{"mcp_servers:", "name: platform", "command: molecule-platform-mcp", "MOLECULE_MCP_MODE: management", "runtime: claude-code"} { if !strings.Contains(string(cfg), want) { t.Errorf("config.yaml missing %q\n--- got ---\n%s", want, cfg) } } + if strings.Contains(string(cfg), "/opt/molecule-mcp-server") { + t.Error("stale /opt path resurfaced — the image ships the molecule-mcp bin, not /opt/molecule-mcp-server") + } + + // The standalone fragment ships ALWAYS, carrying the same declaration — + // the base-independent path that survives the SaaS restart-provision + // (where no base config is resolvable). + frag, ok := files[conciergeMCPFragmentFile] + if !ok { + t.Fatalf("overlay missing %s (the base-independent MCP declaration)", conciergeMCPFragmentFile) + } + for _, want := range []string{"name: platform", "command: molecule-platform-mcp", "MOLECULE_MCP_MODE: management"} { + if !strings.Contains(string(frag), want) { + t.Errorf("%s missing %q", conciergeMCPFragmentFile, want) + } + } // Idempotent: re-applying onto an already-patched config does NOT add a // second mcp_servers block and does NOT emit a config.yaml overlay (nothing @@ -386,7 +405,10 @@ func TestConciergeIdentityFiles(t *testing.T) { t.Errorf("mcp_servers: appears %d times, want exactly 1", n) } - // No base config (couldn't read one): identity still lands; no config.yaml. + // No base config (couldn't read one): identity still lands; no config.yaml + // — but the fragment STILL ships, so the MCP declaration reaches the + // container even when every base resolution misses (the exact SaaS + // restart-provision gap that booted the pilot concierge toolless). only := conciergeIdentityFiles("Org Concierge", nil) if _, present := only["system-prompt.md"]; !present { t.Error("system prompt must land even with no base config") @@ -394,6 +416,9 @@ func TestConciergeIdentityFiles(t *testing.T) { if _, present := only["config.yaml"]; present { t.Error("no config.yaml overlay when there is no base to append onto") } + if _, present := only[conciergeMCPFragmentFile]; !present { + t.Errorf("%s must ship even with no base config", conciergeMCPFragmentFile) + } } // TestConciergePlatformMCPEnv asserts the platform-MCP env wiring: ADMIN_TOKEN → @@ -461,12 +486,18 @@ func TestApplyConciergeProvisionConfig_OnlyPlatformGetsOrgMCP(t *testing.T) { if _, ok := env["MOLECULE_API_KEY"]; ok { t.Errorf("SECURITY: ordinary workspace leaked MOLECULE_API_KEY (org-admin token): %v", env) } + if _, ok := env["MOLECULE_ORG_API_KEY"]; ok { + t.Errorf("SECURITY: ordinary workspace leaked MOLECULE_ORG_API_KEY: %v", env) + } if _, ok := out["system-prompt.md"]; ok { t.Error("ordinary workspace was given the concierge system prompt") } if strings.Contains(string(out["config.yaml"]), "mcp_servers") { t.Error("SECURITY: ordinary workspace was given the platform mcp_servers config") } + if _, ok := out[conciergeMCPFragmentFile]; ok { + t.Errorf("SECURITY: ordinary workspace was given %s", conciergeMCPFragmentFile) + } if err := mock.ExpectationsWereMet(); err != nil { t.Errorf("unmet sqlmock expectations: %v", err) } @@ -482,6 +513,9 @@ func TestApplyConciergeProvisionConfig_OnlyPlatformGetsOrgMCP(t *testing.T) { if env["MOLECULE_API_KEY"] != "secret-org-admin" { t.Errorf("concierge did not receive the org-admin token; env=%v", env) } + if env["MOLECULE_ORG_API_KEY"] != "secret-org-admin" { + t.Errorf("management tools auth env (MOLECULE_ORG_API_KEY) missing; env=%v", env) + } if _, ok := out["system-prompt.md"]; !ok { t.Error("concierge did not receive the system prompt") }