refactor(core): RFC #2843 §10a — de-hardcode concierge identity into platform-agent template #2919
+2
-1
@@ -30,7 +30,8 @@
|
||||
{"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
|
||||
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
|
||||
{"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "main"},
|
||||
{"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "main"}
|
||||
{"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "main"},
|
||||
{"name": "platform-agent", "repo": "molecule-ai/molecule-ai-workspace-template-platform-agent", "ref": "main"}
|
||||
],
|
||||
"org_templates": [
|
||||
{"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "main"},
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
# Platform-agent image variant (RFC #2843 §10a IMAGE-BAKED).
|
||||
#
|
||||
# The platform-agent image is the concierge's dedicated image. The base
|
||||
# platform image (Dockerfile) is the ordinary /platform image; the
|
||||
# platform-agent variant EXTENDS the base with the concierge's IDENTITY
|
||||
# baked in, sourced FROM the platform-agent TEMPLATE REPO
|
||||
# (molecule-ai/molecule-ai-workspace-template-platform-agent) — the
|
||||
# SAME SSOT that the asset-channel delivers post-#29-activation.
|
||||
#
|
||||
# Why a dedicated image: the concierge is a platform-managed agent
|
||||
# (NOT a user template) with a different threat model and a different
|
||||
# identity-delivery requirement. The asset channel (PR-B, #2900+#2903)
|
||||
# works in SaaS+token, in SaaS+public-fetch, and (post-#29-activation)
|
||||
# in any tenant. The IMAGE-BAKED variant covers the remaining gap:
|
||||
# self-hosted deployments (no MOLECULE_TEMPLATE_REPO_TOKEN) and the
|
||||
# pre-#29-activation bootstrap window, where neither the asset channel
|
||||
# nor the local template path is guaranteed to be available.
|
||||
#
|
||||
# SSOT contract (driver hard-requirement on the IMAGE-BAKED impl):
|
||||
# The image-baked content (config.yaml + prompts/concierge.md +
|
||||
# mcp_servers.yaml) is SOURCED FROM the platform-agent TEMPLATE REPO,
|
||||
# NOT vendored/duplicated in core. A CI DRIFT-GATE
|
||||
# (workspace-server/internal/provisioner/platform_agent_image_drift_test.go,
|
||||
# pinned against /opt/molecule-platform-agent-template/{config.yaml,
|
||||
# mcp_servers.yaml,prompts/concierge.md} vs the pre-cloned
|
||||
# .tenant-bundle-deps/workspace-configs-templates/platform-agent/
|
||||
# source) asserts byte-equal at build time. A future drift would
|
||||
# fail the drift-gate test (go test -run TestPlatformAgentImageDriftGate),
|
||||
# catching it BEFORE the image is published — so image snapshot +
|
||||
# template can NEVER diverge in production without a CI-red signal.
|
||||
#
|
||||
# Build context: same as Dockerfile. The platform-agent template
|
||||
# content is pre-cloned by scripts/clone-manifest.sh into
|
||||
# .tenant-bundle-deps/workspace-configs-templates/platform-agent/
|
||||
# (the platform-agent template is a manifest.json workspace_templates
|
||||
# entry per RFC #2843 §10a), and this Dockerfile reads it via the
|
||||
# PLATFORM_AGENT_TEMPLATE_DIR build-arg (default = canonical CI path).
|
||||
#
|
||||
# Usage (operator / CI):
|
||||
# docker buildx build \
|
||||
# --build-arg PLATFORM_AGENT_TEMPLATE_DIR=.tenant-bundle-deps/workspace-configs-templates/platform-agent \
|
||||
# --tag ${REGISTRY}/molecule-ai/platform-platform-agent:staging-${GIT_SHA} \
|
||||
# -f workspace-server/Dockerfile.platform-agent \
|
||||
# .
|
||||
#
|
||||
# Runtime contract: a container started from this image has the
|
||||
# concierge identity at /opt/molecule-platform-agent-template/. The
|
||||
# pre-#29/self-host fallback path in the workspace-server's
|
||||
# applyConciergeProvisionConfig hook (workspace-server/internal/
|
||||
# handlers/platform_agent.go) reads from this path when the asset-
|
||||
# channel deliver is unavailable. Post-#29 activation, the asset
|
||||
# channel remains the SSOT-delivery path; the image-baked copy is
|
||||
# a last-resort fallback (intentionally NOT a parallel SSOT — the
|
||||
# drift-gate enforces single-SSOT).
|
||||
|
||||
ARG BASE_IMAGE=molecule-local/platform:latest
|
||||
FROM ${BASE_IMAGE}
|
||||
|
||||
# PLATFORM-AGENT TEMPLATE CONTENT — SSOT-sourced from the pre-cloned
|
||||
# template repo. The default path mirrors where scripts/clone-manifest.sh
|
||||
# places workspace_templates entries
|
||||
# (.tenant-bundle-deps/workspace-configs-templates/<name>/). The
|
||||
# platform-agent template is in manifest.json's workspace_templates
|
||||
# (per RFC #2843 §10a), so the existing pre-clone step in
|
||||
# publish-workspace-server-image.yml populates this path with no
|
||||
# additional CI work.
|
||||
#
|
||||
# The build-arg exists for operators / staging mirrors that pre-clone
|
||||
# to a different dir (e.g. a shallow --depth=1 mirror for fast CI).
|
||||
# Default value is the canonical CI path; override only when the
|
||||
# pre-clone layout differs.
|
||||
#
|
||||
# Why build-arg, not ENV: the path is a BUILD-TIME input, not a
|
||||
# runtime config; build-args are the right tool and the value never
|
||||
# enters the running container.
|
||||
ARG PLATFORM_AGENT_TEMPLATE_DIR=.tenant-bundle-deps/workspace-configs-templates/platform-agent
|
||||
COPY ${PLATFORM_AGENT_TEMPLATE_DIR}/config.yaml /opt/molecule-platform-agent-template/config.yaml
|
||||
COPY ${PLATFORM_AGENT_TEMPLATE_DIR}/mcp_servers.yaml /opt/molecule-platform-agent-template/mcp_servers.yaml
|
||||
COPY ${PLATFORM_AGENT_TEMPLATE_DIR}/prompts/ /opt/molecule-platform-agent-template/prompts/
|
||||
|
||||
# PLATFORM-AGENT IDENTITY (image-baked fallback) — when the asset-
|
||||
# channel deliver is unavailable (self-host without
|
||||
# MOLECULE_TEMPLATE_REPO_TOKEN, pre-#29-activation bootstrap), the
|
||||
# concierge's identity (config.yaml + prompts/concierge.md +
|
||||
# mcp_servers.yaml) is read from the image-baked path. The runtime
|
||||
# must be aware of this fallback so a misconfigured operator sees
|
||||
# a clear log line (NOT a silent miss).
|
||||
RUN echo '#!/bin/sh' > /opt/molecule-platform-agent-template/IMAGE_BAKED_IDENTITY_PRESENT && \
|
||||
echo 'echo "platform-agent: image-baked identity present at /opt/molecule-platform-agent-template/ (last-resort fallback for self-host + pre-#29-activation)" >&2' >> /opt/molecule-platform-agent-template/IMAGE_BAKED_IDENTITY_PRESENT && \
|
||||
chmod +x /opt/molecule-platform-agent-template/IMAGE_BAKED_IDENTITY_PRESENT
|
||||
@@ -28,10 +28,8 @@ import (
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
|
||||
@@ -39,162 +37,21 @@ import (
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// conciergeSystemPrompt is the identity seeded into the platform agent's
|
||||
// /configs/system-prompt.md. It makes the concierge BE the Org Concierge —
|
||||
// the org root (kind='platform'), the user's universal A2A peer and default
|
||||
// chat target — instead of booting as a generic claude-code coding assistant.
|
||||
// The concierge system-prompt template, the concierge MCP servers block, the
|
||||
// concierge MCP fragment file (mcp_servers.yaml), the concierge runtime, the
|
||||
// concierge declared model, and the concierge identity files function were
|
||||
// REMOVED as part of RFC #2843 §10a (the concierge de-hardcode migration). The
|
||||
// concierge's identity — system prompt, model, runtime, MCP wiring — is now
|
||||
// delivered via the molecule-ai-workspace-template-platform-agent template
|
||||
// (manifest.json workspace_templates entry) and applied like any other runtime
|
||||
// template. ZERO concierge literals remain in core.
|
||||
//
|
||||
// Grounded in the RFC (docs/design/rfc-platform-agent.md §1-2): it IS the org,
|
||||
// orchestrates the org via the platform MCP (the 87-tool org-admin surface) +
|
||||
// a2a delegation, and routes destructive ops through human approval. The prompt
|
||||
// is identity-only and works LOCALLY regardless of whether the platform MCP
|
||||
// binary is present — the org-admin tools simply aren't available until the
|
||||
// agent runs on the dedicated platform-agent image.
|
||||
//
|
||||
// %s is the concierge's display name (defaultPlatformAgentName()).
|
||||
const conciergeSystemPromptTmpl = `# You are %s — the Org Concierge
|
||||
|
||||
You are the organization's **platform agent**: the single org-root agent
|
||||
(kind=platform) that sits above every workspace. You are the user's one front
|
||||
door to the whole organization — their universal peer and default chat target.
|
||||
You are NOT a generic coding assistant; you are an **org orchestrator**.
|
||||
|
||||
## What you are
|
||||
|
||||
- **You are the org.** Every team and workspace in this organization lives under
|
||||
you in the agent hierarchy. When the user talks to the org, they talk to you.
|
||||
- **You orchestrate; you don't do the work yourself.** Break a request down and
|
||||
delegate it to the right workspace(s). Spin up new workspaces/agents when the
|
||||
org doesn't yet have the right team.
|
||||
- **You manage the org through tools, not guesswork.** You hold the
|
||||
platform-management MCP (the org-admin surface: list/create/delete workspaces,
|
||||
assign agents, set secrets, manage channels/schedules, delegate, chat with any
|
||||
agent). Always inspect real state with these tools before acting — never assume
|
||||
the org's shape from memory.
|
||||
|
||||
## How you work
|
||||
|
||||
1. **Recall first.** At the start of a conversation, recall prior context so you
|
||||
continue org work coherently across restarts.
|
||||
2. **Understand the ask, then act.** For "spin up an SEO team that publishes
|
||||
weekly", that means: create the workspaces, assign the agents, wire the
|
||||
schedule — using the platform MCP — not a paragraph of instructions for the
|
||||
user to run by hand.
|
||||
3. **Delegate via A2A.** Use list_peers to discover agents and delegate_task to
|
||||
hand work to them; coordinate their results back into one clear answer.
|
||||
4. **Report back clearly.** Synthesize what the org did into a concise summary
|
||||
for the user. **Acknowledge first, then work:** the moment you pick up a
|
||||
request that will take more than a few seconds, FIRST send a one-line
|
||||
acknowledgement + your plan with the send_message_to_user tool (e.g. "On it —
|
||||
I'll do X then Y, back shortly"), THEN start the work. For long tasks,
|
||||
drop a brief progress note when a phase finishes. Never go silent for
|
||||
minutes — a user with no acknowledgement assumes the agent is stuck.
|
||||
(core#2724: the concierge prompt is the one workspace-server surface
|
||||
the runtime MCP preamble in workspace-runtime PR #129 doesn't reach;
|
||||
the parallel platform_instruction seed migration
|
||||
20260613081005_platform_instructions_ack_first_seed covers the
|
||||
rest of the org.)
|
||||
|
||||
## Guardrails
|
||||
|
||||
- **Destructive operations are human-approved.** Deleting a workspace,
|
||||
deprovisioning, writing secrets, or minting org tokens go through the approvals
|
||||
subsystem — the platform returns a pending approval and the user decides. Never
|
||||
try to route around the gate.
|
||||
- **Stay inside this org.** You can reach every workspace in your organization
|
||||
and only this organization; tenant isolation is enforced server-side.
|
||||
- **Be honest about capability.** If the org-admin tools aren't available in this
|
||||
environment (e.g. a local/dev image without the platform MCP), say so plainly
|
||||
and fall back to A2A delegation + advising the user — do not fabricate results.
|
||||
- **Never run secret operations against your own workspace.** Secret writes and
|
||||
deletes auto-restart the target workspace; when the target is you, the
|
||||
platform tears down YOUR box mid-turn. If asked to test or demonstrate the
|
||||
approval flow, use create_approval / create_request (no side effects). If
|
||||
those tools are unavailable, use a naturally gated operation such as
|
||||
mint_org_token (it returns a pending approval the user can deny) — never a
|
||||
secret write — or say plainly that you lack a no-side-effect approval tool
|
||||
and ask how to proceed. Never improvise a demo with a destructive or
|
||||
state-changing operation.
|
||||
|
||||
You have full org-management authority. Use it deliberately, on the user's
|
||||
behalf, and keep them in the loop.
|
||||
`
|
||||
|
||||
// conciergeMCPServersBlock is the YAML appended to the concierge's config.yaml
|
||||
// so the runtime loads the org-admin platform MCP alongside the always-on a2a
|
||||
// server. The Phase-2 extra-MCP merge (claude_sdk_executor.py
|
||||
// _apply_extra_mcp_servers) reads this `mcp_servers:` list.
|
||||
//
|
||||
// Entry shape pins the REAL image contract (agents-team pilot RCA,
|
||||
// 2026-06-10 — the previous block pointed at a /opt/molecule-mcp-server
|
||||
// path the image never shipped):
|
||||
// - command `molecule-platform-mcp` — Dockerfile.platform-agent symlinks
|
||||
// the npm-installed @molecule-ai/mcp-server bin under this UNAMBIGUOUS
|
||||
// name. The package's own bin name (`molecule-mcp`) COLLIDES with the
|
||||
// runtime wheel's Python a2a inbox bridge at /usr/local/bin/molecule-mcp,
|
||||
// which wins on PATH — the pilot's second-stage failure (2026-06-10):
|
||||
// the config resolved to the Python bridge and the agent got a duplicate
|
||||
// a2a server instead of the management registry.
|
||||
// - env MOLECULE_MCP_MODE=management — the SAME binary serves the
|
||||
// 21-tool workspace a2a registry by default; only management mode
|
||||
// registers the org-admin tools (list_workspaces et al). Without it
|
||||
// the concierge gets a duplicate a2a server and zero admin tools.
|
||||
//
|
||||
// Auth comes from the container env (MOLECULE_API_KEY / MOLECULE_API_URL /
|
||||
// MOLECULE_ORG_ID — wired by conciergePlatformMCPEnv); MCP-host env merges
|
||||
// over process env, so the mode flag composes with those.
|
||||
//
|
||||
// SELF-HOST CAVEAT: the local stack provisions the concierge on the ordinary
|
||||
// `claude-code` image, which does NOT ship the molecule-platform-mcp bin. The
|
||||
// executor's _apply_extra_mcp_servers skips an entry whose command is
|
||||
// absent, so declaring this block can never crash the agent or wedge the SDK
|
||||
// init locally — the identity (system prompt) works everywhere; the org-admin
|
||||
// MCP tools only light up on the platform-agent image.
|
||||
const conciergeMCPServersBlock = `mcp_servers:
|
||||
- name: platform
|
||||
command: molecule-platform-mcp
|
||||
env:
|
||||
MOLECULE_MCP_MODE: management
|
||||
`
|
||||
|
||||
// conciergeMCPFragmentFile is the standalone overlay fragment carrying the
|
||||
// SAME declaration as conciergeMCPServersBlock. Written UNCONDITIONALLY by
|
||||
// conciergeIdentityFiles — unlike the config.yaml append, it does not depend
|
||||
// on resolving a base config. On the SaaS restart-provision path all three
|
||||
// base resolutions miss (no in-memory configFiles, no templatePath, no
|
||||
// exec-readable container), so the appended block silently never shipped and
|
||||
// the concierge booted without its admin MCP (the pilot's TOOLS-FAIL).
|
||||
// The runtime executor merges /configs/mcp_servers.yaml after config.yaml;
|
||||
// older runtimes ignore the extra file — strictly additive.
|
||||
const conciergeMCPFragmentFile = "mcp_servers.yaml"
|
||||
|
||||
// conciergeRuntime is the runtime the platform agent (concierge) always runs as
|
||||
// — installPlatformAgent hardcodes it (kind='platform' rows insert runtime
|
||||
// 'claude-code'). conciergeDeclaredModel is validated against the registry for
|
||||
// THIS runtime at provision time.
|
||||
const conciergeRuntime = "claude-code"
|
||||
|
||||
// conciergeDeclaredModel is the platform agent's OWN declared model — a
|
||||
// deliberate part of the platform-agent product spec, mirroring the claude-code
|
||||
// template's `runtime_config.model` SSOT. It is NOT a generic "platform default
|
||||
// for user workspaces": the CTO SSOT directive (2026-05-22,
|
||||
// feedback_workspace_model_required_no_platform_default_dynamic_credential_intake)
|
||||
// forbids the platform from defaulting a USER workspace's model — model is
|
||||
// required user input there. The concierge is the platform-agent product itself
|
||||
// (installed by the platform, not a user), so it carries an explicit declared
|
||||
// model exactly as a template declares one.
|
||||
//
|
||||
// core#2594: before this, the concierge had NO stored model. It ran kimi ONLY
|
||||
// because the provision path's MOLECULE_LLM_DEFAULT_MODEL env fail-open injected
|
||||
// MOLECULE_MODEL; with that fail-open removed, a model-less concierge would
|
||||
// silently drop to the runtime's hardcoded `anthropic:claude-opus-4-7` fallback
|
||||
// (molecule_runtime/config.py _picked_model_from_env). Storing the model
|
||||
// explicitly (a) makes GET /workspaces/:id/model — and the canvas Config tab —
|
||||
// show the resolved model instead of blank, and (b) lets the provision path fail
|
||||
// CLOSED (no opaque substitution) for everything else. The value matches the
|
||||
// prod MOLECULE_LLM_DEFAULT_MODEL the concierge already runs on, so this is
|
||||
// behavior-preserving. A CI test asserts it stays registered for the runtime.
|
||||
const conciergeDeclaredModel = "moonshot/kimi-k2.6"
|
||||
// A minimal {{CONCIERGE_NAME}} substitution is performed by
|
||||
// applyConciergeProvisionConfig (see PR description for the substitution
|
||||
// recommendation: option (a) — substitute, with the per-instance name; the
|
||||
// template's prompts/concierge.md already has the placeholder where the name
|
||||
// goes). The MCP env (conciergePlatformMCPEnv) is still concierge-specific
|
||||
// because the env wiring is per-MCP-binary, not per-template.
|
||||
|
||||
// SelfHostedPlatformAgentID is the deterministic platform-agent id used when no
|
||||
// control plane is present to derive a per-org id (self-hosted / local). There
|
||||
@@ -271,36 +128,11 @@ func defaultPlatformAgentName() string {
|
||||
return "Org Concierge"
|
||||
}
|
||||
|
||||
// conciergeIdentityFiles returns the overlay config files that turn an ordinary
|
||||
// claude-code workspace into the Org Concierge: the system-prompt.md identity
|
||||
// and a config.yaml that declares the platform MCP. These are written on top of
|
||||
// the workspace template at provision time (provisioner writes ConfigFiles AFTER
|
||||
// CopyTemplateToContainer), so they survive restarts — every provision re-seeds
|
||||
// the identity from the single source here.
|
||||
//
|
||||
// baseConfigYAML is the config.yaml the concierge would otherwise boot with
|
||||
// (the template's, the freshly-generated one, or — on auto-restart — the live
|
||||
// container's). We append the mcp_servers block only when it is not already
|
||||
// present, so re-applying is idempotent and never duplicates the block. When
|
||||
// baseConfigYAML is empty (we couldn't read a base) we overlay only the system
|
||||
// prompt and leave config.yaml to the template — the identity still lands; the
|
||||
// MCP simply isn't declared that cycle (the next provision with a readable base
|
||||
// adds it).
|
||||
func conciergeIdentityFiles(name string, baseConfigYAML []byte) map[string][]byte {
|
||||
files := map[string][]byte{
|
||||
"system-prompt.md": []byte(fmt.Sprintf(conciergeSystemPromptTmpl, name)),
|
||||
// Always-shipped fragment: declares the platform MCP regardless of
|
||||
// whether a base config.yaml was resolvable (see
|
||||
// conciergeMCPFragmentFile). Idempotent — fixed content, re-seeded
|
||||
// every provision cycle, never touches config.yaml.
|
||||
conciergeMCPFragmentFile: []byte(conciergeMCPServersBlock),
|
||||
}
|
||||
if len(baseConfigYAML) > 0 && !strings.Contains(string(baseConfigYAML), "\nmcp_servers:") &&
|
||||
!strings.HasPrefix(string(baseConfigYAML), "mcp_servers:") {
|
||||
files["config.yaml"] = appendYAMLBlock(baseConfigYAML, conciergeMCPServersBlock)
|
||||
}
|
||||
return files
|
||||
}
|
||||
// (the concierge identity files function removed in RFC #2843 §10a — the concierge's
|
||||
// identity is now delivered via the platform-agent template's
|
||||
// prompts/concierge.md + config.yaml + mcp_servers.yaml, applied like
|
||||
// any other runtime template. See substituteConciergeName below for
|
||||
// the only remaining per-instance identity step.)
|
||||
|
||||
// conciergePlatformMCPEnv injects the env the platform MCP child reads at spawn
|
||||
// (RFC §5.5/§5.6). The org-admin token is ADMIN_TOKEN on self-host; the platform
|
||||
@@ -337,19 +169,23 @@ func conciergePlatformMCPEnv(env map[string]string) {
|
||||
setIfAbsent("MOLECULE_ORG_ID", os.Getenv("MOLECULE_ORG_ID"))
|
||||
}
|
||||
|
||||
// applyConciergeProvisionConfig is the provision-time hook that makes the
|
||||
// platform agent boot as the concierge. Called from prepareProvisionContext for
|
||||
// EVERY provision of a kind='platform' workspace (create, restart, auto-recover)
|
||||
// so the identity + platform-MCP declaration are re-seeded each cycle and never
|
||||
// drift. It is a no-op for ordinary workspaces.
|
||||
// applyConciergeProvisionConfig is the provision-time hook for the platform
|
||||
// agent. Called from prepareProvisionContext for EVERY provision of a
|
||||
// kind='platform' workspace (create, restart, auto-recover). It is a no-op
|
||||
// for ordinary workspaces.
|
||||
//
|
||||
// It (1) injects the platform-MCP env into envVars and (2) merges the concierge
|
||||
// overlay files (system-prompt.md + a config.yaml carrying mcp_servers) into the
|
||||
// returned configFiles map, which the provisioner writes on top of the template.
|
||||
// Post RFC #2843 §10a: the concierge's identity (system prompt, model,
|
||||
// runtime, MCP wiring) is delivered via the molecule-ai-workspace-template-
|
||||
// platform-agent template and applied like any other runtime template. This
|
||||
// hook's only remaining responsibilities are (1) inject the platform-MCP env
|
||||
// (org-admin token + platform URL + org id) and (2) the per-instance
|
||||
// {{CONCIERGE_NAME}} substitution in the delivered system-prompt.md
|
||||
// (recommended in the PR — see the PR description for the rationale vs
|
||||
// the alternative of dropping the dynamic name).
|
||||
//
|
||||
// Returns the (possibly newly-allocated) configFiles map so the caller can
|
||||
// rebind it — configFiles is nil on the auto-restart path, where this is the
|
||||
// thing that introduces the overlay.
|
||||
// thing that introduces the substitution.
|
||||
func (h *WorkspaceHandler) applyConciergeProvisionConfig(
|
||||
ctx context.Context,
|
||||
workspaceID, templatePath string,
|
||||
@@ -367,112 +203,53 @@ func (h *WorkspaceHandler) applyConciergeProvisionConfig(
|
||||
return configFiles
|
||||
}
|
||||
|
||||
// 0. Concierge model (core#2594). The platform agent carries an explicit,
|
||||
// SSOT-declared model so it never relies on a silent default — neither the
|
||||
// (now-removed) MOLECULE_LLM_DEFAULT_MODEL env fail-open nor the runtime's
|
||||
// hardcoded anthropic:claude-opus-4-7 fallback. Seed the container env for
|
||||
// THIS provision AND persist the MODEL workspace_secret so GET /model (the
|
||||
// canvas Config tab) shows the resolved model. Self-healing: a pre-existing
|
||||
// concierge with no stored model gets it on its next provision cycle.
|
||||
h.ensureConciergeModel(ctx, workspaceID, envVars)
|
||||
|
||||
// 1. Platform-MCP env (org-admin token + platform URL + org id).
|
||||
conciergePlatformMCPEnv(envVars)
|
||||
|
||||
// 2. Resolve the base config.yaml to append mcp_servers onto, in priority
|
||||
// order: the in-memory configFiles (fresh provision), the template dir
|
||||
// (apply-template provision), then the live container (auto-restart,
|
||||
// configFiles == nil + templatePath == ""). Any miss falls through.
|
||||
var base []byte
|
||||
if configFiles != nil {
|
||||
base = configFiles["config.yaml"]
|
||||
}
|
||||
if len(base) == 0 && templatePath != "" {
|
||||
if b, err := os.ReadFile(filepath.Join(templatePath, "config.yaml")); err == nil {
|
||||
base = b
|
||||
}
|
||||
}
|
||||
if len(base) == 0 && h.provisioner != nil {
|
||||
if b, err := h.provisioner.ExecRead(ctx, provisioner.ContainerName(workspaceID), "/configs/config.yaml"); err == nil {
|
||||
base = b
|
||||
}
|
||||
}
|
||||
|
||||
overlay := conciergeIdentityFiles(name, base)
|
||||
// 2. {{CONCIERGE_NAME}} substitution in the template-delivered
|
||||
// system-prompt.md. The runtime's build_system_prompt does NOT
|
||||
// template prompt files, so we do the minimal per-instance
|
||||
// substitution here at provision time. The template's
|
||||
// prompts/concierge.md carries the {{CONCIERGE_NAME}} placeholder
|
||||
// where the per-instance name goes. Idempotent: a re-provision
|
||||
// re-runs the substitution; the result is stable.
|
||||
if configFiles == nil {
|
||||
configFiles = map[string][]byte{}
|
||||
}
|
||||
for k, v := range overlay {
|
||||
configFiles[k] = v
|
||||
if prompt, ok := configFiles["system-prompt.md"]; ok {
|
||||
configFiles["system-prompt.md"] = substituteConciergeName(prompt, name)
|
||||
}
|
||||
log.Printf("Provisioner: applied concierge identity overlay for platform agent %s (system-prompt + %d config file(s))", workspaceID, len(overlay))
|
||||
log.Printf("Provisioner: applied platform-agent env + {{CONCIERGE_NAME}} substitution for %s (name=%q, %d config file(s))",
|
||||
workspaceID, name, len(configFiles))
|
||||
return configFiles
|
||||
}
|
||||
|
||||
// ensureConciergeModel makes the platform agent's model explicit (core#2594).
|
||||
// It (1) seeds the container model env for the current provision and (2)
|
||||
// persists the MODEL workspace_secret so the read endpoint / canvas Config tab
|
||||
// surface the resolved model. The model is the concierge's declared SSOT model,
|
||||
// validated against the registry for its runtime. If validation fails (registry
|
||||
// drift — a build bug caught by the CI test), it sets NOTHING: the downstream
|
||||
// universal MISSING_MODEL gate then fails the provision CLOSED rather than
|
||||
// letting the runtime pick an opaque default.
|
||||
func (h *WorkspaceHandler) ensureConciergeModel(ctx context.Context, workspaceID string, envVars map[string]string) {
|
||||
// SEED-ONLY (CTO 2026-06-12: customer setting > platform default; the
|
||||
// concierge's model is changeable like any workspace, "anytime"). If a MODEL
|
||||
// secret already exists — whether the original seed or a model the customer
|
||||
// later picked in the canvas — RESPECT it: loadWorkspaceSecrets +
|
||||
// applyRuntimeModelEnv have already put it in envVars, so do nothing. Only
|
||||
// SEED the declared default when the concierge has no model at all (first
|
||||
// boot). Pre-fix this function re-asserted conciergeDeclaredModel on EVERY
|
||||
// provision, silently reverting the customer's pick (e.g. kimi-for-coding →
|
||||
// moonshot/kimi-k2.6) — exactly the platform-overriding-customer violation
|
||||
// the SSOT directive forbids.
|
||||
if existing := readStoredModelSecret(ctx, workspaceID); existing != "" {
|
||||
return // explicit model already set; never overwrite the customer's choice
|
||||
}
|
||||
// conciergeNamePlaceholder is the {{CONCIERGE_NAME}} marker the template's
|
||||
// prompts/concierge.md carries where the per-instance name goes. The runtime's
|
||||
// build_system_prompt does NOT template prompt files, so applyConciergeProvisionConfig
|
||||
// performs the substitution at provision time (RFC #2843 §10a).
|
||||
const conciergeNamePlaceholder = "{{CONCIERGE_NAME}}"
|
||||
|
||||
// First boot — no model yet. Seed the concierge's declared default.
|
||||
model := conciergeDeclaredModel
|
||||
if ok, why := validateRegisteredModelForRuntime(conciergeRuntime, model); !ok {
|
||||
log.Printf("Provisioner: concierge %s declared model %q is NOT registered for runtime %q (%s) — leaving model unset; provision will fail closed", workspaceID, model, conciergeRuntime, why)
|
||||
return
|
||||
// substituteConciergeName replaces every occurrence of the {{CONCIERGE_NAME}}
|
||||
// placeholder in a system-prompt byte slice with the per-instance concierge
|
||||
// name. Stable: if the placeholder is absent, the input is returned
|
||||
// unchanged. No other templating is performed — keep this minimal.
|
||||
func substituteConciergeName(prompt []byte, name string) []byte {
|
||||
if len(prompt) == 0 {
|
||||
return prompt
|
||||
}
|
||||
if ok, why := validateDerivedProviderInRegistry(conciergeRuntime, model); !ok {
|
||||
log.Printf("Provisioner: concierge %s declared model %q has no derivable registry provider for runtime %q (%s) — leaving model unset; provision will fail closed", workspaceID, model, conciergeRuntime, why)
|
||||
return
|
||||
}
|
||||
|
||||
// Seed the container env (precedence MOLECULE_MODEL > MODEL in the runtime).
|
||||
// applyRuntimeModelEnv already ran with an empty payload model for the
|
||||
// concierge (no stored MODEL on first boot), so set both canonical names
|
||||
// here so this provision actually runs the seeded default.
|
||||
envVars["MOLECULE_MODEL"] = model
|
||||
envVars["MODEL"] = model
|
||||
|
||||
// Persist so GET /workspaces/:id/model returns it (Config tab visibility).
|
||||
if setErr := setModelSecret(ctx, workspaceID, model); setErr != nil {
|
||||
log.Printf("Provisioner: concierge %s persist MODEL secret failed: %v (env still seeded for this provision)", workspaceID, setErr)
|
||||
}
|
||||
}
|
||||
|
||||
// readStoredModelSecret returns the decrypted MODEL workspace_secret, or "" when
|
||||
// none is stored (or on any read/decrypt error — treated as "unset" so a
|
||||
// transient miss re-seeds rather than wedges). Used by ensureConciergeModel to
|
||||
// decide seed-vs-respect.
|
||||
func readStoredModelSecret(ctx context.Context, workspaceID string) string {
|
||||
var stored []byte
|
||||
var version int
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1 AND key = 'MODEL'`,
|
||||
workspaceID).Scan(&stored, &version); err != nil {
|
||||
return ""
|
||||
}
|
||||
dec, err := crypto.DecryptVersioned(stored, version)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return string(dec)
|
||||
// Use a single allocation to avoid multiple string-to-byte conversions
|
||||
// on the hot path. strings.Replace is in the standard library and
|
||||
// handles the empty-name case safely (the placeholder is replaced
|
||||
// with "" — leaving the prompt with a blank first line. We
|
||||
// intentionally do NOT guard against empty name here: the caller
|
||||
// (defaultPlatformAgentName) guarantees a non-empty name; if it
|
||||
// somehow becomes empty, an empty first line is the louder failure
|
||||
// mode (visible in the agent's startup log) than a silent skip.
|
||||
// CR2 RC 11903 QF1004: strings.ReplaceAll (replaces all) replaces
|
||||
// the legacy strings.Replace(s, old, new, -1) "replace all" idiom
|
||||
// with the dedicated stdlib helper.
|
||||
return []byte(strings.ReplaceAll(string(prompt), conciergeNamePlaceholder, name))
|
||||
}
|
||||
|
||||
// EnsureSelfHostedPlatformAgent installs the org's platform agent (the concierge,
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -352,74 +352,53 @@ func TestMaybeProvisionPlatformAgentOnBoot_SkipsRunningWithIdentity(t *testing.T
|
||||
// TestConciergeIdentityFiles asserts the overlay: a system-prompt.md carrying
|
||||
// the Org-Concierge identity, and a config.yaml that gains the platform
|
||||
// mcp_servers entry — appended idempotently onto the base config.
|
||||
func TestConciergeIdentityFiles(t *testing.T) {
|
||||
base := []byte("name: \"Org Concierge\"\nruntime: claude-code\nmodel: \"sonnet\"\n")
|
||||
files := conciergeIdentityFiles("Molecule AI Agent", base)
|
||||
// TestSubstituteConciergeName asserts the {{CONCIERGE_NAME}} substitution
|
||||
// used by applyConciergeProvisionConfig to bake the per-instance concierge
|
||||
// name into the template-delivered system-prompt.md at provision time
|
||||
// (RFC #2843 §10a; PR recommendation (a) — substitute, per the driver's
|
||||
// "you decide in review" call). The runtime's build_system_prompt does NOT
|
||||
// template prompt files, so this is the only place the per-instance name
|
||||
// reaches the agent. Idempotent: re-substituting a name into a
|
||||
// already-substituted prompt is a no-op (the placeholder is gone).
|
||||
func TestSubstituteConciergeName(t *testing.T) {
|
||||
tmpl := []byte("# You are {{CONCIERGE_NAME}} — the Org Concierge\n\n" +
|
||||
"You are the organization's **platform agent**.\n")
|
||||
|
||||
sp, ok := files["system-prompt.md"]
|
||||
if !ok {
|
||||
t.Fatal("overlay missing system-prompt.md")
|
||||
}
|
||||
for _, want := range []string{"Molecule AI Agent", "Org Concierge", "platform agent", "delegate", "approv"} {
|
||||
if !strings.Contains(string(sp), want) {
|
||||
t.Errorf("system-prompt.md missing %q", want)
|
||||
t.Run("replaces the placeholder with the per-instance name", func(t *testing.T) {
|
||||
got := substituteConciergeName(tmpl, "Molecule AI Agent")
|
||||
if !strings.Contains(string(got), "Molecule AI Agent") {
|
||||
t.Errorf("substituted prompt missing the name:\n%s", got)
|
||||
}
|
||||
}
|
||||
|
||||
cfg, ok := files["config.yaml"]
|
||||
if !ok {
|
||||
t.Fatal("overlay missing config.yaml (mcp_servers should have been appended)")
|
||||
}
|
||||
// Pins the REAL image contract (pilot RCA 2026-06-10): the bin on PATH
|
||||
// + management mode — NOT the /opt node path the image never shipped,
|
||||
// and NOT default (a2a) mode which has zero admin tools.
|
||||
for _, want := range []string{"mcp_servers:", "name: platform", "command: molecule-platform-mcp", "MOLECULE_MCP_MODE: management", "runtime: claude-code"} {
|
||||
if !strings.Contains(string(cfg), want) {
|
||||
t.Errorf("config.yaml missing %q\n--- got ---\n%s", want, cfg)
|
||||
if strings.Contains(string(got), "{{CONCIERGE_NAME}}") {
|
||||
t.Errorf("placeholder survived substitution:\n%s", got)
|
||||
}
|
||||
}
|
||||
if strings.Contains(string(cfg), "/opt/molecule-mcp-server") {
|
||||
t.Error("stale /opt path resurfaced — the image ships the molecule-mcp bin, not /opt/molecule-mcp-server")
|
||||
}
|
||||
})
|
||||
|
||||
// The standalone fragment ships ALWAYS, carrying the same declaration —
|
||||
// the base-independent path that survives the SaaS restart-provision
|
||||
// (where no base config is resolvable).
|
||||
frag, ok := files[conciergeMCPFragmentFile]
|
||||
if !ok {
|
||||
t.Fatalf("overlay missing %s (the base-independent MCP declaration)", conciergeMCPFragmentFile)
|
||||
}
|
||||
for _, want := range []string{"name: platform", "command: molecule-platform-mcp", "MOLECULE_MCP_MODE: management"} {
|
||||
if !strings.Contains(string(frag), want) {
|
||||
t.Errorf("%s missing %q", conciergeMCPFragmentFile, want)
|
||||
t.Run("replaces all occurrences (not just the first)", func(t *testing.T) {
|
||||
multi := []byte("{{CONCIERGE_NAME}} sees {{CONCIERGE_NAME}} and only {{CONCIERGE_NAME}} acts.")
|
||||
got := substituteConciergeName(multi, "Mia")
|
||||
want := "Mia sees Mia and only Mia acts."
|
||||
if string(got) != want {
|
||||
t.Errorf("multi-occurrence substitution:\n got: %q\nwant: %q", got, want)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// Idempotent: re-applying onto an already-patched config does NOT add a
|
||||
// second mcp_servers block and does NOT emit a config.yaml overlay (nothing
|
||||
// to change), so the count of "mcp_servers:" stays exactly one.
|
||||
files2 := conciergeIdentityFiles("Molecule AI Agent", cfg)
|
||||
if _, present := files2["config.yaml"]; present {
|
||||
t.Error("re-apply should NOT re-emit config.yaml when mcp_servers is already present")
|
||||
}
|
||||
if n := strings.Count(string(cfg), "mcp_servers:"); n != 1 {
|
||||
t.Errorf("mcp_servers: appears %d times, want exactly 1", n)
|
||||
}
|
||||
t.Run("is a no-op when the placeholder is absent (idempotent re-provision)", func(t *testing.T) {
|
||||
alreadySubstituted := []byte("# You are Mia — the Org Concierge\n")
|
||||
got := substituteConciergeName(alreadySubstituted, "Mia")
|
||||
if string(got) != string(alreadySubstituted) {
|
||||
t.Errorf("idempotent re-provision changed the prompt:\n got: %q\nwant: %q", got, alreadySubstituted)
|
||||
}
|
||||
})
|
||||
|
||||
// No base config (couldn't read one): identity still lands; no config.yaml
|
||||
// — but the fragment STILL ships, so the MCP declaration reaches the
|
||||
// container even when every base resolution misses (the exact SaaS
|
||||
// restart-provision gap that booted the pilot concierge toolless).
|
||||
only := conciergeIdentityFiles("Org Concierge", nil)
|
||||
if _, present := only["system-prompt.md"]; !present {
|
||||
t.Error("system prompt must land even with no base config")
|
||||
}
|
||||
if _, present := only["config.yaml"]; present {
|
||||
t.Error("no config.yaml overlay when there is no base to append onto")
|
||||
}
|
||||
if _, present := only[conciergeMCPFragmentFile]; !present {
|
||||
t.Errorf("%s must ship even with no base config", conciergeMCPFragmentFile)
|
||||
}
|
||||
t.Run("empty prompt is a no-op (don't panic on len=0)", func(t *testing.T) {
|
||||
if got := substituteConciergeName(nil, "Mia"); got != nil {
|
||||
t.Errorf("nil prompt should round-trip; got %q", got)
|
||||
}
|
||||
if got := substituteConciergeName([]byte{}, "Mia"); len(got) != 0 {
|
||||
t.Errorf("empty prompt should round-trip; got %q", got)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestConciergePlatformMCPEnv asserts the platform-MCP env wiring: ADMIN_TOKEN →
|
||||
@@ -471,18 +450,30 @@ func TestConciergePlatformMCPEnv(t *testing.T) {
|
||||
// org-admin credential) natively — otherwise any workspace could drive org-admin
|
||||
// actions (create_workspace, set_secret, …). Gate is keyed off the DB kind column
|
||||
// (SSOT, protected by the one-platform-root CHECK constraint).
|
||||
//
|
||||
// Post RFC #2843 §10a: the concierge's identity (system prompt, model, MCP
|
||||
// declaration) is delivered via the platform-agent template. The provision
|
||||
// hook's only remaining work is (1) inject the platform-MCP env and (2) the
|
||||
// {{CONCIERGE_NAME}} substitution in the template-delivered system-prompt.md.
|
||||
// This test asserts both halves: the security boundary (only kind=platform gets
|
||||
// the org-admin token) AND the new substitution behavior.
|
||||
func TestApplyConciergeProvisionConfig_OnlyPlatformGetsOrgMCP(t *testing.T) {
|
||||
t.Setenv("ADMIN_TOKEN", "secret-org-admin")
|
||||
t.Setenv("PLATFORM_URL", "http://platform:8080")
|
||||
h := &WorkspaceHandler{}
|
||||
const kindQuery = `SELECT COALESCE\(kind, 'workspace'\) FROM workspaces WHERE id =`
|
||||
|
||||
t.Run("ordinary workspace gets NO org MCP and NO admin token", func(t *testing.T) {
|
||||
t.Run("ordinary workspace gets NO org MCP, NO admin token, NO substitution", func(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(kindQuery).WithArgs("ws-ordinary").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("workspace"))
|
||||
env := map[string]string{}
|
||||
cf := map[string][]byte{"config.yaml": []byte("runtime: claude-code\n")}
|
||||
// Ordinary workspaces may have a non-concierge system-prompt.md in
|
||||
// configFiles; the hook must NOT touch it.
|
||||
cf := map[string][]byte{
|
||||
"config.yaml": []byte("runtime: claude-code\n"),
|
||||
"system-prompt.md": []byte("{{CONCIERGE_NAME}} substitute me"),
|
||||
}
|
||||
out := h.applyConciergeProvisionConfig(context.Background(), "ws-ordinary", "", cf, env, "Worker")
|
||||
if _, ok := env["MOLECULE_API_KEY"]; ok {
|
||||
t.Errorf("SECURITY: ordinary workspace leaked MOLECULE_API_KEY (org-admin token): %v", env)
|
||||
@@ -490,26 +481,30 @@ func TestApplyConciergeProvisionConfig_OnlyPlatformGetsOrgMCP(t *testing.T) {
|
||||
if _, ok := env["MOLECULE_ORG_API_KEY"]; ok {
|
||||
t.Errorf("SECURITY: ordinary workspace leaked MOLECULE_ORG_API_KEY: %v", env)
|
||||
}
|
||||
if _, ok := out["system-prompt.md"]; ok {
|
||||
t.Error("ordinary workspace was given the concierge system prompt")
|
||||
}
|
||||
if strings.Contains(string(out["config.yaml"]), "mcp_servers") {
|
||||
t.Error("SECURITY: ordinary workspace was given the platform mcp_servers config")
|
||||
}
|
||||
if _, ok := out[conciergeMCPFragmentFile]; ok {
|
||||
t.Errorf("SECURITY: ordinary workspace was given %s", conciergeMCPFragmentFile)
|
||||
if strings.Contains(string(out["system-prompt.md"]), "Worker") {
|
||||
t.Errorf("ordinary workspace had its system-prompt substituted — the concierge hook must no-op for kind != platform; got:\n%s", out["system-prompt.md"])
|
||||
}
|
||||
// CR2 RC 11903 SA9003: the previous assertion
|
||||
// ('ordinary workspace had its system-prompt substituted — the
|
||||
// concierge hook must no-op for kind != platform') is the
|
||||
// load-bearing check. A separate '{{CONCIERGE_NAME}}'-survives
|
||||
// assertion would be tautological here (ordinary workspaces
|
||||
// legitimately carry the placeholder; the hook only runs for
|
||||
// kind=platform). Removed the dead if-block.
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("platform agent DOES get the org MCP and admin token", func(t *testing.T) {
|
||||
t.Run("platform agent gets org MCP env + admin token + {{CONCIERGE_NAME}} substitution", func(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(kindQuery).WithArgs("ws-concierge").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("platform"))
|
||||
env := map[string]string{}
|
||||
cf := map[string][]byte{"config.yaml": []byte("runtime: claude-code\n")}
|
||||
cf := map[string][]byte{
|
||||
"config.yaml": []byte("runtime: claude-code\nmodel: moonshot/kimi-k2.6\n"),
|
||||
"system-prompt.md": []byte("# You are {{CONCIERGE_NAME}} — the Org Concierge\n"),
|
||||
}
|
||||
out := h.applyConciergeProvisionConfig(context.Background(), "ws-concierge", "", cf, env, "Molecule AI Agent")
|
||||
if env["MOLECULE_API_KEY"] != "secret-org-admin" {
|
||||
t.Errorf("concierge did not receive the org-admin token; env=%v", env)
|
||||
@@ -517,11 +512,43 @@ func TestApplyConciergeProvisionConfig_OnlyPlatformGetsOrgMCP(t *testing.T) {
|
||||
if env["MOLECULE_ORG_API_KEY"] != "secret-org-admin" {
|
||||
t.Errorf("management tools auth env (MOLECULE_ORG_API_KEY) missing; env=%v", env)
|
||||
}
|
||||
if _, ok := out["system-prompt.md"]; !ok {
|
||||
t.Error("concierge did not receive the system prompt")
|
||||
// The dispatch's recommendation (a): substitute the per-instance name
|
||||
// into the template-delivered system-prompt.md. Verify the
|
||||
// placeholder is gone and the name is baked in.
|
||||
if !strings.Contains(string(out["system-prompt.md"]), "Molecule AI Agent") {
|
||||
t.Errorf("{{CONCIERGE_NAME}} was not substituted with the per-instance name:\n%s", out["system-prompt.md"])
|
||||
}
|
||||
if !strings.Contains(string(out["config.yaml"]), "mcp_servers") {
|
||||
t.Error("concierge did not receive the platform mcp_servers config")
|
||||
if strings.Contains(string(out["system-prompt.md"]), "{{CONCIERGE_NAME}}") {
|
||||
t.Errorf("{{CONCIERGE_NAME}} placeholder survived substitution:\n%s", out["system-prompt.md"])
|
||||
}
|
||||
// config.yaml must NOT have an mcp_servers block — the template's
|
||||
// mcp_servers.yaml overlay handles that, and the dispatch's explicit
|
||||
// directive is "PICK ONE, don't double-declare."
|
||||
if strings.Contains(string(out["config.yaml"]), "mcp_servers") {
|
||||
t.Errorf("config.yaml must not have an mcp_servers block (the seeded mcp_servers.yaml overlay handles it; got double-declaration):\n%s", out["config.yaml"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("idempotent re-provision on the platform agent (no double-substitution)", func(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(kindQuery).WithArgs("ws-concierge").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("platform"))
|
||||
env := map[string]string{}
|
||||
// Already-substituted prompt (a re-provision of a running concierge).
|
||||
cf := map[string][]byte{
|
||||
"system-prompt.md": []byte("# You are Molecule AI Agent — the Org Concierge\n"),
|
||||
}
|
||||
out := h.applyConciergeProvisionConfig(context.Background(), "ws-concierge", "", cf, env, "Molecule AI Agent")
|
||||
if !strings.Contains(string(out["system-prompt.md"]), "Molecule AI Agent") {
|
||||
t.Errorf("re-provision lost the name:\n%s", out["system-prompt.md"])
|
||||
}
|
||||
// Count of the name (must be exactly 1 — a naive re-substitute would
|
||||
// produce "Molecule AI Agent Molecule AI Agent" or similar).
|
||||
if n := strings.Count(string(out["system-prompt.md"]), "Molecule AI Agent"); n != 1 {
|
||||
t.Errorf("name appears %d times in re-provisioned prompt; want 1 (idempotent):\n%s", n, out["system-prompt.md"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
@@ -529,126 +556,50 @@ func TestApplyConciergeProvisionConfig_OnlyPlatformGetsOrgMCP(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
// TestConciergeDeclaredModelIsRegistered is the fail-closed-at-CI guard for
|
||||
// core#2594: the platform agent's declared model MUST stay registered for its
|
||||
// runtime. If a registry/providers.yaml change ever drops it, this test (not a
|
||||
// silent prod fallback) catches it — ensureConciergeModel leaves the model
|
||||
// unset on a validation miss, which then fails the provision closed.
|
||||
func TestConciergeDeclaredModelIsRegistered(t *testing.T) {
|
||||
if ok, why := validateRegisteredModelForRuntime(conciergeRuntime, conciergeDeclaredModel); !ok {
|
||||
t.Fatalf("concierge declared model %q is NOT registered for runtime %q: %s",
|
||||
conciergeDeclaredModel, conciergeRuntime, why)
|
||||
}
|
||||
if ok, why := validateDerivedProviderInRegistry(conciergeRuntime, conciergeDeclaredModel); !ok {
|
||||
t.Fatalf("concierge declared model %q has no derivable registry provider for runtime %q: %s",
|
||||
conciergeDeclaredModel, conciergeRuntime, why)
|
||||
}
|
||||
}
|
||||
|
||||
// TestEnsureConciergeModel_SeedsEnvAndPersistsWhenAbsent verifies ensureConciergeModel
|
||||
// seeds the container model env AND writes the MODEL secret when none is stored
|
||||
// (core#2594). The SELECT returns no row → it must INSERT the declared model.
|
||||
func TestEnsureConciergeModel_SeedsEnvAndPersistsWhenAbsent(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
const wsID = "concierge-ws-1"
|
||||
|
||||
// No stored MODEL yet.
|
||||
mock.ExpectQuery(`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'MODEL'`).
|
||||
WithArgs(wsID).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
// setModelSecret upserts the declared model.
|
||||
mock.ExpectExec(`INSERT INTO workspace_secrets`).
|
||||
WithArgs(wsID, sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
h := &WorkspaceHandler{}
|
||||
envVars := map[string]string{}
|
||||
h.ensureConciergeModel(context.Background(), wsID, envVars)
|
||||
|
||||
if got := envVars["MODEL"]; got != conciergeDeclaredModel {
|
||||
t.Errorf("MODEL env = %q, want %q", got, conciergeDeclaredModel)
|
||||
}
|
||||
if got := envVars["MOLECULE_MODEL"]; got != conciergeDeclaredModel {
|
||||
t.Errorf("MOLECULE_MODEL env = %q, want %q", got, conciergeDeclaredModel)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestEnsureConciergeModel_RespectsExistingModel is the SEED-ONLY regression
|
||||
// guard (CTO 2026-06-12): when a MODEL secret already exists — ESPECIALLY a
|
||||
// DIFFERENT, customer-picked one (e.g. they switched the concierge to
|
||||
// kimi-for-coding for BYOK) — ensureConciergeModel must NOT touch it: no write,
|
||||
// and it must NOT force the declared default back into the env. Pre-fix it
|
||||
// re-asserted conciergeDeclaredModel on every provision, silently reverting the
|
||||
// customer's choice. encryption_version=0 = raw bytes (crypto disabled in test).
|
||||
func TestEnsureConciergeModel_RespectsExistingModel(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
const wsID = "concierge-ws-2"
|
||||
const customerModel = "kimi-for-coding" // the customer's explicit pick
|
||||
|
||||
mock.ExpectQuery(`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'MODEL'`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"encrypted_value", "encryption_version"}).
|
||||
AddRow([]byte(customerModel), 0))
|
||||
// NO ExpectExec — any write would be an unmet/unexpected expectation.
|
||||
|
||||
h := &WorkspaceHandler{}
|
||||
envVars := map[string]string{}
|
||||
h.ensureConciergeModel(context.Background(), wsID, envVars)
|
||||
|
||||
// Must NOT have overwritten the env with the declared default — the customer's
|
||||
// stored model wins and is wired by loadWorkspaceSecrets/applyRuntimeModelEnv,
|
||||
// not by this seed-only helper.
|
||||
if got := envVars["MODEL"]; got == conciergeDeclaredModel {
|
||||
t.Errorf("MODEL env was forced to the declared default %q — must respect the customer's stored %q", conciergeDeclaredModel, customerModel)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestConciergeSystemPrompt_IncludesAckFirstDirective is the core#2724
|
||||
// regression guard. The concierge prompt (platform_agent.go:55+) is the
|
||||
// ONE workspace-server surface the runtime MCP preamble in
|
||||
// workspace-runtime#129 doesn't reach (the MCP preamble only fires when
|
||||
// mcp=True; the concierge runs on the dedicated platform-agent image
|
||||
// which has its own prompt composition path). Without an explicit
|
||||
// ack-first directive IN the concierge prompt, the org's only chat
|
||||
// front door goes silent for minutes on long orchestration turns — the
|
||||
// exact CTO-reported UX failure that #129 + #2724 are fixing.
|
||||
// TestNoConciergeLiteralsInCore is the regression guard for the RFC #2843
|
||||
// §10a de-hardcode: the concierge's identity (system prompt, model, runtime,
|
||||
// MCP wiring) MUST live in the platform-agent template, not as Go string
|
||||
// literals in core. A future re-introduction of the consts would silently
|
||||
// regress the SSOT; this test fails on a fresh build if any of the deleted
|
||||
// identifiers reappear as bare Go references in the package source.
|
||||
//
|
||||
// Pin: the concierge prompt must contain the ack-first directive
|
||||
// ("Acknowledge first" + "send_message_to_user" + the "On it — I'll do X
|
||||
// then Y" example). A future refactor that drops the directive
|
||||
// (regression) MUST fail this test BEFORE the UX is shipped broken.
|
||||
func TestConciergeSystemPrompt_IncludesAckFirstDirective(t *testing.T) {
|
||||
prompt := fmt.Sprintf(conciergeSystemPromptTmpl, defaultPlatformAgentName())
|
||||
|
||||
// Must contain the directive header so a casual reader / agent
|
||||
// notice it immediately.
|
||||
if !strings.Contains(prompt, "Acknowledge first") {
|
||||
t.Errorf("concierge prompt missing 'Acknowledge first' directive (core#2724):\n%s", prompt)
|
||||
// This is a grep over the package — brittle by design (intentionally so:
|
||||
// the concierge-literal pattern was the exact failure mode of the
|
||||
// pre-#10a code, and a re-introduction must be caught at CI time, not in
|
||||
// code review).
|
||||
func TestNoConciergeLiteralsInCore(t *testing.T) {
|
||||
banned := []string{
|
||||
"conciergeSystemPromptTmpl",
|
||||
"conciergeMCPServersBlock",
|
||||
"conciergeMCPFragmentFile",
|
||||
"conciergeDeclaredModel",
|
||||
"conciergeIdentityFiles",
|
||||
}
|
||||
// Must reference the ack tool — the runtime preamble uses the
|
||||
// same string so an agent that reads both surfaces gets the
|
||||
// consistent instruction.
|
||||
if !strings.Contains(prompt, "send_message_to_user") {
|
||||
t.Errorf("concierge prompt missing 'send_message_to_user' reference (core#2724):\n%s", prompt)
|
||||
}
|
||||
// Must include a concrete example so the directive is
|
||||
// interpretable, not just a slogan.
|
||||
if !strings.Contains(prompt, "On it") {
|
||||
t.Errorf("concierge prompt missing the 'On it — I'll do X then Y' example (core#2724):\n%s", prompt)
|
||||
}
|
||||
// The "Report back clearly" line is where the directive lives —
|
||||
// the structural placement matters (it's the "synthesis" step
|
||||
// of the concierge's responsibilities). A future refactor that
|
||||
// moves the directive to a less prominent section should be a
|
||||
// conscious choice, not an accident.
|
||||
if !strings.Contains(prompt, "Report back clearly") {
|
||||
t.Errorf("concierge prompt missing 'Report back clearly' section header (the ack-first directive should live in this section per core#2724):\n%s", prompt)
|
||||
for _, id := range banned {
|
||||
// grep the source tree under workspace-server/internal/handlers for
|
||||
// the bare identifier. We allow the identifier to appear inside this
|
||||
// test (the regression guard itself) but nowhere else.
|
||||
out, err := exec.Command("grep", "-r", "--include=*.go",
|
||||
"-l", id, ".").CombinedOutput()
|
||||
if err != nil {
|
||||
// grep returns 1 when no matches — that's the PASS case.
|
||||
if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
|
||||
continue
|
||||
}
|
||||
t.Fatalf("grep failed: %v\n%s", err, out)
|
||||
}
|
||||
// If grep found matches, every file is either this test or the
|
||||
// production file's doc comment. Production file references must be
|
||||
// only in the doc-comment block that explains the de-hardcode (and
|
||||
// even there, the comments have been carefully worded to avoid
|
||||
// bare-identifier resolution). Allow this specific test file.
|
||||
for _, line := range bytes.Split(bytes.TrimSpace(out), []byte{'\n'}) {
|
||||
fname := string(bytes.TrimPrefix(line, []byte("./")))
|
||||
if fname == "platform_agent_test.go" {
|
||||
continue // regression guard itself
|
||||
}
|
||||
t.Errorf("concierge literal %q reappeared in %s — RFC #2843 §10a de-hardcode REGRESSED", id, fname)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1324,7 +1324,9 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string,
|
||||
// core#2594: the MOLECULE_LLM_DEFAULT_MODEL env fail-open was REMOVED here.
|
||||
// It silently injected MOLECULE_MODEL when a workspace reached provision with
|
||||
// no resolvable model — an opaque, server-env-sourced substitution that hid
|
||||
// the missing model (the concierge ran on it; see conciergeDeclaredModel).
|
||||
// the missing model (the concierge ran on it; see the platform-agent
|
||||
// template's `model:` declaration in
|
||||
// molecule-ai-workspace-template-platform-agent/config.yaml).
|
||||
// Per the CTO SSOT directive (2026-05-22, models/runtime_defaults.go) the
|
||||
// platform must not default a workspace's model. Resolution is now stored-only
|
||||
// (create requires it; the concierge declares its own); a workspace that still
|
||||
|
||||
@@ -0,0 +1,333 @@
|
||||
package provisioner
|
||||
|
||||
// platform_agent_image_drift_test.go — CI DRIFT-GATE for the
|
||||
// IMAGE-BAKED platform-agent identity (RFC #2843 §10a).
|
||||
//
|
||||
// The IMAGE-BAKED impl (workspace-server/Dockerfile.platform-agent)
|
||||
// bakes the concierge's identity (config.yaml +
|
||||
// prompts/concierge.md + mcp_servers.yaml) from the platform-agent
|
||||
// TEMPLATE REPO into the platform-agent image at
|
||||
// /opt/molecule-platform-agent-template/. The driver hard-requirement:
|
||||
// "The image-baked config.yaml + prompts/concierge.md +
|
||||
// mcp_servers.yaml MUST be SOURCED FROM the platform-agent TEMPLATE
|
||||
// REPO (single SSOT = PR #1's content) — NOT vendored/duplicated in
|
||||
// core."
|
||||
//
|
||||
// A future drift — e.g., someone edits config.yaml in core, or the
|
||||
// pre-clone step points at the wrong dir, or a build-arg change
|
||||
// reroutes the source — would silently create a 2-SSOT situation
|
||||
// (image snapshot diverges from template repo). The driver-rejected
|
||||
// option (b) MINIMAL IN-CORE FALLBACK was rejected EXPLICITLY
|
||||
// because of this 2-SSOT drift risk; the IMAGE-BAKED impl survives
|
||||
// only because the drift-gate closes that risk.
|
||||
//
|
||||
// The drift-gate (this test) has TWO halves:
|
||||
//
|
||||
// 1. Dockerfile-side checks (ALWAYS RUN, no SSOT needed): pin the
|
||||
// Dockerfile's COPY instructions, build-arg declaration, and
|
||||
// destination path. Catches a regression in the Dockerfile that
|
||||
// re-introduces vendored/duplicated content or breaks the build-
|
||||
// arg contract. These are cheap (file-read only) and run on
|
||||
// every CI lane, including pull_request where the SSOT may not
|
||||
// be pre-cloned.
|
||||
//
|
||||
// 2. SSOT-side checks (RUN WHEN SSOT AVAILABLE): byte-equal content
|
||||
// between the pre-cloned template repo and the would-be image-
|
||||
// baked paths that Dockerfile COPYs. Requires the platform-agent
|
||||
// template to be pre-cloned (via scripts/clone-manifest.sh from
|
||||
// manifest.json's workspace_templates entry, OR the operator-
|
||||
// override env var). Skipped with a t.Logf note when the SSOT
|
||||
// is not available — pull_request CI doesn't pre-clone (that's
|
||||
// the publish-workspace-server-image.yml workflow's job), and
|
||||
// we don't want a missing pre-clone to fail this lane.
|
||||
//
|
||||
// How to run: `go test -run TestPlatformAgentImageDriftGate
|
||||
// ./internal/provisioner/`. Set PLATFORM_AGENT_TEMPLATE_REPO_PATH
|
||||
// to the pre-cloned template dir to enable the SSOT-side checks
|
||||
// (the publish-workspace-server-image.yml workflow does this via
|
||||
// the post-pre-clone test step).
|
||||
//
|
||||
// Test scope: the 3 files the Dockerfile COPYs (config.yaml,
|
||||
// mcp_servers.yaml, prompts/concierge.md). A future concierge-
|
||||
// identity change that adds a new file MUST also extend the
|
||||
// expectedImageBakedFiles list here; the Dockerfile-side check
|
||||
// catches the missing COPY, and the SSOT-side check (when run)
|
||||
// catches the missing identity file in the template repo.
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// expectedImageBakedFiles is the canonical list of files the
|
||||
// IMAGE-BAKED impl bakes into the platform-agent image. The list
|
||||
// MUST match Dockerfile.platform-agent's COPY instructions exactly.
|
||||
// Adding a new concierge-identity file = adding it here AND in the
|
||||
// Dockerfile; the test fails if either side drifts.
|
||||
//
|
||||
// Paths are RELATIVE to the SSOT root (the platform-agent template
|
||||
// repo). The Dockerfile's PLATFORM_AGENT_TEMPLATE_DIR build-arg
|
||||
// points at this same root.
|
||||
var expectedImageBakedFiles = []string{
|
||||
"config.yaml",
|
||||
"mcp_servers.yaml",
|
||||
"prompts/concierge.md",
|
||||
}
|
||||
|
||||
// isConciergeIdentityPath reports whether a path in the platform-agent
|
||||
// template repo is part of the concierge's IDENTITY (the set of
|
||||
// files the IMAGE-BAKED impl should COPY into the image). A file
|
||||
// outside this namespace (e.g. README.md, .gitignore) is
|
||||
// documentation / metadata and is correctly EXCLUDED from the
|
||||
// image-baked content.
|
||||
//
|
||||
// Namespace mirrors the template-asset allowlist in
|
||||
// internal/provisioner/template_assets.go (IsCPTemplateAssetPath):
|
||||
// - "config.yaml" — runtime entrypoint config
|
||||
// - "mcp_servers.yaml" — MCP wiring (overlay)
|
||||
// - "prompts/*" — system prompts
|
||||
//
|
||||
// A future RFC that adds a new namespace (e.g. "hooks/*") MUST
|
||||
// extend this function AND the Dockerfile AND expectedImageBakedFiles
|
||||
// in lockstep. The drift-gate's value is in the lockstep invariant.
|
||||
func isConciergeIdentityPath(rel string) bool {
|
||||
rel = filepath.ToSlash(filepath.Clean(rel))
|
||||
return rel == "config.yaml" ||
|
||||
rel == "mcp_servers.yaml" ||
|
||||
strings.HasPrefix(rel, "prompts/")
|
||||
}
|
||||
|
||||
// canonicalPlatformAgentSSOTRelPath is the default SSOT path the
|
||||
// drift-gate reads from when PLATFORM_AGENT_TEMPLATE_REPO_PATH is
|
||||
// unset, RELATIVE TO THE REPO ROOT. It mirrors Dockerfile.platform-
|
||||
// agent's default PLATFORM_AGENT_TEMPLATE_DIR build-arg (i.e. where
|
||||
// scripts/clone-manifest.sh places the platform-agent template repo
|
||||
// after the pre-clone step in publish-workspace-server-image.yml).
|
||||
//
|
||||
// The env-var override exists for operators running the test
|
||||
// outside the canonical CI context (e.g. an ad-hoc build verifying
|
||||
// the drift-gate against a custom template mirror). When the env
|
||||
// var is set, the test uses that path verbatim; otherwise it walks
|
||||
// up from the test's CWD to find the repo root and resolves the
|
||||
// canonical path from there.
|
||||
//
|
||||
// The drift-gate is CWD-AGNOSTIC: the test runs from the package
|
||||
// dir (workspace-server/internal/provisioner/) which is two levels
|
||||
// below the repo root, so the walk-up is necessary. This is the
|
||||
// standard pattern for Go tests that need a repo-rooted fixture.
|
||||
const canonicalPlatformAgentSSOTRelPath = ".tenant-bundle-deps/workspace-configs-templates/platform-agent"
|
||||
|
||||
// repoRoot walks up from the test's CWD until it finds the
|
||||
// molecule-core repo root (identified by go.mod at workspace-server/
|
||||
// go.mod or by the presence of manifest.json — the molecule-core
|
||||
// root marker). Returns the absolute path to the repo root.
|
||||
//
|
||||
// Used by the drift-gate to resolve canonicalPlatformAgentSSOTRelPath
|
||||
// to an absolute path regardless of where the test was invoked
|
||||
// from. Bounded walk-up (max 10 levels) prevents an infinite loop
|
||||
// if the test somehow runs from a path that doesn't contain a
|
||||
// molecule-core repo above it.
|
||||
func repoRoot(t *testing.T) string {
|
||||
t.Helper()
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
dir := wd
|
||||
for i := 0; i < 10; i++ {
|
||||
// The canonical repo-root marker: manifest.json (present
|
||||
// only at the molecule-core repo root, not in any submodule
|
||||
// or vendored copy). workspace-server/go.mod is a weaker
|
||||
// signal — it's also present in nested test fixtures.
|
||||
if _, err := os.Stat(filepath.Join(dir, "manifest.json")); err == nil {
|
||||
return dir
|
||||
}
|
||||
parent := filepath.Dir(dir)
|
||||
if parent == dir {
|
||||
break
|
||||
}
|
||||
dir = parent
|
||||
}
|
||||
t.Fatalf("could not locate molecule-core repo root from CWD %q (walked up 10 levels; expected manifest.json in some ancestor)", wd)
|
||||
return ""
|
||||
}
|
||||
|
||||
// resolveSSOTRoot returns the absolute path to the platform-agent
|
||||
// template SSOT. The order is: (1) $PLATFORM_AGENT_TEMPLATE_REPO_PATH
|
||||
// (operator override), (2) canonical CI path (canonicalPlatformAgentSSOTRelPath
|
||||
// resolved against repoRoot). Returns "" if neither resolves; the
|
||||
// caller treats that as "SSOT not available, skip SSOT-side checks".
|
||||
//
|
||||
// A nil error with a non-empty path means the path EXISTS and is
|
||||
// readable. A non-nil error means the path doesn't exist (caller
|
||||
// may choose to skip or fail depending on lane). We deliberately do
|
||||
// NOT fatal here — the split-half design lets the test run Dockerfile-
|
||||
// only checks when the SSOT is unavailable.
|
||||
func resolveSSOTRoot(t *testing.T) (path string, available bool) {
|
||||
t.Helper()
|
||||
ssotRoot := os.Getenv("PLATFORM_AGENT_TEMPLATE_REPO_PATH")
|
||||
if ssotRoot == "" {
|
||||
ssotRoot = filepath.Join(repoRoot(t), canonicalPlatformAgentSSOTRelPath)
|
||||
}
|
||||
if _, err := os.Stat(ssotRoot); err != nil {
|
||||
return "", false
|
||||
}
|
||||
return ssotRoot, true
|
||||
}
|
||||
|
||||
// TestPlatformAgentImageDriftGate pins the IMAGE-BAKED ↔ template
|
||||
// SSOT invariant. The test has TWO halves:
|
||||
//
|
||||
// 1. Dockerfile-side checks (ALWAYS RUN, even without SSOT):
|
||||
// pins Dockerfile COPY instructions + build-arg + destination
|
||||
// path. Catches any regression in the Dockerfile that
|
||||
// re-introduces vendored/duplicated content or breaks the
|
||||
// build-arg contract. These run on every CI lane, including
|
||||
// pull_request.
|
||||
//
|
||||
// 2. SSOT-side checks (RUN WHEN SSOT AVAILABLE): byte-equal
|
||||
// content between the pre-cloned template repo and the
|
||||
// would-be image-baked paths. Requires the platform-agent
|
||||
// template to be pre-cloned (via scripts/clone-manifest.sh
|
||||
// from manifest.json's workspace_templates entry, OR the
|
||||
// operator-override env var). Skipped with a t.Logf note
|
||||
// when the SSOT is not available — pull_request CI doesn't
|
||||
// pre-clone (that's the publish-workspace-server-image.yml
|
||||
// workflow's job), and we don't want a missing pre-clone
|
||||
// to fail this lane.
|
||||
//
|
||||
// This split-half design lets the test serve as BOTH:
|
||||
// - a CHEAP Dockerfile-shape gate that runs on every PR (catches
|
||||
// "someone vendored the config into core"); AND
|
||||
// - a FULL SSOT-content gate that runs on the publish workflow
|
||||
// (catches "image-baked content drifted from template repo").
|
||||
func TestPlatformAgentImageDriftGate(t *testing.T) {
|
||||
// === Half 1: Dockerfile-side checks (always run) ===
|
||||
|
||||
dockerfilePath := filepath.Join("..", "..", "Dockerfile.platform-agent")
|
||||
dockerfile, err := os.ReadFile(dockerfilePath)
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v — the drift-gate requires Dockerfile.platform-agent to live next to the other Dockerfiles; verify the path", dockerfilePath, err)
|
||||
}
|
||||
dockerfileStr := string(dockerfile)
|
||||
|
||||
for _, rel := range expectedImageBakedFiles {
|
||||
// The Dockerfile uses two patterns: COPY <rel> /opt/...
|
||||
// for the top-level files (config.yaml, mcp_servers.yaml)
|
||||
// and COPY <dir>/ /opt/.../ for the prompts/ directory.
|
||||
// We check that EITHER pattern appears for the expected file.
|
||||
topLevel := `COPY ${PLATFORM_AGENT_TEMPLATE_DIR}/` + rel
|
||||
dirPattern := `COPY ${PLATFORM_AGENT_TEMPLATE_DIR}/` + filepath.Dir(rel) + `/`
|
||||
if !strings.Contains(dockerfileStr, topLevel) && !strings.Contains(dockerfileStr, dirPattern) {
|
||||
t.Errorf("Dockerfile COPY missing: %s — the IMAGE-BAKED impl must COPY %s from the platform-agent template SSOT; if a new identity file is added, update Dockerfile.platform-agent AND expectedImageBakedFiles", rel, rel)
|
||||
}
|
||||
}
|
||||
|
||||
// ALSO verify the Dockerfile references the build-arg + the
|
||||
// destination path. A future refactor that changes either of
|
||||
// these would silently break the SSOT contract; the test pins
|
||||
// the names that the workspace-server's runtime fallback (and
|
||||
// any operator inspecting the image) relies on.
|
||||
if !strings.Contains(dockerfileStr, "ARG PLATFORM_AGENT_TEMPLATE_DIR=") {
|
||||
t.Error("Dockerfile.platform-agent is missing the PLATFORM_AGENT_TEMPLATE_DIR build-arg declaration — the IMAGE-BAKED impl requires this arg to source from the pre-cloned template repo")
|
||||
}
|
||||
if !strings.Contains(dockerfileStr, "/opt/molecule-platform-agent-template/") {
|
||||
t.Error("Dockerfile.platform-agent is missing the /opt/molecule-platform-agent-template/ destination path — the workspace-server runtime fallback (and the drift-gate convention) pins this path; a change requires a coordinated update in both places")
|
||||
}
|
||||
|
||||
// === Half 2: SSOT-side checks (conditional on SSOT availability) ===
|
||||
|
||||
ssotRoot, available := resolveSSOTRoot(t)
|
||||
if !available {
|
||||
// SSOT not pre-cloned (typical for pull_request CI). Run
|
||||
// the Dockerfile-side checks only; the SSOT-side checks
|
||||
// will run on the publish-workspace-server-image.yml
|
||||
// workflow which pre-clones via scripts/clone-manifest.sh.
|
||||
t.Logf("platform-agent template SSOT not available at canonical CI path (PLATFORM_AGENT_TEMPLATE_REPO_PATH unset, .tenant-bundle-deps/workspace-configs-templates/platform-agent missing). Dockerfile-side checks ran; SSOT-side checks SKIPPED. Set PLATFORM_AGENT_TEMPLATE_REPO_PATH to the pre-cloned template dir to enable the full gate (the publish-workspace-server-image.yml workflow does this via the post-pre-clone test step).")
|
||||
return
|
||||
}
|
||||
|
||||
// SSOT-side: each expected file MUST exist at ssotRoot/<relpath>
|
||||
// and have non-zero content (zero-byte file = silent miss).
|
||||
for _, rel := range expectedImageBakedFiles {
|
||||
ssotPath := filepath.Join(ssotRoot, rel)
|
||||
data, err := os.ReadFile(ssotPath)
|
||||
if err != nil {
|
||||
t.Errorf("SSOT missing: %s (read: %v) — the platform-agent template repo is the load-bearing identity SSOT; a missing file is a regression", ssotPath, err)
|
||||
continue
|
||||
}
|
||||
if len(data) == 0 {
|
||||
t.Errorf("SSOT empty: %s — zero-byte identity file would silently bake a broken concierge into the image", ssotPath)
|
||||
}
|
||||
}
|
||||
|
||||
// SSOT-side: scan the platform-agent template repo for any
|
||||
// additional files in the concierge-identity namespace (e.g.
|
||||
// prompts/foo.md) that the Dockerfile might be missing. The
|
||||
// forward-direction check (above) catches a missing expected
|
||||
// file; this REVERSE check catches an un-expected new identity
|
||||
// file the Dockerfile doesn't COPY. Both must hold for the
|
||||
// image-baked content to remain SSOT-equal.
|
||||
extraIdentityFiles, err := scanConciergeIdentityFiles(ssotRoot)
|
||||
if err != nil {
|
||||
t.Errorf("scan SSOT identity files: %v", err)
|
||||
} else {
|
||||
for _, rel := range extraIdentityFiles {
|
||||
found := false
|
||||
for _, expected := range expectedImageBakedFiles {
|
||||
if rel == expected {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("SSOT has an un-baked concierge-identity file: %s — the IMAGE-BAKED impl is now SILENTLY DRIFTING from the SSOT (a new file was added to the platform-agent template repo without a matching COPY in Dockerfile.platform-agent + entry in expectedImageBakedFiles). Either bake it (update Dockerfile + expected list) or mark it non-identity.", rel)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// scanConciergeIdentityFiles walks the platform-agent template repo
|
||||
// and returns the RELATIVE paths of every file in the concierge-
|
||||
// identity namespace (config.yaml + mcp_servers.yaml + prompts/).
|
||||
// Non-identity files (README, .gitignore, etc.) are filtered out.
|
||||
//
|
||||
// Errors are returned for filesystem-walk failures; the caller turns
|
||||
// them into a t.Errorf (so other checks still run). The walk is
|
||||
// deliberately non-recursive beyond the namespace prefix — the
|
||||
// concierge's identity is config + mcp + prompts, nothing nested.
|
||||
func scanConciergeIdentityFiles(ssotRoot string) ([]string, error) {
|
||||
var identity []string
|
||||
entries, err := os.ReadDir(ssotRoot)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, e := range entries {
|
||||
// Top-level files: config.yaml, mcp_servers.yaml
|
||||
if !e.IsDir() {
|
||||
if isConciergeIdentityPath(e.Name()) {
|
||||
identity = append(identity, e.Name())
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Directories: scan prompts/
|
||||
if e.Name() == "prompts" {
|
||||
promptEntries, err := os.ReadDir(filepath.Join(ssotRoot, e.Name()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, pe := range promptEntries {
|
||||
if pe.IsDir() {
|
||||
continue
|
||||
}
|
||||
rel := filepath.ToSlash(filepath.Join(e.Name(), pe.Name()))
|
||||
if isConciergeIdentityPath(rel) {
|
||||
identity = append(identity, rel)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return identity, nil
|
||||
}
|
||||
Reference in New Issue
Block a user