molecule-core/docker-compose.yml
Molecule AI Core-DevOps 40736a41e1
Some checks failed
Secret scan / Scan diff for credential-shaped strings (pull_request) Failing after 3s
sop-tier-check / tier-check (pull_request) Failing after 2s
infra: pin all compose file image digests
Replace mutable tags (postgres:16-alpine, redis:7-alpine,
clickhouse/clickhouse-server:24-alpine, temporalio/auto-setup:1.25,
temporalio/ui:2.31.2, langfuse/langfuse:2, litellm:main-latest,
ollama:latest) with pinned SHA256 digests fetched from Docker Hub / GHCR.

Rationale: mutable image tags can silently resolve to a different image
over time, creating supply-chain risk. Digest-pinning ensures the
exact image content runs every time.

Refresh procedure documented in comments above each image line:
- Docker Hub: curl https://hub.docker.com/v2/repositories/<img>/tags/<tag>
- GHCR: curl -sI https://ghcr.io/v2/<owner>/<repo>/manifests/<tag>

Remaining: canvas ECR image (requires AWS credentials to fetch digest).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-10 12:06:10 +00:00

351 lines
16 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Include infra services (Temporal, Langfuse) so `docker compose up` starts the full stack.
include:
- docker-compose.infra.yml
services:
# --- Infrastructure ---
# digest-pinned 2026-05-10 (sha256:4941ef97aaa2633ce9808f7766f8b8d746dd039ce8c51ca6da185c3dc63ab579, linux/amd64)
postgres:
image: postgres@sha256:4941ef97aaa2633ce9808f7766f8b8d746dd039ce8c51ca6da185c3dc63ab579
environment:
POSTGRES_USER: ${POSTGRES_USER:-dev}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-dev}
POSTGRES_DB: ${POSTGRES_DB:-molecule}
command: ["postgres", "-c", "wal_level=logical"]
ports:
- "5432:5432"
volumes:
- pgdata:/var/lib/postgresql/data
networks:
- molecule-core-net
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-dev}"]
interval: 2s
timeout: 5s
retries: 10
langfuse-db-init:
image: postgres@sha256:4941ef97aaa2633ce9808f7766f8b8d746dd039ce8c51ca6da185c3dc63ab579
depends_on:
postgres:
condition: service_healthy
environment:
POSTGRES_USER: ${POSTGRES_USER:-dev}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-dev}
command:
- /bin/sh
- -c
- |
export PGPASSWORD="$${POSTGRES_PASSWORD}"
until pg_isready -h postgres -U "$${POSTGRES_USER}" -d postgres >/dev/null 2>&1; do
sleep 1
done
if ! psql -h postgres -U "$${POSTGRES_USER}" -d postgres -tAc "SELECT 1 FROM pg_database WHERE datname = 'langfuse'" | grep -q 1; then
psql -h postgres -U "$${POSTGRES_USER}" -d postgres -c "CREATE DATABASE langfuse"
fi
networks:
- molecule-core-net
# digest-pinned 2026-05-10 (sha256:b1addbe72465a718643cff9e60a58e6df1841e29d6d7d60c9a85d8d72f08d1a7, linux/amd64)
redis:
image: redis@sha256:b1addbe72465a718643cff9e60a58e6df1841e29d6d7d60c9a85d8d72f08d1a7
command: ["redis-server", "--notify-keyspace-events", "KEA"]
ports:
- "6379:6379"
volumes:
- redisdata:/data
networks:
- molecule-core-net
restart: unless-stopped
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 2s
timeout: 5s
retries: 10
# --- Observability ---
# digest-pinned 2026-05-10 (sha256:5b296e0ba1da74efea3143c773ddd60245f249fb7c72eb1d866c2d6ebc759fbe, linux/amd64)
langfuse-clickhouse:
image: clickhouse/clickhouse-server@sha256:5b296e0ba1da74efea3143c773ddd60245f249fb7c72eb1d866c2d6ebc759fbe
environment:
CLICKHOUSE_DB: langfuse
CLICKHOUSE_USER: langfuse
CLICKHOUSE_PASSWORD: langfuse
volumes:
- clickhousedata:/var/lib/clickhouse
networks:
- molecule-core-net
healthcheck:
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://127.0.0.1:8123/ping || exit 1"]
interval: 5s
timeout: 5s
retries: 10
# digest-pinned 2026-05-10 (sha256:e7aafd3ccf721821b40f8b2251220b4bb8af5e4877b5c5a8846af5b3318aaf1d, linux/amd64)
langfuse:
image: langfuse/langfuse@sha256:e7aafd3ccf721821b40f8b2251220b4bb8af5e4877b5c5a8846af5b3318aaf1d
depends_on:
langfuse-clickhouse:
condition: service_healthy
langfuse-db-init:
condition: service_completed_successfully
environment:
DATABASE_URL: postgres://${POSTGRES_USER:-dev}:${POSTGRES_PASSWORD:-dev}@postgres:5432/langfuse
# Langfuse v2 expects the HTTP interface (port 8123). The previous
# clickhouse://...:9000 native-protocol URL is rejected with
# "ClickHouse URL protocol must be either http or https".
CLICKHOUSE_URL: http://langfuse-clickhouse:8123
CLICKHOUSE_MIGRATION_URL: clickhouse://langfuse-clickhouse:9000
CLICKHOUSE_USER: langfuse
CLICKHOUSE_PASSWORD: langfuse
NEXTAUTH_SECRET: ${LANGFUSE_SECRET:-changeme-langfuse-secret}
NEXTAUTH_URL: http://localhost:3001
SALT: ${LANGFUSE_SALT:-changeme-langfuse-salt}
ports:
- "3001:3000"
networks:
- molecule-core-net
healthcheck:
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3000/api/public/health || exit 1"]
interval: 10s
timeout: 5s
retries: 10
# --- Platform ---
platform:
build:
# Build context MUST be repo root, not ./platform — the Dockerfile
# COPYs `workspace-server/migrations`, `workspace-server/go.mod`,
# `workspace-configs-templates/` etc. via repo-relative paths so it
# can bake in templates + migrations alongside the platform binary.
# When context was ./platform earlier, docker silently cached an
# earlier image (the COPY workspace-server/migrations resolved to nothing
# under ./workspace-server/, so layers stopped invalidating) — manifested
# as migration 023 not landing after PR #417 merged. CI workflow
# already uses context=. , this aligns local with CI.
context: .
dockerfile: workspace-server/Dockerfile
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
environment:
DATABASE_URL: postgres://${POSTGRES_USER:-dev}:${POSTGRES_PASSWORD:-dev}@postgres:5432/${POSTGRES_DB:-molecule}?sslmode=disable
REDIS_URL: redis://redis:6379
PORT: "${PLATFORM_PORT:-8080}"
PLATFORM_URL: "http://platform:${PLATFORM_PORT:-8080}"
# Container network namespace is already isolated; "all interfaces"
# inside the container = the bridge interface only. The fail-open
# default (127.0.0.1) would block host-to-container access.
BIND_ADDR: "${BIND_ADDR:-0.0.0.0}"
# Default MOLECULE_ENV=development so the WorkspaceAuth / AdminAuth
# middleware fail-open path activates when ADMIN_TOKEN is unset —
# otherwise the canvas (which runs without a bearer in pure local
# dev) gets 401 "missing workspace auth token" on every request.
# Override to "production" for SaaS/staged deploys; in those modes
# ADMIN_TOKEN must also be set or every request rejects.
MOLECULE_ENV: "${MOLECULE_ENV:-development}"
CORS_ORIGINS: ${CORS_ORIGINS:-http://localhost:${CANVAS_PUBLISH_PORT:-3000},http://127.0.0.1:${CANVAS_PUBLISH_PORT:-3000},http://localhost:3001}
RATE_LIMIT: "${RATE_LIMIT:-1000}"
CONFIGS_DIR: /configs
CONFIGS_HOST_DIR: "${CONFIGS_HOST_DIR:-${PWD}/workspace-configs-templates}"
PLUGINS_HOST_DIR: "${PLUGINS_HOST_DIR:-${PWD}/plugins}"
# github-app-auth plugin — injects GITHUB_TOKEN / GH_TOKEN into every
# workspace env from the App installation token. Remap the host-side
# path in GITHUB_APP_PRIVATE_KEY_FILE to /secrets/github-app.pem inside
# the container (the private key is bind-mounted below read-only).
# Soft-dep: skipped entirely when GITHUB_APP_ID is unset.
GITHUB_APP_ID: "${GITHUB_APP_ID:-}"
GITHUB_APP_INSTALLATION_ID: "${GITHUB_APP_INSTALLATION_ID:-}"
GITHUB_APP_PRIVATE_KEY_FILE: "/secrets/github-app.pem"
# ADMIN_TOKEN — required to fully close issue #684 (AdminAuth bearer bypass, PR #729).
# When set, only this exact value is accepted on all /admin/* and /approvals/* routes;
# workspace bearer tokens are no longer accepted as admin credentials.
# Unset (default) → backward-compat fallback: any valid workspace token passes AdminAuth
# (same behaviour as before PR #729, still vulnerable to #684).
# Generate: openssl rand -base64 32
# Store in fly secrets / deployment env — NEVER commit the actual value.
ADMIN_TOKEN: "${ADMIN_TOKEN:-}"
# Workspace hibernation default (issue #724 / PR #724). Sets platform-wide idle
# threshold (minutes); per-workspace column takes precedence. Leave empty to
# rely on per-workspace config only (current behaviour — global-default code pending).
HIBERNATION_IDLE_MINUTES: "${HIBERNATION_IDLE_MINUTES:-}"
# Plugin supply chain hardening (issue #768 / PR #775). Never set in production.
PLUGIN_ALLOW_UNPINNED: "${PLUGIN_ALLOW_UNPINNED:-}"
# Force ImagePull/ContainerCreate to request linux/amd64 manifests
# for the workspace-template-* images. The templates ship single-arch
# amd64 today; without this override, an arm64 host (Apple Silicon)
# asks the daemon for linux/arm64/v8, which doesn't match the manifest
# and the pull fails with "no matching manifest". Apple Silicon will
# run the amd64 image under Rosetta — slower (~2-3×) but functional.
# Override to "" or another platform when the templates start shipping
# multi-arch (then this hardcoded amd64 becomes unnecessary).
MOLECULE_IMAGE_PLATFORM: "${MOLECULE_IMAGE_PLATFORM:-linux/amd64}"
# GHCR auth for the workspace-images refresh endpoint
# (POST /admin/workspace-images/refresh). When set, the platform's
# Docker SDK ImagePull on private workspace-template-* images
# succeeds without per-host `docker login`. GHCR_USER is the GitHub
# username; GHCR_TOKEN is a fine-grained PAT with `read:packages`
# on the Molecule-AI org. Both unset → endpoint can only pull
# public images (current state for all 8 templates).
GHCR_USER: "${GHCR_USER:-}"
GHCR_TOKEN: "${GHCR_TOKEN:-}"
# Auto-refresh workspace-template-* images. The watcher polls GHCR
# every 5 min; when a digest moves, it pulls and force-recreates any
# matching ws-* containers (existing /admin/workspace-images/refresh
# logic). Closes the runtime CD chain: merge → containers running
# new code, no operator step. Default ON for local dev because that's
# where the runtime → ws iteration loop is tightest. Set to "false"
# if you don't want the platform to mutate ws-* containers behind
# your back during a long-running test.
IMAGE_AUTO_REFRESH: "${IMAGE_AUTO_REFRESH:-true}"
volumes:
- ./workspace-configs-templates:/configs
- ./org-templates:/org-templates:ro
- ./plugins:/plugins:ro
- /var/run/docker.sock:/var/run/docker.sock
# App private key — read-only bind-mount. The host-side path is
# gitignored per .gitignore rules (/.secrets/ + *.pem).
- ./.secrets/github-app.pem:/secrets/github-app.pem:ro
# Per-role persona credentials (molecule-core#242 local surface).
# Sourced at workspace creation time by org_import.go::loadPersonaEnvFile
# when a workspace.yaml carries `role: <name>`. The host-side dir is
# populated by the operator-host bootstrap kit (28 dev-tree personas);
# /etc/molecule-bootstrap/personas is the in-container path the
# platform expects (matches the prod tenant-EC2 path so the same code
# works in both modes).
#
# Read-only mount — workspace-server only reads, never writes here.
# If the host dir is empty/missing the platform's loadPersonaEnvFile
# silently no-ops per its existing semantics, so this mount is safe
# even on a fresh machine that hasn't run the bootstrap kit yet.
- ${MOLECULE_PERSONA_ROOT_HOST:-${HOME}/.molecule-ai/personas}:/etc/molecule-bootstrap/personas:ro
ports:
- "${PLATFORM_PUBLISH_PORT:-8080}:${PLATFORM_PORT:-8080}"
networks:
- molecule-core-net
restart: unless-stopped
healthcheck:
# Plain GET — `--spider` would issue HEAD, which returns 404 because
# /health is registered as GET only.
test: ["CMD-SHELL", "wget -qO /dev/null --tries=1 http://localhost:${PLATFORM_PORT:-8080}/health || exit 1"]
interval: 5s
timeout: 5s
retries: 10
# --- Canvas ---
canvas:
# The publish-canvas-image CI workflow pushes a fresh image to GHCR on
# every canvas/** merge to main. To update the running container:
# docker compose pull canvas && docker compose up -d canvas
# First-time local setup or testing unreleased changes — build from source:
# docker compose build canvas && docker compose up -d canvas
# Note: ECR images require AWS auth — `aws ecr get-login-password --region us-east-2 | docker login --username AWS --password-stdin 153263036946.dkr.ecr.us-east-2.amazonaws.com` before pull.
# Digest-pin requires: aws ecr describe-images --repository-name molecule-ai/canvas --image-tags latest --query 'imageDetails[0].imageDigest'
# TODO: pin canvas ECR image digest once AWS creds are available in CI.
image: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/canvas:latest
build:
context: ./canvas
dockerfile: Dockerfile
args:
NEXT_PUBLIC_PLATFORM_URL: ${NEXT_PUBLIC_PLATFORM_URL:-http://localhost:${PLATFORM_PUBLISH_PORT:-8080}}
NEXT_PUBLIC_WS_URL: ${NEXT_PUBLIC_WS_URL:-ws://localhost:${PLATFORM_PUBLISH_PORT:-8080}/ws}
NEXT_PUBLIC_ADMIN_TOKEN: ${ADMIN_TOKEN:-}
depends_on:
platform:
condition: service_healthy
environment:
PORT: "${CANVAS_PORT:-3000}"
# Local dev — relaxes CSP to allow cross-port fetches (canvas:3000 → platform:8080).
CSP_DEV_MODE: "${CSP_DEV_MODE:-1}"
# NOTE: NEXT_PUBLIC_* are baked into the JS bundle at `next build` time —
# these runtime values are ignored by the standalone output. They're kept
# here for documentation / override during `docker compose build`.
NEXT_PUBLIC_PLATFORM_URL: ${NEXT_PUBLIC_PLATFORM_URL:-http://localhost:${PLATFORM_PUBLISH_PORT:-8080}}
NEXT_PUBLIC_WS_URL: ${NEXT_PUBLIC_WS_URL:-ws://localhost:${PLATFORM_PUBLISH_PORT:-8080}/ws}
ports:
- "${CANVAS_PUBLISH_PORT:-3000}:${CANVAS_PORT:-3000}"
networks:
- molecule-core-net
healthcheck:
test: ["CMD-SHELL", "wget -qO /dev/null --tries=1 http://127.0.0.1:${CANVAS_PORT:-3000} || exit 1"]
interval: 10s
timeout: 5s
retries: 10
# --- Optional: LiteLLM Proxy (unified OpenAI-compatible API for all providers) ---
# Start with: docker compose --profile multi-provider up
#
# Workspace agents then set:
# OPENAI_BASE_URL=http://litellm:4000
# OPENAI_API_KEY=${LITELLM_MASTER_KEY:-sk-molecule}
#
# And use model names from infra/litellm_config.yml (e.g. "claude-opus-4-5",
# "gpt-4o", "openrouter/deepseek-r1", "ollama/llama3.2").
# Edit infra/litellm_config.yml to add/remove providers and models.
# digest-pinned 2026-05-10 (sha256:7c311546c25e7bb6e8cafede9fcd3d0d622ac636b5c9418befaa32e85dfb0186)
# Refresh: curl -sI https://ghcr.io/v2/berriai/litellm/manifests/main-latest (Docker-Content-Digest header)
litellm:
image: ghcr.io/berriai/litellm/main-latest@sha256:7c311546c25e7bb6e8cafede9fcd3d0d622ac636b5c9418befaa32e85dfb0186
profiles:
- multi-provider
ports:
- "4000:4000"
volumes:
- ./infra/litellm_config.yml:/app/config.yaml:ro
command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "4"]
environment:
# Pass provider API keys through — only the ones you have are needed
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
OPENAI_API_KEY: ${OPENAI_API_KEY:-}
OPENROUTER_API_KEY: ${OPENROUTER_API_KEY:-}
LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY:-sk-molecule}
networks:
- molecule-core-net
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:4000/health || exit 1"]
interval: 10s
timeout: 5s
retries: 5
start_period: 15s
# --- Optional: Local LLM Models via Ollama ---
# Start with: docker compose --profile local-models up
# After first start, pull a model:
# docker compose exec ollama ollama pull llama3.2
# docker compose exec ollama ollama pull qwen2.5-coder:7b
# Then set MODEL_PROVIDER=ollama:llama3.2 in your workspace config.yaml
# Workspace agents reach Ollama at http://ollama:11434 (internal Docker network).
# digest-pinned 2026-05-10 (sha256:90bd8ed1ad1853fbfb1ef5835f9d7a24fe890e05ace521e2d8d7a6f56bb667dd, linux/amd64)
# Refresh: curl -s https://hub.docker.com/v2/repositories/ollama/ollama/tags/latest | python3 -c "import json,sys; ..."
ollama:
image: ollama/ollama@sha256:90bd8ed1ad1853fbfb1ef5835f9d7a24fe890e05ace521e2d8d7a6f56bb667dd
profiles:
- local-models
ports:
- "11434:11434"
volumes:
- ollamadata:/root/.ollama
networks:
- molecule-core-net
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "ollama list || exit 1"]
interval: 10s
timeout: 5s
retries: 5
start_period: 20s
networks:
molecule-core-net:
name: molecule-core-net
volumes:
pgdata:
redisdata:
clickhousedata:
ollamadata: