Renames Docker network across all code, configs, scripts, and docs. Per issue #93: the network was named molecule-monorepo-net as a holdover from when the repo was called molecule-monorepo. The canonical repo name is now molecule-core, so the network should be molecule-core-net. Files changed: - docker-compose.yml, docker-compose.infra.yml: network definition - infra/scripts/setup.sh: docker network create - scripts/nuke-and-rebuild.sh: docker network rm - workspace-server/internal/provisioner/provisioner.go: DefaultNetwork - All comments/docs: updated wording Acceptance: grep -rn 'molecule-monorepo-net' returns zero matches. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
341 lines
15 KiB
YAML
341 lines
15 KiB
YAML
# Pull in the infra compose file (Temporal, Langfuse) so that a plain
# `docker compose up` starts the full stack.
include:
  - path: docker-compose.infra.yml
|
||
|
||
services:
|
||
# --- Infrastructure ---
|
||
postgres:
  image: postgres:16-alpine
  restart: unless-stopped
  # The server is started with wal_level=logical on its command line.
  command:
    - postgres
    - "-c"
    - wal_level=logical
  environment:
    # User, password and database name fall back to dev defaults when the
    # corresponding host variables are unset.
    POSTGRES_USER: "${POSTGRES_USER:-dev}"
    POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-dev}"
    POSTGRES_DB: "${POSTGRES_DB:-molecule}"
  ports:
    - "5432:5432"
  volumes:
    - pgdata:/var/lib/postgresql/data
  networks:
    - molecule-core-net
  healthcheck:
    # Healthy as soon as pg_isready succeeds for the configured user; other
    # services gate on this through `condition: service_healthy`.
    test:
      - CMD-SHELL
      - "pg_isready -U ${POSTGRES_USER:-dev}"
    interval: 2s
    timeout: 5s
    retries: 10
|
||
|
||
langfuse-db-init:
  # One-shot job: creates the `langfuse` database on the postgres service
  # when it does not exist yet. The langfuse service waits for this job to
  # complete successfully before starting.
  image: postgres:16-alpine
  networks:
    - molecule-core-net
  depends_on:
    postgres:
      condition: service_healthy
  environment:
    POSTGRES_USER: "${POSTGRES_USER:-dev}"
    POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-dev}"
  # `$$` is the Compose escape for a literal `$`, so the variables below are
  # expanded by the container's shell (from `environment:` above) rather than
  # by Compose on the host.
  command:
    - /bin/sh
    - "-c"
    - |
      export PGPASSWORD="$${POSTGRES_PASSWORD}"
      until pg_isready -h postgres -U "$${POSTGRES_USER}" -d postgres >/dev/null 2>&1; do
        sleep 1
      done
      if ! psql -h postgres -U "$${POSTGRES_USER}" -d postgres -tAc "SELECT 1 FROM pg_database WHERE datname = 'langfuse'" | grep -q 1; then
        psql -h postgres -U "$${POSTGRES_USER}" -d postgres -c "CREATE DATABASE langfuse"
      fi
|
||
|
||
redis:
  image: redis:7-alpine
  restart: unless-stopped
  # Start the server with keyspace notifications configured as `KEA`.
  command:
    - redis-server
    - "--notify-keyspace-events"
    - KEA
  ports:
    - "6379:6379"
  volumes:
    - redisdata:/data
  networks:
    - molecule-core-net
  healthcheck:
    # Healthy once `redis-cli ping` succeeds inside the container.
    test:
      - CMD
      - redis-cli
      - ping
    interval: 2s
    timeout: 5s
    retries: 10
|
||
|
||
# --- Observability ---
|
||
langfuse-clickhouse:
  # ClickHouse instance used by the langfuse service below.
  image: clickhouse/clickhouse-server:24-alpine
  environment:
    # Database and credentials; the langfuse service is configured with the
    # same user/password pair.
    CLICKHOUSE_DB: "langfuse"
    CLICKHOUSE_USER: "langfuse"
    CLICKHOUSE_PASSWORD: "langfuse"
  volumes:
    - clickhousedata:/var/lib/clickhouse
  networks:
    - molecule-core-net
  healthcheck:
    # Probe the HTTP interface's /ping endpoint on port 8123.
    test:
      - CMD-SHELL
      - "wget --no-verbose --tries=1 --spider http://127.0.0.1:8123/ping || exit 1"
    interval: 5s
    timeout: 5s
    retries: 10
|
||
|
||
langfuse:
  image: langfuse/langfuse:2
  # Start only after ClickHouse is healthy and the one-shot job that creates
  # the `langfuse` database has exited successfully.
  depends_on:
    langfuse-clickhouse:
      condition: service_healthy
    langfuse-db-init:
      condition: service_completed_successfully
  environment:
    DATABASE_URL: "postgres://${POSTGRES_USER:-dev}:${POSTGRES_PASSWORD:-dev}@postgres:5432/langfuse"
    # Langfuse v2 talks to ClickHouse over the HTTP interface (port 8123).
    # A native-protocol clickhouse://...:9000 URL in CLICKHOUSE_URL is
    # rejected with "ClickHouse URL protocol must be either http or https".
    CLICKHOUSE_URL: "http://langfuse-clickhouse:8123"
    CLICKHOUSE_MIGRATION_URL: "clickhouse://langfuse-clickhouse:9000"
    CLICKHOUSE_USER: "langfuse"
    CLICKHOUSE_PASSWORD: "langfuse"
    # Secret and salt default to placeholder values; override them through
    # LANGFUSE_SECRET / LANGFUSE_SALT.
    NEXTAUTH_SECRET: "${LANGFUSE_SECRET:-changeme-langfuse-secret}"
    NEXTAUTH_URL: "http://localhost:3001"
    SALT: "${LANGFUSE_SALT:-changeme-langfuse-salt}"
  ports:
    # Container port 3000 is published on host port 3001.
    - "3001:3000"
  networks:
    - molecule-core-net
  healthcheck:
    test:
      - CMD-SHELL
      - "wget --no-verbose --tries=1 --spider http://localhost:3000/api/public/health || exit 1"
    interval: 10s
    timeout: 5s
    retries: 10
|
||
|
||
# --- Platform ---
|
||
platform:
  build:
    # The build context has to be the repo root, not ./platform. The
    # Dockerfile COPYs `workspace-server/migrations`, `workspace-server/go.mod`,
    # `workspace-configs-templates/` etc. by repo-relative path so templates
    # and migrations are baked in next to the platform binary.
    # With the earlier ./platform context docker silently reused a cached
    # image (COPY workspace-server/migrations resolved to nothing under
    # ./workspace-server/, so layers stopped invalidating); the symptom was
    # migration 023 not landing after PR #417 merged. The CI workflow already
    # builds with context=. , so this keeps local builds aligned with CI.
    context: .
    dockerfile: workspace-server/Dockerfile
  restart: unless-stopped
  depends_on:
    postgres:
      condition: service_healthy
    redis:
      condition: service_healthy
  networks:
    - molecule-core-net
  ports:
    - "${PLATFORM_PUBLISH_PORT:-8080}:${PLATFORM_PORT:-8080}"
  environment:
    DATABASE_URL: "postgres://${POSTGRES_USER:-dev}:${POSTGRES_PASSWORD:-dev}@postgres:5432/${POSTGRES_DB:-molecule}?sslmode=disable"
    REDIS_URL: "redis://redis:6379"
    PORT: "${PLATFORM_PORT:-8080}"
    PLATFORM_URL: "http://platform:${PLATFORM_PORT:-8080}"
    # The container's network namespace is already isolated, so "all
    # interfaces" inside it means only the bridge interface. The fail-open
    # default (127.0.0.1) would block host-to-container access.
    BIND_ADDR: "${BIND_ADDR:-0.0.0.0}"
    # MOLECULE_ENV defaults to development so the WorkspaceAuth / AdminAuth
    # middleware fail-open path activates when ADMIN_TOKEN is unset.
    # Without it the canvas (which sends no bearer in pure local dev) gets
    # 401 "missing workspace auth token" on every request.
    # Set to "production" for SaaS/staged deploys; in those modes ADMIN_TOKEN
    # must be set as well or every request is rejected.
    MOLECULE_ENV: "${MOLECULE_ENV:-development}"
    CORS_ORIGINS: "${CORS_ORIGINS:-http://localhost:${CANVAS_PUBLISH_PORT:-3000},http://127.0.0.1:${CANVAS_PUBLISH_PORT:-3000},http://localhost:3001}"
    RATE_LIMIT: "${RATE_LIMIT:-1000}"
    CONFIGS_DIR: "/configs"
    CONFIGS_HOST_DIR: "${CONFIGS_HOST_DIR:-${PWD}/workspace-configs-templates}"
    PLUGINS_HOST_DIR: "${PLUGINS_HOST_DIR:-${PWD}/plugins}"
    # github-app-auth plugin: injects GITHUB_TOKEN / GH_TOKEN into every
    # workspace env from the App installation token. The host-side path in
    # GITHUB_APP_PRIVATE_KEY_FILE is remapped to /secrets/github-app.pem
    # inside the container (the key is bind-mounted read-only under
    # `volumes:`). Soft dependency: skipped entirely when GITHUB_APP_ID is
    # unset.
    GITHUB_APP_ID: "${GITHUB_APP_ID:-}"
    GITHUB_APP_INSTALLATION_ID: "${GITHUB_APP_INSTALLATION_ID:-}"
    GITHUB_APP_PRIVATE_KEY_FILE: "/secrets/github-app.pem"
    # ADMIN_TOKEN is required to fully close issue #684 (AdminAuth bearer
    # bypass, PR #729).
    # Set: only this exact value is accepted on all /admin/* and
    # /approvals/* routes, and workspace bearer tokens stop working as admin
    # credentials.
    # Unset (default): backward-compat fallback where any valid workspace
    # token passes AdminAuth (same behaviour as before PR #729, still
    # vulnerable to #684).
    # Generate with: openssl rand -base64 32
    # Keep it in fly secrets / deployment env. NEVER commit the actual value.
    ADMIN_TOKEN: "${ADMIN_TOKEN:-}"
    # Workspace hibernation default (issue #724 / PR #724): platform-wide
    # idle threshold in minutes; the per-workspace column takes precedence.
    # Leave empty to rely on per-workspace config only (current behaviour;
    # the global-default code is pending).
    HIBERNATION_IDLE_MINUTES: "${HIBERNATION_IDLE_MINUTES:-}"
    # Plugin supply chain hardening (issue #768 / PR #775). Never set in
    # production.
    PLUGIN_ALLOW_UNPINNED: "${PLUGIN_ALLOW_UNPINNED:-}"
    # Makes ImagePull/ContainerCreate request linux/amd64 manifests for the
    # workspace-template-* images. The templates ship single-arch amd64
    # today; without this override an arm64 host (Apple Silicon) asks the
    # daemon for linux/arm64/v8, which doesn't match the manifest, and the
    # pull fails with "no matching manifest". Apple Silicon runs the amd64
    # image under Rosetta: slower (~2-3×) but functional.
    # Override to "" or another platform once the templates ship multi-arch
    # (at which point this hardcoded amd64 becomes unnecessary).
    MOLECULE_IMAGE_PLATFORM: "${MOLECULE_IMAGE_PLATFORM:-linux/amd64}"
    # GHCR auth for the workspace-images refresh endpoint
    # (POST /admin/workspace-images/refresh). When set, the platform's Docker
    # SDK ImagePull on private workspace-template-* images succeeds without a
    # per-host `docker login`. GHCR_USER is the GitHub username; GHCR_TOKEN
    # is a fine-grained PAT with `read:packages` on the Molecule-AI org.
    # With both unset the endpoint can only pull public images (current
    # state for all 8 templates).
    GHCR_USER: "${GHCR_USER:-}"
    GHCR_TOKEN: "${GHCR_TOKEN:-}"
    # Auto-refresh of workspace-template-* images. The watcher polls GHCR
    # every 5 min; when a digest moves it pulls and force-recreates any
    # matching ws-* containers (the existing /admin/workspace-images/refresh
    # logic). This closes the runtime CD chain: merge → containers running
    # new code with no operator step. On by default for local dev because
    # that's where the runtime → ws iteration loop is tightest. Set to
    # "false" if you don't want the platform to mutate ws-* containers behind
    # your back during a long-running test.
    IMAGE_AUTO_REFRESH: "${IMAGE_AUTO_REFRESH:-true}"
  volumes:
    - ./workspace-configs-templates:/configs
    - ./org-templates:/org-templates:ro
    - ./plugins:/plugins:ro
    - /var/run/docker.sock:/var/run/docker.sock
    # GitHub App private key, bind-mounted read-only. The host-side path is
    # gitignored per the .gitignore rules (/.secrets/ + *.pem).
    - ./.secrets/github-app.pem:/secrets/github-app.pem:ro
    # Per-role persona credentials (molecule-core#242 local surface).
    # org_import.go::loadPersonaEnvFile sources them at workspace creation
    # time when a workspace.yaml carries `role: <name>`. The host-side dir is
    # populated by the operator-host bootstrap kit (28 dev-tree personas);
    # /etc/molecule-bootstrap/personas is the in-container path the platform
    # expects (it matches the prod tenant-EC2 path, so the same code works in
    # both modes).
    #
    # Mounted read-only: workspace-server only reads here, never writes.
    # If the host dir is empty/missing, loadPersonaEnvFile silently no-ops
    # per its existing semantics, so the mount is safe even on a fresh
    # machine that hasn't run the bootstrap kit yet.
    - "${MOLECULE_PERSONA_ROOT_HOST:-${HOME}/.molecule-ai/personas}:/etc/molecule-bootstrap/personas:ro"
  healthcheck:
    # Plain GET on purpose: `--spider` would issue HEAD, which returns 404
    # because /health is registered as GET only.
    test:
      - CMD-SHELL
      - "wget -qO /dev/null --tries=1 http://localhost:${PLATFORM_PORT:-8080}/health || exit 1"
    interval: 5s
    timeout: 5s
    retries: 10
|
||
|
||
# --- Canvas ---
|
||
canvas:
  # The publish-canvas-image CI workflow pushes a fresh image to GHCR on
  # every canvas/** merge to main. To update the running container:
  #   docker compose pull canvas && docker compose up -d canvas
  # For first-time local setup, or to test unreleased changes, build from
  # source instead:
  #   docker compose build canvas && docker compose up -d canvas
  # Note: ECR images require AWS auth before pull:
  #   aws ecr get-login-password --region us-east-2 | docker login --username AWS --password-stdin 153263036946.dkr.ecr.us-east-2.amazonaws.com
  # NOTE(review): the text above mentions GHCR while `image:` points at ECR;
  # confirm which registry the CI workflow actually publishes to.
  image: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/canvas:latest
  build:
    context: ./canvas
    dockerfile: Dockerfile
    args:
      NEXT_PUBLIC_PLATFORM_URL: "${NEXT_PUBLIC_PLATFORM_URL:-http://localhost:${PLATFORM_PUBLISH_PORT:-8080}}"
      NEXT_PUBLIC_WS_URL: "${NEXT_PUBLIC_WS_URL:-ws://localhost:${PLATFORM_PUBLISH_PORT:-8080}/ws}"
      # NOTE(review): NEXT_PUBLIC_* values are baked into the JS bundle at
      # build time (see the note under `environment:`), so a non-empty
      # ADMIN_TOKEN ends up inside the built image. Confirm this is only
      # meant for local builds.
      NEXT_PUBLIC_ADMIN_TOKEN: "${ADMIN_TOKEN:-}"
  depends_on:
    platform:
      condition: service_healthy
  networks:
    - molecule-core-net
  ports:
    - "${CANVAS_PUBLISH_PORT:-3000}:${CANVAS_PORT:-3000}"
  environment:
    PORT: "${CANVAS_PORT:-3000}"
    # Local dev: relaxes CSP so cross-port fetches are allowed
    # (canvas:3000 → platform:8080).
    CSP_DEV_MODE: "${CSP_DEV_MODE:-1}"
    # NOTE: NEXT_PUBLIC_* are baked into the JS bundle at `next build` time,
    # so the runtime values below are ignored by the standalone output. They
    # stay here as documentation and as overrides for `docker compose build`.
    NEXT_PUBLIC_PLATFORM_URL: "${NEXT_PUBLIC_PLATFORM_URL:-http://localhost:${PLATFORM_PUBLISH_PORT:-8080}}"
    NEXT_PUBLIC_WS_URL: "${NEXT_PUBLIC_WS_URL:-ws://localhost:${PLATFORM_PUBLISH_PORT:-8080}/ws}"
  healthcheck:
    # Plain GET against the canvas port on loopback.
    test:
      - CMD-SHELL
      - "wget -qO /dev/null --tries=1 http://127.0.0.1:${CANVAS_PORT:-3000} || exit 1"
    interval: 10s
    timeout: 5s
    retries: 10
|
||
|
||
# --- Optional: LiteLLM Proxy (unified OpenAI-compatible API for all providers) ---
|
||
# Start with: docker compose --profile multi-provider up
|
||
#
|
||
# Workspace agents then set:
|
||
# OPENAI_BASE_URL=http://litellm:4000
|
||
# OPENAI_API_KEY=${LITELLM_MASTER_KEY:-sk-molecule}
|
||
#
|
||
# And use model names from infra/litellm_config.yml (e.g. "claude-opus-4-5",
|
||
# "gpt-4o", "openrouter/deepseek-r1", "ollama/llama3.2").
|
||
# Edit infra/litellm_config.yml to add/remove providers and models.
|
||
litellm:
  # Optional LiteLLM proxy; only started with `--profile multi-provider`.
  image: ghcr.io/berriai/litellm:main-latest
  profiles:
    - multi-provider
  restart: unless-stopped
  ports:
    - "4000:4000"
  volumes:
    # Proxy configuration (providers and models), mounted read-only.
    - ./infra/litellm_config.yml:/app/config.yaml:ro
  command:
    - "--config"
    - /app/config.yaml
    - "--port"
    - "4000"
    - "--num_workers"
    - "4"
  environment:
    # Provider API keys are passed through from the host; only the ones you
    # actually have need to be set.
    ANTHROPIC_API_KEY: "${ANTHROPIC_API_KEY:-}"
    OPENAI_API_KEY: "${OPENAI_API_KEY:-}"
    OPENROUTER_API_KEY: "${OPENROUTER_API_KEY:-}"
    LITELLM_MASTER_KEY: "${LITELLM_MASTER_KEY:-sk-molecule}"
  networks:
    - molecule-core-net
  healthcheck:
    # Probe the liveness route, not `/health`. LiteLLM's `/health` requires
    # the proxy key (LITELLM_MASTER_KEY is always set above) and calls every
    # configured model, so an unauthenticated probe fails and an
    # authenticated one would hit the providers on every interval.
    # `/health/liveliness` is the unauthenticated "is the proxy up" route.
    # Plain GET rather than `--spider` (HEAD), for the same reason the
    # platform healthcheck avoids `--spider`: GET-only routes reject HEAD.
    # NOTE(review): assumes `wget` exists in the litellm image; confirm.
    test:
      - CMD-SHELL
      - "wget -qO /dev/null --tries=1 http://localhost:4000/health/liveliness || exit 1"
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 15s
|
||
|
||
# --- Optional: Local LLM Models via Ollama ---
|
||
# Start with: docker compose --profile local-models up
|
||
# After first start, pull a model:
|
||
# docker compose exec ollama ollama pull llama3.2
|
||
# docker compose exec ollama ollama pull qwen2.5-coder:7b
|
||
# Then set MODEL_PROVIDER=ollama:llama3.2 in your workspace config.yaml
|
||
# Workspace agents reach Ollama at http://ollama:11434 (internal Docker network).
|
||
ollama:
  # Optional local model server; only started with `--profile local-models`.
  image: ollama/ollama:latest
  profiles:
    - local-models
  restart: unless-stopped
  ports:
    - "11434:11434"
  volumes:
    # Named volume mounted at /root/.ollama inside the container.
    - ollamadata:/root/.ollama
  networks:
    - molecule-core-net
  healthcheck:
    # Healthy once `ollama list` succeeds inside the container.
    test:
      - CMD-SHELL
      - "ollama list || exit 1"
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 20s
|
||
|
||
# Network shared by every service in this file. `name:` pins the Docker
# network name to molecule-core-net.
networks:
  molecule-core-net:
    name: molecule-core-net

# Named volumes referenced by the services above; all use default settings.
volumes:
  pgdata: {}
  redisdata: {}
  clickhousedata: {}
  ollamadata: {}
|