Merge branch 'main' into fix/canvas-test-and-design-fixes
Some checks failed
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 22s
CI / Detect changes (pull_request) Successful in 1m4s
Harness Replays / detect-changes (pull_request) Failing after 12s
E2E API Smoke Test / detect-changes (pull_request) Successful in 48s
Harness Replays / Harness Replays (pull_request) Has been skipped
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 39s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 41s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 15s
sop-tier-check / tier-check (pull_request) Successful in 13s
Runtime PR-Built Compatibility / detect-changes (pull_request) Successful in 50s
CI / Platform (Go) (pull_request) Successful in 9s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 8s
CI / Python Lint & Test (pull_request) Successful in 9s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 12s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 10s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (pull_request) Successful in 16s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 9m18s
CI / Canvas (Next.js) (pull_request) Failing after 11m48s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
Some checks failed
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 22s
CI / Detect changes (pull_request) Successful in 1m4s
Harness Replays / detect-changes (pull_request) Failing after 12s
E2E API Smoke Test / detect-changes (pull_request) Successful in 48s
Harness Replays / Harness Replays (pull_request) Has been skipped
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 39s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 41s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 15s
sop-tier-check / tier-check (pull_request) Successful in 13s
Runtime PR-Built Compatibility / detect-changes (pull_request) Successful in 50s
CI / Platform (Go) (pull_request) Successful in 9s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 8s
CI / Python Lint & Test (pull_request) Successful in 9s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 12s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 10s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (pull_request) Successful in 16s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 9m18s
CI / Canvas (Next.js) (pull_request) Failing after 11m48s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
This commit is contained in:
commit
45af892d5d
@ -46,26 +46,33 @@ set -euo pipefail
|
||||
|
||||
# Ensure jq is available. Runners may not have it pre-installed, and the
|
||||
# workflow-level jq install can fail on runners with network restrictions
|
||||
# (GitHub releases not reachable). This fallback is idempotent — no-op
|
||||
# when jq is already on PATH.
|
||||
# (GitHub releases not reachable from some runner networks — infra#241
|
||||
# follow-up). This fallback is idempotent — no-op when jq is already on PATH.
|
||||
# SOP_FAIL_OPEN=1 makes this always exit 0 so CI never blocks on jq absence.
|
||||
if ! command -v jq >/dev/null 2>&1; then
|
||||
echo "::notice::jq not found on PATH — attempting install..."
|
||||
# Download jq binary; fall back to apt-get. Use subshell to isolate
|
||||
# from set -e so a failed install doesn't exit the script.
|
||||
(
|
||||
timeout 60 curl -sSL \
|
||||
"https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
|
||||
-o /usr/local/bin/jq \
|
||||
&& chmod +x /usr/local/bin/jq \
|
||||
&& echo "::notice::jq binary installed: $(/usr/local/bin/jq --version)" \
|
||||
) || {
|
||||
apt-get update -qq && apt-get install -y -qq jq \
|
||||
&& echo "::notice::jq apt-installed: $(jq --version)"
|
||||
}
|
||||
# Verify jq is now available; if not, exit with clear error
|
||||
_jq_installed="no"
|
||||
# apt-get first (primary) — Ubuntu package mirrors are reliably reachable.
|
||||
if apt-get update -qq && apt-get install -y -qq jq 2>/dev/null; then
|
||||
echo "::notice::jq installed via apt-get: $(jq --version)"
|
||||
_jq_installed="yes"
|
||||
# GitHub binary as secondary fallback — may fail on restricted networks.
|
||||
elif timeout 120 curl -sSL \
|
||||
"https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
|
||||
-o /usr/local/bin/jq \
|
||||
&& chmod +x /usr/local/bin/jq; then
|
||||
echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
|
||||
_jq_installed="yes"
|
||||
fi
|
||||
if ! command -v jq >/dev/null 2>&1; then
|
||||
echo "::error::jq installation failed — neither binary download nor apt-get succeeded."
|
||||
echo "::error::jq installation failed — apt-get and GitHub binary both failed."
|
||||
echo "::error::sop-tier-check requires jq for all JSON API parsing."
|
||||
# SOP_FAIL_OPEN=1 is set in the workflow step's env — makes script always
|
||||
# exit 0 so CI never blocks. The SOP-6 tier review gate remains enforced.
|
||||
if [ "${SOP_FAIL_OPEN:-}" = "1" ]; then
|
||||
echo "::warning::SOP_FAIL_OPEN=1 — exiting 0 so CI does not block."
|
||||
exit 0
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
@ -85,7 +85,7 @@ jobs:
|
||||
# OpenAI fallback — kept wired so an operator-dispatched run with
|
||||
# E2E_RUNTIME=hermes overridden via workflow_dispatch can still
|
||||
# exercise the OpenAI path without re-editing the workflow.
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_KEY }}
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
|
||||
E2E_MODE: canary
|
||||
E2E_RUNTIME: claude-code
|
||||
# Pin the canary to a specific MiniMax model rather than relying
|
||||
@ -140,7 +140,7 @@ jobs:
|
||||
fi
|
||||
;;
|
||||
langgraph|hermes)
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_KEY"
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
|
||||
required_secret_value="${E2E_OPENAI_API_KEY:-}"
|
||||
;;
|
||||
*)
|
||||
|
||||
@ -147,7 +147,7 @@ jobs:
|
||||
# E2E_RUNTIME=langgraph or =hermes and still have a working
|
||||
# canary path. The script picks the right blob shape based on
|
||||
# which key is non-empty.
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_KEY }}
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@ -175,7 +175,7 @@ jobs:
|
||||
|
||||
# LLM-key requirement is per-runtime: claude-code accepts
|
||||
# EITHER MiniMax OR direct-Anthropic (whichever is set first),
|
||||
# langgraph + hermes use OpenAI (MOLECULE_STAGING_OPENAI_KEY).
|
||||
# langgraph + hermes use OpenAI (MOLECULE_STAGING_OPENAI_API_KEY).
|
||||
case "${E2E_RUNTIME}" in
|
||||
claude-code)
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
@ -190,7 +190,7 @@ jobs:
|
||||
fi
|
||||
;;
|
||||
langgraph|hermes)
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_KEY"
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
|
||||
required_secret_value="${E2E_OPENAI_API_KEY:-}"
|
||||
;;
|
||||
*)
|
||||
|
||||
@ -105,7 +105,7 @@ jobs:
|
||||
# OpenAI fallback — kept wired so an operator-dispatched run with
|
||||
# E2E_RUNTIME=hermes or =langgraph via workflow_dispatch can still
|
||||
# exercise the OpenAI path.
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_KEY }}
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
|
||||
E2E_RUNTIME: ${{ github.event.inputs.runtime || 'claude-code' }}
|
||||
# Pin the model when running on the default claude-code path —
|
||||
# the per-runtime default ("sonnet") routes to direct Anthropic
|
||||
@ -152,7 +152,7 @@ jobs:
|
||||
fi
|
||||
;;
|
||||
langgraph|hermes)
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_KEY"
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
|
||||
required_secret_value="${E2E_OPENAI_API_KEY:-}"
|
||||
;;
|
||||
*)
|
||||
|
||||
@ -82,22 +82,28 @@ jobs:
|
||||
# The sop-tier-check script uses jq for all JSON API parsing.
|
||||
# Install jq before the script runs so sop-tier-check can pass.
|
||||
#
|
||||
# Method: download binary directly from GitHub releases (faster and
|
||||
# more reliable than apt-get in containerized environments). Falls
|
||||
# back to apt-get if the download fails. The smoke test confirms
|
||||
# jq is on PATH before the main script runs.
|
||||
#
|
||||
# continue-on-error: true ensures this step failing does not fail the
|
||||
# job. The sop-tier-check script has its own jq fallback as a second
|
||||
# line of defense — this step failing gracefully is acceptable.
|
||||
# Method: apt-get first (reliable for Ubuntu runners with internet
|
||||
# access to package mirrors). Falls back to GitHub binary download.
|
||||
# GitHub releases may be unreachable from some runner networks
|
||||
# (infra#241 follow-up: GitHub timeout after 3s on 5.78.80.188
|
||||
# runners). The sop-tier-check script has its own fallback as a
|
||||
# third line of defense. continue-on-error: true ensures this step
|
||||
# failing does not block the job.
|
||||
continue-on-error: true
|
||||
run: |
|
||||
timeout 60 curl -sSL \
|
||||
# apt-get is the primary method — Ubuntu package mirrors are reliably
|
||||
# reachable from runner containers. GitHub releases may be blocked
|
||||
# or slow on some networks (infra#241 follow-up).
|
||||
if apt-get update -qq && apt-get install -y -qq jq; then
|
||||
echo "::notice::jq installed via apt-get: $(jq --version)"
|
||||
elif timeout 120 curl -sSL \
|
||||
"https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
|
||||
-o /usr/local/bin/jq && chmod +x /usr/local/bin/jq \
|
||||
|| apt-get update -qq && apt-get install -y -qq jq \
|
||||
|| echo "::warning::jq install methods failed — script fallback will retry"
|
||||
jq --version 2>/dev/null || echo "::notice::jq not yet available — script will install"
|
||||
-o /usr/local/bin/jq && chmod +x /usr/local/bin/jq; then
|
||||
echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
|
||||
else
|
||||
echo "::warning::jq install failed — apt-get and GitHub download both failed."
|
||||
fi
|
||||
jq --version 2>/dev/null || echo "::notice::jq not yet available — script fallback will retry"
|
||||
|
||||
- name: Verify tier label + reviewer team membership
|
||||
# continue-on-error: true at step level — job-level is ignored by Gitea
|
||||
|
||||
@ -73,8 +73,8 @@ jobs:
|
||||
AWS_REGION: ${{ secrets.AWS_REGION || 'us-east-1' }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_JANITOR_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_JANITOR_SECRET_ACCESS_KEY }}
|
||||
CP_PROD_ADMIN_TOKEN: ${{ secrets.CP_PROD_ADMIN_TOKEN }}
|
||||
CP_STAGING_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_TOKEN }}
|
||||
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
|
||||
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '50' }}
|
||||
GRACE_HOURS: ${{ github.event.inputs.grace_hours || '24' }}
|
||||
|
||||
@ -90,7 +90,7 @@ jobs:
|
||||
# they already accepted the repo state)
|
||||
run: |
|
||||
missing=()
|
||||
for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY CP_PROD_ADMIN_TOKEN CP_STAGING_ADMIN_TOKEN; do
|
||||
for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY CP_ADMIN_API_TOKEN CP_STAGING_ADMIN_API_TOKEN; do
|
||||
if [ -z "${!var:-}" ]; then
|
||||
missing+=("$var")
|
||||
fi
|
||||
|
||||
@ -75,8 +75,8 @@ jobs:
|
||||
env:
|
||||
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
|
||||
CF_ZONE_ID: ${{ secrets.CF_ZONE_ID }}
|
||||
CP_PROD_ADMIN_TOKEN: ${{ secrets.CP_PROD_ADMIN_TOKEN }}
|
||||
CP_STAGING_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_TOKEN }}
|
||||
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
|
||||
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
@ -109,7 +109,7 @@ jobs:
|
||||
# so they can rerun after fixing the secret)
|
||||
run: |
|
||||
missing=()
|
||||
for var in CF_API_TOKEN CF_ZONE_ID CP_PROD_ADMIN_TOKEN CP_STAGING_ADMIN_TOKEN AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
|
||||
for var in CF_API_TOKEN CF_ZONE_ID CP_ADMIN_API_TOKEN CP_STAGING_ADMIN_API_TOKEN AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
|
||||
if [ -z "${!var:-}" ]; then
|
||||
missing+=("$var")
|
||||
fi
|
||||
|
||||
@ -70,8 +70,8 @@ jobs:
|
||||
env:
|
||||
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
|
||||
CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
|
||||
CP_PROD_ADMIN_TOKEN: ${{ secrets.CP_PROD_ADMIN_TOKEN }}
|
||||
CP_STAGING_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_TOKEN }}
|
||||
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
|
||||
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '90' }}
|
||||
|
||||
steps:
|
||||
@ -89,7 +89,7 @@ jobs:
|
||||
# they already accepted the repo state)
|
||||
run: |
|
||||
missing=()
|
||||
for var in CF_API_TOKEN CF_ACCOUNT_ID CP_PROD_ADMIN_TOKEN CP_STAGING_ADMIN_TOKEN; do
|
||||
for var in CF_API_TOKEN CF_ACCOUNT_ID CP_ADMIN_API_TOKEN CP_STAGING_ADMIN_API_TOKEN; do
|
||||
if [ -z "${!var:-}" ]; then
|
||||
missing+=("$var")
|
||||
fi
|
||||
|
||||
@ -11,6 +11,9 @@ services:
|
||||
- "5432:5432"
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
networks:
|
||||
- molecule-core-net
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-dev}"]
|
||||
interval: 2s
|
||||
@ -25,6 +28,8 @@ services:
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER:-dev}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-dev}
|
||||
networks:
|
||||
- molecule-core-net
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
@ -45,6 +50,9 @@ services:
|
||||
- "6379:6379"
|
||||
volumes:
|
||||
- redisdata:/data
|
||||
networks:
|
||||
- molecule-core-net
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 2s
|
||||
@ -52,7 +60,7 @@ services:
|
||||
retries: 10
|
||||
|
||||
# digest-pinned 2026-05-10 (sha256:5b296e0ba1da74efea3143c773ddd60245f249fb7c72eb1d866c2d6ebc759fbe, linux/amd64)
|
||||
clickhouse:
|
||||
langfuse-clickhouse:
|
||||
image: clickhouse/clickhouse-server@sha256:5b296e0ba1da74efea3143c773ddd60245f249fb7c72eb1d866c2d6ebc759fbe
|
||||
environment:
|
||||
CLICKHOUSE_DB: langfuse
|
||||
@ -60,6 +68,8 @@ services:
|
||||
CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-langfuse-dev}
|
||||
volumes:
|
||||
- clickhousedata:/var/lib/clickhouse
|
||||
networks:
|
||||
- molecule-core-net
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://127.0.0.1:8123/ping || exit 1"]
|
||||
interval: 5s
|
||||
@ -100,29 +110,6 @@ services:
|
||||
ports:
|
||||
- "8233:8080"
|
||||
|
||||
# digest-pinned 2026-05-10 (sha256:e7aafd3ccf721821b40f8b2251220b4bb8af5e4877b5c5a8846af5b3318aaf1d, linux/amd64)
|
||||
langfuse-web:
|
||||
image: langfuse/langfuse@sha256:e7aafd3ccf721821b40f8b2251220b4bb8af5e4877b5c5a8846af5b3318aaf1d
|
||||
depends_on:
|
||||
clickhouse:
|
||||
condition: service_healthy
|
||||
langfuse-db-init:
|
||||
condition: service_completed_successfully
|
||||
environment:
|
||||
DATABASE_URL: postgres://${POSTGRES_USER:-dev}:${POSTGRES_PASSWORD:-dev}@postgres:5432/langfuse
|
||||
# Langfuse v2 expects the HTTP interface (port 8123). The previous
|
||||
# clickhouse://...:9000 native-protocol URL is rejected with
|
||||
# "ClickHouse URL protocol must be either http or https".
|
||||
CLICKHOUSE_URL: http://clickhouse:8123
|
||||
CLICKHOUSE_MIGRATION_URL: clickhouse://clickhouse:9000
|
||||
CLICKHOUSE_USER: langfuse
|
||||
CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-langfuse-dev}
|
||||
NEXTAUTH_SECRET: ${LANGFUSE_SECRET:-changeme-langfuse-secret}
|
||||
NEXTAUTH_URL: http://localhost:3001
|
||||
SALT: ${LANGFUSE_SALT:-changeme-langfuse-salt}
|
||||
ports:
|
||||
- "3001:3000"
|
||||
|
||||
networks:
|
||||
default:
|
||||
name: molecule-core-net
|
||||
|
||||
@ -3,85 +3,7 @@ include:
|
||||
- docker-compose.infra.yml
|
||||
|
||||
services:
|
||||
# --- Infrastructure ---
|
||||
# digest-pinned 2026-05-10 (sha256:4941ef97aaa2633ce9808f7766f8b8d746dd039ce8c51ca6da185c3dc63ab579, linux/amd64)
|
||||
postgres:
|
||||
image: postgres@sha256:4941ef97aaa2633ce9808f7766f8b8d746dd039ce8c51ca6da185c3dc63ab579
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER:-dev}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-dev}
|
||||
POSTGRES_DB: ${POSTGRES_DB:-molecule}
|
||||
command: ["postgres", "-c", "wal_level=logical"]
|
||||
ports:
|
||||
- "5432:5432"
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
networks:
|
||||
- molecule-core-net
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-dev}"]
|
||||
interval: 2s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
langfuse-db-init:
|
||||
image: postgres@sha256:4941ef97aaa2633ce9808f7766f8b8d746dd039ce8c51ca6da185c3dc63ab579
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER:-dev}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-dev}
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- |
|
||||
export PGPASSWORD="$${POSTGRES_PASSWORD}"
|
||||
until pg_isready -h postgres -U "$${POSTGRES_USER}" -d postgres >/dev/null 2>&1; do
|
||||
sleep 1
|
||||
done
|
||||
if ! psql -h postgres -U "$${POSTGRES_USER}" -d postgres -tAc "SELECT 1 FROM pg_database WHERE datname = 'langfuse'" | grep -q 1; then
|
||||
psql -h postgres -U "$${POSTGRES_USER}" -d postgres -c "CREATE DATABASE langfuse"
|
||||
fi
|
||||
networks:
|
||||
- molecule-core-net
|
||||
|
||||
# digest-pinned 2026-05-10 (sha256:b1addbe72465a718643cff9e60a58e6df1841e29d6d7d60c9a85d8d72f08d1a7, linux/amd64)
|
||||
redis:
|
||||
image: redis@sha256:b1addbe72465a718643cff9e60a58e6df1841e29d6d7d60c9a85d8d72f08d1a7
|
||||
command: ["redis-server", "--notify-keyspace-events", "KEA"]
|
||||
ports:
|
||||
- "6379:6379"
|
||||
volumes:
|
||||
- redisdata:/data
|
||||
networks:
|
||||
- molecule-core-net
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 2s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
# --- Observability ---
|
||||
# digest-pinned 2026-05-10 (sha256:5b296e0ba1da74efea3143c773ddd60245f249fb7c72eb1d866c2d6ebc759fbe, linux/amd64)
|
||||
langfuse-clickhouse:
|
||||
image: clickhouse/clickhouse-server@sha256:5b296e0ba1da74efea3143c773ddd60245f249fb7c72eb1d866c2d6ebc759fbe
|
||||
environment:
|
||||
CLICKHOUSE_DB: langfuse
|
||||
CLICKHOUSE_USER: langfuse
|
||||
CLICKHOUSE_PASSWORD: langfuse
|
||||
volumes:
|
||||
- clickhousedata:/var/lib/clickhouse
|
||||
networks:
|
||||
- molecule-core-net
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://127.0.0.1:8123/ping || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
# digest-pinned 2026-05-10 (sha256:e7aafd3ccf721821b40f8b2251220b4bb8af5e4877b5c5a8846af5b3318aaf1d, linux/amd64)
|
||||
langfuse:
|
||||
image: langfuse/langfuse@sha256:e7aafd3ccf721821b40f8b2251220b4bb8af5e4877b5c5a8846af5b3318aaf1d
|
||||
|
||||
@ -40,8 +40,8 @@
|
||||
#
|
||||
# Env vars required:
|
||||
# AWS_REGION — region the secrets live in (default: us-east-1)
|
||||
# CP_PROD_ADMIN_TOKEN — CP admin bearer for api.moleculesai.app
|
||||
# CP_STAGING_ADMIN_TOKEN — CP admin bearer for staging-api.moleculesai.app
|
||||
# CP_ADMIN_API_TOKEN — CP admin bearer for api.moleculesai.app
|
||||
# CP_STAGING_ADMIN_API_TOKEN — CP admin bearer for staging-api.moleculesai.app
|
||||
# AWS_ACCESS_KEY_ID, — IAM principal with secretsmanager:ListSecrets
|
||||
# AWS_SECRET_ACCESS_KEY and secretsmanager:DeleteSecret. Note: the
|
||||
# prod molecule-cp principal does NOT have
|
||||
@ -88,8 +88,8 @@ need() {
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
need CP_PROD_ADMIN_TOKEN
|
||||
need CP_STAGING_ADMIN_TOKEN
|
||||
need CP_ADMIN_API_TOKEN
|
||||
need CP_STAGING_ADMIN_API_TOKEN
|
||||
need AWS_ACCESS_KEY_ID
|
||||
need AWS_SECRET_ACCESS_KEY
|
||||
|
||||
@ -107,13 +107,13 @@ log() { echo "[$(date -u +%H:%M:%S)] $*"; }
|
||||
# response includes both `id` and `slug`; we extract `id` here.
|
||||
|
||||
log "Fetching CP prod org ids..."
|
||||
PROD_IDS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_PROD_ADMIN_TOKEN" \
|
||||
PROD_IDS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
"https://api.moleculesai.app/cp/admin/orgs?limit=500" \
|
||||
| python3 -c "import json,sys; print(' '.join(o['id'] for o in json.load(sys.stdin).get('orgs',[])))")
|
||||
log " prod orgs: $(echo "$PROD_IDS" | wc -w | tr -d ' ')"
|
||||
|
||||
log "Fetching CP staging org ids..."
|
||||
STAGING_IDS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_STAGING_ADMIN_TOKEN" \
|
||||
STAGING_IDS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_STAGING_ADMIN_API_TOKEN" \
|
||||
"https://staging-api.moleculesai.app/cp/admin/orgs?limit=500" \
|
||||
| python3 -c "import json,sys; print(' '.join(o['id'] for o in json.load(sys.stdin).get('orgs',[])))")
|
||||
log " staging orgs: $(echo "$STAGING_IDS" | wc -w | tr -d ' ')"
|
||||
|
||||
@ -20,8 +20,8 @@
|
||||
# Env vars required:
|
||||
# CF_API_TOKEN — Cloudflare token with zone:dns:edit
|
||||
# CF_ZONE_ID — the zone (moleculesai.app)
|
||||
# CP_PROD_ADMIN_TOKEN — CP admin bearer for api.moleculesai.app
|
||||
# CP_STAGING_ADMIN_TOKEN — CP admin bearer for staging-api.moleculesai.app
|
||||
# CP_ADMIN_API_TOKEN — CP admin bearer for api.moleculesai.app
|
||||
# CP_STAGING_ADMIN_API_TOKEN — CP admin bearer for staging-api.moleculesai.app
|
||||
# AWS_* — standard AWS creds (default region us-east-2)
|
||||
#
|
||||
# Exit codes:
|
||||
@ -58,21 +58,21 @@ need() {
|
||||
}
|
||||
need CF_API_TOKEN
|
||||
need CF_ZONE_ID
|
||||
need CP_PROD_ADMIN_TOKEN
|
||||
need CP_STAGING_ADMIN_TOKEN
|
||||
need CP_ADMIN_API_TOKEN
|
||||
need CP_STAGING_ADMIN_API_TOKEN
|
||||
|
||||
log() { echo "[$(date -u +%H:%M:%S)] $*"; }
|
||||
|
||||
# --- Gather live sets ------------------------------------------------------
|
||||
|
||||
log "Fetching CP prod org slugs..."
|
||||
PROD_SLUGS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_PROD_ADMIN_TOKEN" \
|
||||
PROD_SLUGS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
"https://api.moleculesai.app/cp/admin/orgs?limit=500" \
|
||||
| python3 -c "import json,sys; print(' '.join(o['slug'] for o in json.load(sys.stdin).get('orgs',[])))")
|
||||
log " prod orgs: $(echo "$PROD_SLUGS" | wc -w | tr -d ' ')"
|
||||
|
||||
log "Fetching CP staging org slugs..."
|
||||
STAGING_SLUGS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_STAGING_ADMIN_TOKEN" \
|
||||
STAGING_SLUGS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_STAGING_ADMIN_API_TOKEN" \
|
||||
"https://staging-api.moleculesai.app/cp/admin/orgs?limit=500" \
|
||||
| python3 -c "import json,sys; print(' '.join(o['slug'] for o in json.load(sys.stdin).get('orgs',[])))")
|
||||
log " staging orgs: $(echo "$STAGING_SLUGS" | wc -w | tr -d ' ')"
|
||||
|
||||
@ -31,8 +31,8 @@
|
||||
# token must include the tunnel scope.)
|
||||
# CF_ACCOUNT_ID — the account that owns the tunnels (visible
|
||||
# in dash.cloudflare.com URL path)
|
||||
# CP_PROD_ADMIN_TOKEN — CP admin bearer for api.moleculesai.app
|
||||
# CP_STAGING_ADMIN_TOKEN — CP admin bearer for staging-api.moleculesai.app
|
||||
# CP_ADMIN_API_TOKEN — CP admin bearer for api.moleculesai.app
|
||||
# CP_STAGING_ADMIN_API_TOKEN — CP admin bearer for staging-api.moleculesai.app
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 — dry-run completed or sweep executed successfully
|
||||
@ -72,21 +72,21 @@ need() {
|
||||
}
|
||||
need CF_API_TOKEN
|
||||
need CF_ACCOUNT_ID
|
||||
need CP_PROD_ADMIN_TOKEN
|
||||
need CP_STAGING_ADMIN_TOKEN
|
||||
need CP_ADMIN_API_TOKEN
|
||||
need CP_STAGING_ADMIN_API_TOKEN
|
||||
|
||||
log() { echo "[$(date -u +%H:%M:%S)] $*"; }
|
||||
|
||||
# --- Gather live sets ------------------------------------------------------
|
||||
|
||||
log "Fetching CP prod org slugs..."
|
||||
PROD_SLUGS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_PROD_ADMIN_TOKEN" \
|
||||
PROD_SLUGS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
"https://api.moleculesai.app/cp/admin/orgs?limit=500" \
|
||||
| python3 -c "import json,sys; print(' '.join(o['slug'] for o in json.load(sys.stdin).get('orgs',[])))")
|
||||
log " prod orgs: $(echo "$PROD_SLUGS" | wc -w | tr -d ' ')"
|
||||
|
||||
log "Fetching CP staging org slugs..."
|
||||
STAGING_SLUGS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_STAGING_ADMIN_TOKEN" \
|
||||
STAGING_SLUGS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_STAGING_ADMIN_API_TOKEN" \
|
||||
"https://staging-api.moleculesai.app/cp/admin/orgs?limit=500" \
|
||||
| python3 -c "import json,sys; print(' '.join(o['slug'] for o in json.load(sys.stdin).get('orgs',[])))")
|
||||
log " staging orgs: $(echo "$STAGING_SLUGS" | wc -w | tr -d ' ')"
|
||||
|
||||
@ -341,7 +341,7 @@ tenant_call() {
|
||||
# MiniMax account). Lower friction than MiniMax for operators
|
||||
# who already have an Anthropic API key for their own Claude
|
||||
# Code session. Pricier per-token than MiniMax but billing is
|
||||
# still independent of MOLECULE_STAGING_OPENAI_KEY. Pinned to the
|
||||
# still independent of MOLECULE_STAGING_OPENAI_API_KEY. Pinned to the
|
||||
# claude-code runtime — hermes/langgraph use OpenAI-shaped envs.
|
||||
#
|
||||
# E2E_OPENAI_API_KEY → langgraph + hermes paths. Kept as fallback
|
||||
@ -368,7 +368,7 @@ elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
# who already have an Anthropic API key (e.g. for their own Claude
|
||||
# Code session) and want to avoid setting up a separate MiniMax
|
||||
# account just for E2E. Pricier per-token than MiniMax but billing
|
||||
# is still independent of MOLECULE_STAGING_OPENAI_KEY, so an OpenAI
|
||||
# is still independent of MOLECULE_STAGING_OPENAI_API_KEY, so an OpenAI
|
||||
# quota collapse doesn't wedge this path. Pinned to the claude-code
|
||||
# runtime: hermes/langgraph use OpenAI-shaped envs and won't honour
|
||||
# ANTHROPIC_API_KEY without further wiring (out of scope for this
|
||||
@ -623,7 +623,7 @@ fi
|
||||
# "Encrypted content is not supported" → hermes codex_responses API misroute (#14)
|
||||
# "Unknown provider" → bridge misconfigured PROVIDER= (regression of #13 fix)
|
||||
# "hermes-agent unreachable" → gateway process died
|
||||
# "exceeded your current quota" → MOLECULE_STAGING_OPENAI_KEY billing (NOT a platform regression — #2578)
|
||||
# "exceeded your current quota" → MOLECULE_STAGING_OPENAI_API_KEY billing (NOT a platform regression — #2578)
|
||||
#
|
||||
# Fail LOUD with the specific pattern so CI log + alert channel makes the
|
||||
# regression unambiguous.
|
||||
@ -657,7 +657,7 @@ fi
|
||||
# with a provider-side 429, that is a billing event on the configured
|
||||
# OpenAI key, not a platform regression. Tracked in #2578.
|
||||
if echo "$AGENT_TEXT" | grep -qiE "exceeded your current quota|insufficient_quota"; then
|
||||
fail "A2A — PROVIDER QUOTA EXHAUSTED (NOT a platform regression). Operator action: top up MOLECULE_STAGING_OPENAI_KEY billing or rotate to a higher-quota org at Settings → Secrets and Variables → Actions. Tracked in #2578. Raw: $AGENT_TEXT"
|
||||
fail "A2A — PROVIDER QUOTA EXHAUSTED (NOT a platform regression). Operator action: top up MOLECULE_STAGING_OPENAI_API_KEY billing or rotate to a higher-quota org at Settings → Secrets and Variables → Actions. Tracked in #2578. Raw: $AGENT_TEXT"
|
||||
fi
|
||||
# Generic catch-all — falls through if none of the known regressions hit.
|
||||
if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
|
||||
|
||||
@ -167,12 +167,19 @@ async def _delegate_sync_via_polling(
|
||||
break
|
||||
if terminal:
|
||||
if (terminal.get("status") or "").lower() == "completed":
|
||||
return terminal.get("response_preview") or ""
|
||||
err = (
|
||||
# OFFSEC-003: sanitize response_preview before returning so
|
||||
# boundary markers injected by a malicious peer cannot escape
|
||||
# the trust boundary.
|
||||
return sanitize_a2a_result(terminal.get("response_preview") or "")
|
||||
# OFFSEC-003: sanitize error_detail / summary before wrapping with
|
||||
# the _A2A_ERROR_PREFIX sentinel so injected markers cannot appear
|
||||
# inside the trusted error block returned to the agent.
|
||||
err_raw = (
|
||||
terminal.get("error_detail")
|
||||
or terminal.get("summary")
|
||||
or "delegation failed"
|
||||
)
|
||||
err = sanitize_a2a_result(err_raw)
|
||||
return f"{_A2A_ERROR_PREFIX}{err}"
|
||||
|
||||
await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
|
||||
@ -408,12 +415,11 @@ async def tool_check_task_status(
|
||||
# Filter by delegation_id
|
||||
matching = [d for d in delegations if d.get("delegation_id") == task_id]
|
||||
if matching:
|
||||
entry = dict(matching[0])
|
||||
# OFFSEC-003: sanitize peer-generated text fields
|
||||
for field in ("result", "response_preview"):
|
||||
if field in entry and entry[field]:
|
||||
entry[field] = sanitize_a2a_result(str(entry[field]))
|
||||
return json.dumps(entry)
|
||||
# OFFSEC-003: sanitize peer-supplied fields
|
||||
d = matching[0]
|
||||
d["summary"] = sanitize_a2a_result(d.get("summary", ""))
|
||||
d["response_preview"] = sanitize_a2a_result(d.get("response_preview", ""))
|
||||
return json.dumps(d)
|
||||
return json.dumps({"status": "not_found", "delegation_id": task_id})
|
||||
# Return all recent delegations
|
||||
summary = []
|
||||
@ -425,7 +431,7 @@ async def tool_check_task_status(
|
||||
"delegation_id": d.get("delegation_id", ""),
|
||||
"target_id": d.get("target_id", ""),
|
||||
"status": d.get("status", ""),
|
||||
"summary": d.get("summary", ""),
|
||||
"summary": sanitize_a2a_result(d.get("summary", "")),
|
||||
"response_preview": preview,
|
||||
})
|
||||
return json.dumps({"delegations": summary, "count": len(delegations)})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user