Merge pull request #2202 from Molecule-AI/staging

staging → main: e2e teardown patience (#2201) one-time bridge
This commit is contained in:
Hongming Wang 2026-04-28 12:40:38 -07:00 committed by GitHub
commit 9f2878d185
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 1009 additions and 80 deletions

View File

@@ -79,6 +79,7 @@ jobs:
id-token: write # PyPI Trusted Publisher (OIDC) — no PYPI_TOKEN needed
outputs:
version: ${{ steps.version.outputs.version }}
wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }}
steps:
- uses: actions/checkout@v4
@@ -129,6 +130,28 @@ jobs:
working-directory: ${{ runner.temp }}/runtime-build
run: python -m build
- name: Capture wheel SHA256 for cascade content-verification
# Recorded BEFORE upload so the cascade probe can verify the
# bytes Fastly serves under the new version's URL match what
# we built. Closes a hole left by #2197: that probe verified
# pip can resolve the version (catches propagation lag) but
# not that the wheel content matches (would silently pass a
# Fastly stale-content scenario where the new version's URL
# serves an old wheel binary).
id: wheel_hash
working-directory: ${{ runner.temp }}/runtime-build
run: |
set -eu
WHEEL=$(ls dist/*.whl 2>/dev/null | head -1)
if [ -z "$WHEEL" ]; then
echo "::error::No .whl in dist/ — `python -m build` must have failed silently"
exit 1
fi
HASH=$(sha256sum "$WHEEL" | awk '{print $1}')
echo "wheel_sha256=${HASH}" >> "$GITHUB_OUTPUT"
echo "Local wheel SHA256 (pre-upload): ${HASH}"
echo "Wheel filename: $(basename "$WHEEL")"
- name: Verify package contents (sanity)
working-directory: ${{ runner.temp }}/runtime-build
run: |
@@ -212,6 +235,56 @@ jobs:
default_output_modes=['text/plain', 'application/json'],
)
print('✓ AgentCard call-shape smoke passed')
# Well-known agent-card path probe alignment. main.py's
# _send_initial_prompt() polls AGENT_CARD_WELL_KNOWN_PATH
# to know when the local A2A server is ready. If the SDK
# ever splits the constant value from the path that
# create_agent_card_routes() actually mounts at, every
# workspace silently drops its initial_prompt:
# - Probe gets 404 every attempt.
# - Falls through to 'server not ready after 30s,
# skipping' even though the server is fine.
# - The user hits a fresh chat with no kickoff context.
# This was the #2193 incident class — the v0.x → v1.x
# rename of /.well-known/agent.json → /.well-known/agent-card.json
# plus the constant itself moving to a2a.utils.constants.
# The source-tree pytest (test_agent_card_well_known_path.py)
# catches main.py-side regressions; this catches the
# SDK-side ones BEFORE PyPI upload.
from a2a.utils.constants import AGENT_CARD_WELL_KNOWN_PATH
from a2a.server.routes import create_agent_card_routes
mounted_paths = [
getattr(r, 'path', None)
for r in create_agent_card_routes(
AgentCard(
name='wk-smoke',
description='well-known mount alignment',
version='0.0.0-smoke',
)
)
]
assert AGENT_CARD_WELL_KNOWN_PATH in mounted_paths, (
f'AGENT_CARD_WELL_KNOWN_PATH ({AGENT_CARD_WELL_KNOWN_PATH!r}) '
f'is NOT among paths mounted by create_agent_card_routes '
f'({mounted_paths!r}). The SDK constant and its own route '
f'factory have drifted — workspace probes will 404 forever, '
f'silently dropping every workspace initial_prompt.'
)
print(f'✓ well-known mount alignment OK ({AGENT_CARD_WELL_KNOWN_PATH})')
# Message helper smoke. a2a-sdk renamed
# new_agent_text_message → new_text_message in the v1.x
# protobuf-flat migration (per the v0→v1 cheat sheet). main.py
# and a2a_executor.py call new_text_message in hot paths; if
# the import breaks, every reply errors with ImportError before
# the message even leaves the workspace. Importing here
# catches a future v2.x rename at publish time.
from a2a.helpers import new_text_message
msg = new_text_message('smoke')
assert msg is not None, 'new_text_message returned None'
print('✓ message helper import + call OK')
print('✓ smoke import passed')
"
@@ -239,28 +312,98 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Wait for PyPI to propagate the new version
# PyPI accepts the upload, then takes a few seconds to make it
# available via the package index. If the cascade fires too
# fast, downstream template builds run `pip install` against
# an index that hasn't seen the new version yet — they resolve
# to the previous one, and docker layer cache then locks that
# in for subsequent rebuilds (the cache trap that bit us five
# times tonight).
# PyPI accepts the upload, then takes a few seconds to make the
# new version visible across all THREE surfaces pip touches:
# 1. /pypi/<pkg>/<ver>/json — metadata endpoint
# 2. /simple/<pkg>/ — pip's primary download index
# 3. files.pythonhosted.org — CDN-fronted wheel binary
# Each has its own cache. The previous check polled only (1)
# and would let the cascade fire while (2) or (3) still served
# the previous version, so downstream `pip install` resolved
# to the old wheel. Docker layer cache then locked that stale
# resolution in for subsequent rebuilds (the cache trap that
# bit us five times in one night).
#
# Poll PyPI's JSON API for up to 60s. Cheap (~50ms per poll),
# avoids over-trusting "publish job said success."
# Two-stage probe per poll:
# (a) `pip install --no-cache-dir PACKAGE==VERSION` — succeeds
# only when the version is resolvable. Catches surface (1)
# and (2) propagation lag.
# (b) `pip download` of the same wheel + SHA256 compare against
# the just-built dist's hash. Catches surface (3) lag AND
# Fastly serving stale content under the new version's URL
# (a separate Fastly-corruption mode that pip-install alone
# can't see, since pip install resolves+unpacks against
# whatever bytes Fastly returns and never inspects them).
# Both must pass before the cascade fans out.
#
# The venv is reused across polls; only `pip install`/`pip
# download` run in the loop, with --force-reinstall +
# --no-cache-dir so the previous poll's cached state doesn't
# mask propagation lag.
env:
RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
EXPECTED_SHA256: ${{ needs.publish.outputs.wheel_sha256 }}
run: |
set -eu
if [ -z "$EXPECTED_SHA256" ]; then
echo "::error::publish job did not expose wheel_sha256 — cannot verify wheel content. Refusing to fan out cascade."
exit 1
fi
python -m venv /tmp/propagation-probe
PROBE=/tmp/propagation-probe/bin
$PROBE/pip install --upgrade --quiet pip
# Poll budget: 30 attempts × (~3-5s pip install + ~3s pip
# download + 4s sleep) ≈ 5-6 min wall on a slow GH runner.
# Generous vs PyPI's typical few-seconds propagation;
# failures past this are signal of a real PyPI / Fastly
# issue, not just lag.
for i in $(seq 1 30); do
if curl -fsS "https://pypi.org/pypi/molecule-ai-workspace-runtime/${RUNTIME_VERSION}/json" >/dev/null 2>&1; then
echo "::notice::✓ PyPI serving ${RUNTIME_VERSION} after ${i} polls"
exit 0
# Stage (a): can pip resolve and install the version?
if $PROBE/pip install \
--quiet \
--no-cache-dir \
--force-reinstall \
--no-deps \
"molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
>/dev/null 2>&1; then
INSTALLED=$($PROBE/pip show molecule-ai-workspace-runtime 2>/dev/null \
| awk -F': ' '/^Version:/{print $2}')
if [ "$INSTALLED" = "$RUNTIME_VERSION" ]; then
# Stage (b): does Fastly serve the bytes we uploaded?
# `pip download` writes the actual .whl file to disk so
# we can sha256sum it (vs `pip install` which unpacks
# and discards).
rm -rf /tmp/probe-dl
mkdir -p /tmp/probe-dl
if $PROBE/pip download \
--quiet \
--no-cache-dir \
--no-deps \
--dest /tmp/probe-dl \
"molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
>/dev/null 2>&1; then
WHEEL=$(ls /tmp/probe-dl/*.whl 2>/dev/null | head -1)
if [ -n "$WHEEL" ]; then
ACTUAL=$(sha256sum "$WHEEL" | awk '{print $1}')
if [ "$ACTUAL" = "$EXPECTED_SHA256" ]; then
echo "::notice::✓ pip resolves AND wheel content matches after ${i} poll(s) (sha256=${EXPECTED_SHA256})"
exit 0
fi
# Hash mismatch: PyPI accepted our upload but Fastly
# is serving different bytes under the version's URL.
# Most often this is propagation lag of the BINARY
# surface — the version is resolvable but the wheel
# cache hasn't caught up. Retry.
echo "::warning::poll ${i}: wheel content mismatch (got ${ACTUAL:0:12}…, want ${EXPECTED_SHA256:0:12}…) — Fastly likely still serving stale binary, retrying"
fi
fi
fi
fi
sleep 2
sleep 4
done
echo "::error::PyPI never propagated ${RUNTIME_VERSION} within 60s — refusing to fan out cascade against stale index"
echo "::error::pip never resolved molecule-ai-workspace-runtime==${RUNTIME_VERSION} with matching wheel content within ~5 min."
echo "::error::Expected wheel SHA256: ${EXPECTED_SHA256}"
echo "::error::Refusing to fan out cascade against stale or corrupt PyPI surfaces."
exit 1
- name: Fan out repository_dispatch

View File

@@ -10,15 +10,29 @@ name: Runtime Pin Compatibility
# 4. Every tenant workspace crashed; the canary tenant caught it but
# only after 5 hours of degraded staging
#
# This workflow installs the runtime in a fresh Python venv from PyPI
# and tries the same import the EC2 user-data does. If pip resolution
# silently produces a broken combo, this gate fails before the tenant
# image gets published.
# This workflow installs the CURRENTLY PUBLISHED runtime from PyPI on
# top of `workspace/requirements.txt` and smoke-imports. Catches:
# - Upstream PyPI yanks
# - Bad re-releases of molecule-ai-workspace-runtime
# - Already-shipped wheels that stop importing because a transitive
# dep moved underneath
#
# This is the "PyPI artifact health" half of pin compatibility. The
# companion workflow `runtime-prbuild-compat.yml` covers the
# "PR-introduced breakage" half by building the wheel from THIS PR's
# workspace/ source. Splitting the two means each gets a narrow
# `paths:` filter — the pypi-latest job no longer fires on doc-only
# workspace/ edits whose content can't change what's currently on PyPI.
on:
push:
branches: [main, staging]
paths:
# Narrow filter: pypi-latest is sensitive only to changes that
# affect what we're INSTALLING (requirements.txt) or WHAT THE
# CHECK ITSELF DOES (this workflow file). Edits to workspace/
# source code don't change what's on PyPI right now, so they
# don't change this gate's verdict.
- 'workspace/requirements.txt'
- '.github/workflows/runtime-pin-compat.yml'
pull_request:
@@ -42,8 +56,8 @@ concurrency:
cancel-in-progress: true
jobs:
default-install:
name: Default install + import smoke
pypi-latest-install:
name: PyPI-latest install + import smoke
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

View File

@@ -0,0 +1,100 @@
name: Runtime PR-Built Compatibility
# Companion to `runtime-pin-compat.yml`. That workflow tests what's
# CURRENTLY PUBLISHED on PyPI; this workflow tests what WOULD BE
# PUBLISHED if THIS PR merges.
#
# Why two workflows: the chicken-and-egg #128 fix added a "PR-built
# wheel" job to the original runtime-pin-compat.yml, but both jobs
# shared a `paths:` filter that was the union of their needs
# (`workspace/**`). That meant the PyPI-latest job ran on every doc
# edit even though the upstream PyPI artifact can't change with our
# workspace/ source. Splitting the two means each gets a narrow
# `paths:` filter that matches the inputs it actually depends on.
#
# Catches the failure mode where a PR adds an import requiring a newer
# SDK than `workspace/requirements.txt` pins:
# 1. Pip resolves the existing PyPI wheel + the old SDK pin → smoke
# passes (it imports the OLD main.py from the wheel, not the PR's
# new main.py).
# 2. Merge → publish-runtime.yml ships a wheel WITH the new import.
# 3. Tenant images redeploy → all crash on first boot with
# ImportError.
#
# By building from the PR's source and smoke-importing THAT wheel, we
# fail at PR-time instead of after publish.
on:
push:
branches: [main, staging]
paths:
# Broad filter: this workflow's verdict can change whenever any
# workspace/ source file changes (because the wheel we build is
# produced from those files), or when the build script itself
# changes (it controls the wheel layout).
- 'workspace/**'
- 'scripts/build_runtime_package.py'
- '.github/workflows/runtime-prbuild-compat.yml'
pull_request:
branches: [main, staging]
paths:
- 'workspace/**'
- 'scripts/build_runtime_package.py'
- '.github/workflows/runtime-prbuild-compat.yml'
workflow_dispatch:
# Required-check support: when this becomes a branch-protection gate,
# merge_group runs let the queue green-check this in addition to PRs.
merge_group:
types: [checks_requested]
# No cron: the same pre-merge run already covered the commit, and
# re-running daily wouldn't surface anything new (workspace/ doesn't
# change between cron firings unless a PR already passed this gate).
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
local-build-install:
# Builds the wheel from THIS PR's workspace/ + scripts/ and tests
# IT — the artifact that WOULD be published if this PR merges.
name: PR-built wheel + import smoke
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.11'
cache: pip
cache-dependency-path: workspace/requirements.txt
- name: Install build tooling
run: pip install build
- name: Build wheel from PR source (mirrors publish-runtime.yml)
# Use a fixed test version so the wheel filename is predictable.
# Doesn't reach PyPI — this build is local-only for the smoke.
# Use the SAME build script with the SAME args as
# publish-runtime.yml's build step. The temp dir path differs
# (`/tmp/runtime-build` here vs `${{ runner.temp }}/runtime-build`
# in publish-runtime.yml — they coincide on ubuntu-latest but
# the call sites are not byte-identical). The smoke import is
# also intentionally narrower than publish's: this gate exists
# to catch SDK-version-import drift specifically; full invariant
# coverage lives in publish-runtime.yml's own pre-PyPI smoke.
run: |
python scripts/build_runtime_package.py \
--version "0.0.0.dev0+pin-compat" \
--out /tmp/runtime-build
cd /tmp/runtime-build && python -m build
- name: Install built wheel + workspace requirements
run: |
python -m venv /tmp/venv-built
/tmp/venv-built/bin/pip install --upgrade pip
/tmp/venv-built/bin/pip install /tmp/runtime-build/dist/*.whl
/tmp/venv-built/bin/pip install -r workspace/requirements.txt
/tmp/venv-built/bin/pip show molecule-ai-workspace-runtime a2a-sdk \
| grep -E '^(Name|Version):'
- name: Smoke import the PR-built wheel
env:
WORKSPACE_ID: 00000000-0000-0000-0000-000000000001
run: |
/tmp/venv-built/bin/python -c "from molecule_runtime.main import main_sync; print('PR-built runtime imports OK')"

View File

@@ -1,6 +1,6 @@
# Ecosystem Watch — Phase 30 Competitive Tracking
**Created by:** PMM
**Date:** 2026-04-21
**Date:** 2026-04-27
**Status:** ACTIVE — competitor monitoring in progress
**Phase:** 30 — Remote Workspaces + Cross-Network Federation
@@ -118,7 +118,7 @@ Track competitor releases and market events that affect Phase 30 positioning. En
- **Check frequency:** Every marketing cycle
- **Trigger:** Any competitor shipping something that invalidates a Phase 30 positioning claim
- **File location:** `docs/ecosystem-watch.md` (origin/main)
- **Last updated by:** PMM | 2026-04-23 (LangGraph PRs verified OPEN; new feat PRs #1730/#1702/#1731 logged; release note written)
- **Last updated by:** PMM | 2026-04-27 (weekly refresh — all competitor versions, stars, and PRs verified current; Clawith v1.9.0 added)
---

View File

@@ -7,20 +7,46 @@ This path is aligned to the current repository and current UI. It gets you from
- Docker + Docker Compose v2
- Node.js 20+
- Go 1.25+
- `jq` (for the template-registry clone in `setup.sh`)
- One model/API key for the runtime you want to use
- `ANTHROPIC_API_KEY`
- `OPENAI_API_KEY`
- `GOOGLE_API_KEY`
- or another provider routed through LiteLLM
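Not sure the tooling is in place? A quick version check (a minimal sketch; it only prints the versions of the tools listed above):
```bash
docker --version && docker compose version   # Compose v2 prints "Docker Compose version v2.x"
node --version                               # needs 20+
go version                                   # needs 1.25+
jq --version
```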
## Step 1: Clone the repository
## The one-command path
```bash
git clone https://github.com/Molecule-AI/molecule-monorepo.git
cd molecule-monorepo
./scripts/dev-start.sh
```
That single script:
1. Writes `MOLECULE_ENV=development` into `.env` (first run only — an existing `.env` is preserved on re-runs)
2. Brings up Postgres, Redis, Langfuse, ClickHouse, and Temporal via `infra/scripts/setup.sh`
3. Populates the workspace template + plugin registry from `manifest.json`
4. Builds and starts the platform on `http://localhost:8080`
5. Installs canvas deps (first run) and starts the canvas on `http://localhost:3000`
6. Prints next-step instructions and tails both processes — `Ctrl-C` tears everything down
Total wall-clock: ~30 seconds for a re-run, ~2 minutes for a first run (npm install + docker pulls).
Once the canvas is up: open it, add your model API key in **Config → Secrets & API Keys → Global**, then click a template card or **+ Create blank workspace**.
## Manual setup (advanced)
If you'd rather run each component yourself — useful when you're iterating on the platform binary or the canvas in isolation — follow the steps below. Each section is what `dev-start.sh` does internally; running them by hand gives you per-component logs and lets you keep one piece running while you restart another.
### Step 1: Clone the repository
```bash
git clone https://github.com/Molecule-AI/molecule-monorepo.git
cd molecule-monorepo
```
## Step 2: Start the shared infrastructure
### Step 2: Start the shared infrastructure
Recommended:
@@ -28,7 +54,7 @@ Recommended:
./infra/scripts/setup.sh
```
That brings up Postgres, Redis, and Langfuse.
That brings up Postgres, Redis, Langfuse, ClickHouse, and Temporal.
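To confirm the stack is actually up before moving on, you can ask the same compose file for container status (a minimal sketch; assumes the `postgres` service name from `docker-compose.infra.yml`):
```bash
# Show the infra containers and their state
docker compose -f docker-compose.infra.yml ps

# Wait for Postgres specifically; the platform needs it before it can start
until docker compose -f docker-compose.infra.yml exec -T postgres pg_isready -q; do
  sleep 1
done
echo "postgres ready"
```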
If you only want the raw compose flow:
@@ -36,7 +62,7 @@ If you only want the raw compose flow:
docker compose -f docker-compose.infra.yml up -d
```
## Step 3: Start the platform
### Step 3: Start the platform
```bash
cd workspace-server
@@ -45,7 +71,7 @@ go run ./cmd/server
The control plane listens on `http://localhost:8080`.
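Before moving on to the canvas, you can poll the same `/health` endpoint that `dev-start.sh` waits on (a minimal sketch, assuming the default port):
```bash
until curl -sf http://localhost:8080/health >/dev/null; do
  sleep 1
done
echo "platform ready"
```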
## Step 4: Start the canvas
### Step 4: Start the canvas
In a new terminal:

View File

@@ -1,69 +1,182 @@
#!/bin/sh
# dev-start.sh — one-command local development environment.
#
# Starts: Postgres, Redis, Platform (Go :8080), Canvas (Next.js :3000)
# Stops all on Ctrl-C.
# What it does (in order):
# 1. Sets MOLECULE_ENV=development in .env if missing (dev-mode auth posture; see #684)
# 2. Runs infra/scripts/setup.sh (postgres + redis + langfuse + clickhouse
# + temporal + populates template/plugin registry from manifest.json)
# 3. Starts the platform (Go :8080), waits for /health
# 4. Starts the canvas (Next.js :3000), waits for HTTP 200
# 5. Prints a readiness banner with API-key add instructions
# 6. On Ctrl-C, kills both background processes and tears down infra
#
# Prerequisites:
# - Docker (for Postgres + Redis)
# - Go 1.25+ (for platform)
# - Node.js 20+ (for canvas)
# - Docker + Docker Compose v2 (for postgres/redis/langfuse/etc)
# - Go 1.25+ (for the platform binary)
# - Node.js 20+ (for the canvas)
# - jq (for setup.sh's manifest clone — optional; without it, the
# template palette will be empty until you run clone-manifest.sh
# manually)
#
# Usage:
# ./scripts/dev-start.sh
# # Open http://localhost:3000
# # Open http://localhost:3000, add your model API key in
# # Config → Secrets & API Keys, then create your first workspace.
#
# Idempotent: re-running picks up where the last run left off (existing
# .env is preserved, npm install skipped if node_modules present, etc).
set -e
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
ENV_FILE="$ROOT/.env"
cleanup() {
echo ""
echo "Shutting down..."
echo "==> Shutting down..."
kill $PLATFORM_PID $CANVAS_PID 2>/dev/null || true
# Use setup.sh's compose file (full infra) since that's what we
# brought up. `down` keeps named volumes by default — call with
# --volumes here only if you want a clean slate (we don't, since
# idempotent re-runs are the usual case).
docker compose -f "$ROOT/docker-compose.infra.yml" down 2>/dev/null || true
echo "Done."
echo " Done."
}
trap cleanup EXIT INT TERM
echo "==> Starting infrastructure (Postgres, Redis)..."
docker compose -f "$ROOT/docker-compose.infra.yml" up -d
# ─────────────────────────────────────────────── 1. dev-mode auth posture
echo "==> Waiting for Postgres..."
until docker compose -f "$ROOT/docker-compose.infra.yml" exec -T postgres pg_isready -q 2>/dev/null; do
sleep 1
done
echo " Postgres ready."
# The AdminAuth middleware closes its fail-open the moment the first
# workspace token lands in the DB — at which point /workspaces and
# other admin routes 401 unless the caller has either ADMIN_TOKEN or
# the dev-mode escape hatch. The canvas at localhost:3000 has no
# bearer token to send, so without one of those two paths it can't
# call admin endpoints after a workspace exists.
#
# For local dev the right posture is the dev-mode escape hatch:
#
# MOLECULE_ENV=development AND ADMIN_TOKEN unset
#
# That makes middleware.isDevModeFailOpen() return true and lets the
# canvas keep working without a bearer. Setting ADMIN_TOKEN here
# would BREAK the canvas (it has no way to read that token in dev).
#
# For SaaS the platform is provisioned with ADMIN_TOKEN set AND
# MOLECULE_ENV=production — either one closes the hatch. So the dev
# mode signal here is safe (it's only active when both other knobs
# are absent).
if [ -f "$ENV_FILE" ] && grep -q '^MOLECULE_ENV=' "$ENV_FILE"; then
echo "==> Reusing MOLECULE_ENV from existing .env"
else
echo "==> Setting MOLECULE_ENV=development in .env (dev-mode auth hatch)"
{
if [ -f "$ENV_FILE" ]; then
cat "$ENV_FILE"
echo ""
fi
echo "# Generated by scripts/dev-start.sh on $(date -u +%Y-%m-%dT%H:%M:%SZ)"
echo "# Local-dev auth posture: dev-mode fail-open lets the canvas at"
echo "# localhost:3000 call admin endpoints without a bearer token."
echo "# DO NOT set ADMIN_TOKEN here in dev — it would close the hatch"
echo "# and the canvas would 401 on every admin call."
echo "MOLECULE_ENV=development"
} > "$ENV_FILE.tmp"
mv "$ENV_FILE.tmp" "$ENV_FILE"
echo " Saved to $ENV_FILE"
fi
echo "==> Starting Platform (Go :8080)..."
# Source .env so the platform inherits MOLECULE_ENV (and anything else
# the user has added — e.g. ANTHROPIC_API_KEY for skipping the canvas
# Secrets UI). `set -a` exports every assignment in the sourced file
# without us having to know the var names.
set -a
# shellcheck disable=SC1090
. "$ENV_FILE"
set +a
# ─────────────────────────────────────────────── 2. infra + templates
# Use setup.sh (not raw docker-compose) so the template registry gets
# populated from manifest.json. Without that, the canvas template
# palette is empty and the user has to manually clone repos — exactly
# the friction this script exists to eliminate.
echo "==> Running infra/scripts/setup.sh (infra + template registry)"
"$ROOT/infra/scripts/setup.sh"
# ─────────────────────────────────────────────── 3. platform
echo "==> Starting Platform (Go :8080)"
cd "$ROOT/workspace-server"
go run ./cmd/server &
go run ./cmd/server > /tmp/molecule-platform.log 2>&1 &
PLATFORM_PID=$!
echo "==> Waiting for Platform health..."
until curl -sf http://localhost:8080/health >/dev/null 2>&1; do
echo " Waiting for Platform /health..."
PLATFORM_READY=0
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \
21 22 23 24 25 26 27 28 29 30; do
if curl -sf http://localhost:8080/health >/dev/null 2>&1; then
echo " Platform ready (t+${i}s)"
PLATFORM_READY=1
break
fi
sleep 1
done
echo " Platform ready."
if [ "$PLATFORM_READY" -ne 1 ]; then
echo " ✗ Platform did not respond in 30s — check /tmp/molecule-platform.log"
exit 1
fi
echo "==> Starting Canvas (Next.js :3000)..."
# ─────────────────────────────────────────────── 4. canvas
echo "==> Starting Canvas (Next.js :3000)"
cd "$ROOT/canvas"
if [ ! -d node_modules ]; then
echo " First-run: npm install (~30-60s)"
npm install
fi
npm run dev &
npm run dev > /tmp/molecule-canvas.log 2>&1 &
CANVAS_PID=$!
echo ""
echo "============================================"
echo " Molecule AI dev environment running"
echo ""
echo " Canvas: http://localhost:3000"
echo " Platform: http://localhost:8080"
echo " Postgres: localhost:5432"
echo " Redis: localhost:6379"
echo ""
echo " Press Ctrl-C to stop all services"
echo "============================================"
echo " Waiting for Canvas HTTP 200..."
CANVAS_READY=0
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \
21 22 23 24 25 26 27 28 29 30; do
code=$(curl -sf -o /dev/null -w "%{http_code}" http://localhost:3000/ 2>/dev/null || echo "0")
if [ "$code" = "200" ]; then
echo " Canvas ready (t+${i}s)"
CANVAS_READY=1
break
fi
sleep 1
done
if [ "$CANVAS_READY" -ne 1 ]; then
echo " ✗ Canvas did not respond in 30s — check /tmp/molecule-canvas.log"
exit 1
fi
# ─────────────────────────────────────────────── 5. readiness banner
cat <<EOF
═══════════════════════════════════════════════════════════
Molecule AI dev environment ready
Canvas: http://localhost:3000
Platform: http://localhost:8080
Logs: /tmp/molecule-platform.log
/tmp/molecule-canvas.log
Next steps:
1. Open http://localhost:3000 in a browser.
2. Add your model API key in
Config → Secrets & API Keys → Global
(skip if ANTHROPIC_API_KEY / OPENAI_API_KEY is already
set in .env — the platform inherits it.)
3. Click a template card or "+ Create blank workspace".
Press Ctrl-C to stop all services.
═══════════════════════════════════════════════════════════
EOF
wait

View File

@@ -86,24 +86,47 @@ cleanup_org() {
fi
log "🧹 Tearing down org $SLUG..."
curl "${CURL_COMMON[@]}" -X DELETE "$CP_URL/cp/admin/tenants/$SLUG" \
# The DELETE handler runs the GDPR Art. 17 cascade synchronously
# (Stripe + Redis + EC2 terminate + CF tunnel + DNS + DB rows). Real
# observed wall-time on prod-shaped infra is ~30-90s — EC2 termination
# alone takes 30-60s. The 5-15s estimate in `purge.go`'s comment is
# the API-call cost, NOT the AWS-side time-to-termination it waits on.
#
# Two-part patience to match reality:
# 1. 120s curl timeout on the DELETE itself (was 30s) so the
# synchronous cascade has room to complete in-band.
# 2. Poll up to 60s after for organizations.status='purged' (or row
# gone) instead of one rigid 10s sleep — covers the case where
# DELETE returns 5xx mid-cascade and the cascade finishes anyway,
# and the case where DELETE legitimately exceeds 120s and we want
# eventual-consistency confirmation.
curl "${CURL_COMMON[@]}" --max-time 120 -X DELETE "$CP_URL/cp/admin/tenants/$SLUG" \
-H "Authorization: Bearer $ADMIN_TOKEN" \
-H "Content-Type: application/json" \
-d "{\"confirm\":\"$SLUG\"}" >/dev/null 2>&1 \
&& ok "Teardown request accepted" \
|| log "Teardown returned non-2xx (may already be gone)"
sleep 10
local leak_count
leak_count=$(curl "${CURL_COMMON[@]}" "$CP_URL/cp/admin/orgs" \
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
| python3 -c "import json,sys; d=json.load(sys.stdin); print(sum(1 for o in d.get('orgs', []) if o.get('slug')=='$SLUG' and o.get('status') != 'purged'))" \
2>/dev/null || echo 0)
local leak_count=1
local elapsed=0
while [ "$elapsed" -lt 60 ]; do
leak_count=$(curl "${CURL_COMMON[@]}" "$CP_URL/cp/admin/orgs" \
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
| python3 -c "import json,sys; d=json.load(sys.stdin); print(sum(1 for o in d.get('orgs', []) if o.get('slug')=='$SLUG' and o.get('status') != 'purged'))" \
2>/dev/null || echo 1)
if [ "$leak_count" = "0" ]; then
break
fi
sleep 5
elapsed=$((elapsed + 5))
done
if [ "$leak_count" != "0" ]; then
echo "⚠️ LEAK: org $SLUG still present post-teardown (count=$leak_count)" >&2
echo "⚠️ LEAK: org $SLUG still present post-teardown after ${elapsed}s (count=$leak_count)" >&2
exit 4
fi
ok "Teardown clean — no orphan resources for $SLUG"
ok "Teardown clean — no orphan resources for $SLUG (${elapsed}s)"
# Normalize unexpected upstream exit codes to 1 (generic failure). The
# script's documented contract (header "Exit codes" section) only emits

View File

@@ -112,12 +112,15 @@ func sweepOnce(parent context.Context, reaper OrphanReaper) {
ctx, cancel := context.WithTimeout(parent, orphanSweepDeadline)
defer cancel()
// Two independent passes. Each handles its own short-circuit; an
// empty result or transient error in one must NOT stop the other,
// Three independent passes. Each handles its own short-circuit; an
// empty result or transient error in one must NOT stop the others,
// since the wiped-DB pass exists precisely for cases where the
// removed-row pass finds zero candidates (DB has been dropped).
// removed-row pass finds zero candidates (DB has been dropped) and
// the stale-token pass exists for the mirror case (DB persists but
// /configs volume has been wiped).
sweepRemovedRows(ctx, reaper)
sweepLabeledOrphansWithoutRows(ctx, reaper)
sweepStaleTokensWithoutContainer(ctx, reaper)
}
// sweepRemovedRows is the original sweep: ws-* containers (by name
@@ -290,3 +293,181 @@ func sweepLabeledOrphansWithoutRows(ctx context.Context, reaper OrphanReaper) {
}
}
}
// staleTokenGrace bounds how recently a token must have been used (or
// issued, if never used) for it to be considered "potentially live".
// Anything quieter than this is fair game for the stale-token revoke
// pass when there's no matching container.
//
// Sized vs the heartbeat cadence (30s) and provisioning latency: a
// healthy workspace touches `last_used_at` every heartbeat, so 5min is
// 10× the heartbeat interval — enough headroom that brief container
// restarts (Stop → Start) don't trip the pass. A workspace that's been
// silent past this window AND has no container is either a wiped-volume
// orphan or a workspace nobody is using; either way, revoking is safe
// because the next /registry/register mints a fresh token via the
// no-live-tokens bootstrap branch in registry.go.
const staleTokenGrace = 5 * time.Minute
// sweepStaleTokensWithoutContainer revokes workspace_auth_tokens rows
// for workspaces whose /configs volume must have been wiped — detected
// as "live token in DB whose owning workspace has no live Docker
// container". This heals the user-reported failure mode where
// `docker compose down -v` (or any out-of-band volume removal) leaves
// stale tokens in the DB while the recreated container has an empty
// `/configs/.auth_token`. Without this pass, /registry/register on the
// fresh container 401s forever (requireWorkspaceToken sees live tokens,
// container can't present one), and the workspace is permanently
// wedged until an operator manually revokes via SQL.
//
// The platform's restart endpoint already handles this case correctly
// via wsauth.RevokeAllForWorkspace inside issueAndInjectToken — this
// pass is the safety net for the equivalent action taken outside the
// API (operator did `docker compose down -v`, host crashed mid-restart,
// disk pressure evicted a volume, etc).
//
// Safety filters that bound the revoke radius:
//
// 1. Only runs in single-tenant Docker mode. The orphan sweeper is
// wired only when prov != nil (see cmd/server/main.go) — in CP/SaaS
// mode there is no Docker daemon and the sweeper doesn't run, so an
// empty container list cannot be confused with "no Docker at all"
// here (which would otherwise revoke every workspace's tokens).
// The function also short-circuits on a nil reaper as a belt-and-
// braces guard against a future refactor wiring it incorrectly.
//
// 2. staleTokenGrace skips tokens that were issued or used in the
// last 5 minutes. Bounds the race with mid-provisioning (token
// issued moments before docker run completes) and brief restart
// windows.
//
// 3. CRITICAL: the staleness predicate is enforced AT THE UPDATE,
// not just at the SELECT. This closes a TOCTOU race against
// workspace_provision.go:issueAndInjectToken — the platform's
// restart endpoint Stops the container synchronously then dispatches
// re-provisioning to a goroutine, so a stale-on-SELECT workspace
// can have a fresh token inserted by issueAndInjectToken between
// our SELECT and our UPDATE. A predicate-only `WHERE workspace_id
// = $1 AND revoked_at IS NULL` UPDATE would catch that fresh token
// too. Carrying COALESCE(last_used_at, created_at) < now() - grace
// in the UPDATE makes the operation idempotent against fresh
// inserts: a token created within the grace window cannot match.
//
// 4. The DB query joins on workspaces.status NOT IN ('removed',
// 'provisioning') so deleted and mid-restart workspaces are not
// revoked here — those are handled at delete time and by
// issueAndInjectToken respectively. (`status = 'provisioning'` is
// set synchronously in workspace_restart.go before the async
// re-provision begins, so it's a reliable in-flight signal.)
//
// 5. Each revocation is logged with the workspace ID so operators can
// correlate "workspace just lost auth" with this sweeper, not blame
// a network blip.
//
// Failure mode: revoke fails for some reason (transient DB error). The
// next sweep cycle (60s out) retries. Worst case: a workspace stays
// 401-blocked an extra minute.
func sweepStaleTokensWithoutContainer(ctx context.Context, reaper OrphanReaper) {
// Defence-in-depth (F2): a future refactor that wires the sweeper
// in CP/SaaS mode without checking prov would otherwise hit this
// pass with a nil reaper. The StartOrphanSweeper entry point
// already short-circuits on nil, but we don't want to depend on
// every future caller doing the same.
if reaper == nil {
return
}
prefixes, err := reaper.ListWorkspaceContainerIDPrefixes(ctx)
if err != nil {
log.Printf("Orphan sweeper: ListWorkspaceContainerIDPrefixes failed: %v — skipping stale-token pass", err)
return
}
// Same hex-and-dash filter as the other passes — anything that
// can't be a workspace UUID prefix doesn't belong in a SQL LIKE
// pattern.
//
// NOTE: an empty `likes` array is intentionally NOT a short-circuit.
// "No workspace containers" is the load-bearing case for this pass
// (operator nuked everything). The `cardinality($1) = 0` clause in
// the SELECT below treats empty likes as "no LIKE filter" → every
// stale-token workspace becomes a candidate. The first two passes'
// early-return-on-empty-prefixes pattern would defeat this entire
// pass's purpose.
likes := make([]string, 0, len(prefixes))
for _, p := range prefixes {
if !isLikelyWorkspaceID(p) {
continue
}
likes = append(likes, p+"%")
}
// Find workspaces with live tokens whose most-recent activity is
// past the grace window AND whose ID does NOT match any live
// container prefix. When `likes` is empty (no workspace containers
// running at all), every stale-activity workspace is a candidate —
// expressed via the `cardinality($1) = 0` short-circuit so the
// query has a single shape regardless of container count.
//
// make_interval(secs => $2) avoids the time.Duration.String() →
// `"5m0s"` mismatch with Postgres interval grammar; passing seconds
// as an int keeps the binding portable.
graceSeconds := int(staleTokenGrace.Seconds())
rows, qErr := db.DB.QueryContext(ctx, `
SELECT DISTINCT t.workspace_id::text
FROM workspace_auth_tokens t
JOIN workspaces w ON w.id = t.workspace_id
WHERE t.revoked_at IS NULL
AND w.status NOT IN ('removed', 'provisioning')
AND COALESCE(t.last_used_at, t.created_at) < now() - make_interval(secs => $2)
AND (
cardinality($1::text[]) = 0
OR NOT (t.workspace_id::text LIKE ANY($1::text[]))
)
`, pq.Array(likes), graceSeconds)
if qErr != nil {
log.Printf("Orphan sweeper: stale-token query failed: %v — skipping stale-token pass", qErr)
return
}
defer rows.Close()
var staleWorkspaceIDs []string
for rows.Next() {
var id string
if scanErr := rows.Scan(&id); scanErr != nil {
log.Printf("Orphan sweeper: stale-token row scan failed: %v", scanErr)
continue
}
staleWorkspaceIDs = append(staleWorkspaceIDs, id)
}
if iterErr := rows.Err(); iterErr != nil {
log.Printf("Orphan sweeper: stale-token rows iteration failed: %v", iterErr)
return
}
// Per-workspace UPDATE with the SAME staleness predicate as the
// SELECT, so any token inserted between SELECT and UPDATE (e.g.
// issueAndInjectToken racing during a user-triggered restart of a
// long-idle workspace) is automatically excluded — its created_at
// is fresh and won't satisfy `< now() - grace`.
//
// We deliberately bypass wsauth.RevokeAllForWorkspace here because
// that helper revokes EVERY live token for the workspace; we want
// "every STALE live token", which is a different (safer) operation.
for _, wsID := range staleWorkspaceIDs {
log.Printf("Orphan sweeper: revoking stale tokens for workspace %s (no live container; volume likely wiped)", wsID)
_, revokeErr := db.DB.ExecContext(ctx, `
UPDATE workspace_auth_tokens
SET revoked_at = now()
WHERE workspace_id = $1
AND revoked_at IS NULL
AND COALESCE(last_used_at, created_at) < now() - make_interval(secs => $2)
`, wsID, graceSeconds)
if revokeErr != nil {
// Non-fatal — next sweep retries. Bail on the loop so a
// systemic DB error doesn't spam the log on every iteration.
log.Printf("Orphan sweeper: stale-token revoke for %s failed: %v — will retry next cycle", wsID, revokeErr)
return
}
}
}

View File

@@ -10,6 +10,24 @@ import (
"github.com/DATA-DOG/go-sqlmock"
)
// expectStaleTokenSweepNoOp registers the third-pass query
// (sweepStaleTokensWithoutContainer) returning zero rows. The third
// pass runs unconditionally on every sweepOnce, so every test that
// doesn't specifically exercise stale-token revocation must register
// this expectation or sqlmock will fail "unexpected query".
//
// Centralising the regex here keeps the existing test suite readable —
// individual tests don't have to spell out a query they're not actually
// asserting against.
//
// The regex is anchored at the start of the query AND requires the
// status-filter to keep us from accidentally matching a future query
// that opens with the same column name. R3 from the review.
func expectStaleTokenSweepNoOp(mock sqlmock.Sqlmock) {
mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`).
WillReturnRows(sqlmock.NewRows([]string{"workspace_id"}))
}
// fakeReaper is a hand-rolled OrphanReaper for the sweeper tests.
// Records every Stop / RemoveVolume call so tests can assert which
// workspace IDs got reconciled.
@@ -73,6 +91,7 @@ func TestSweepOnce_ReconcilesRunningRemovedRows(t *testing.T) {
mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`).
WillReturnRows(sqlmock.NewRows([]string{"id"}).
AddRow("abc123def456-0000-0000-0000-000000000000"))
expectStaleTokenSweepNoOp(mock)
sweepOnce(context.Background(), reaper)
@@ -96,8 +115,11 @@ func TestSweepOnce_NoRunningContainers(t *testing.T) {
reaper := &fakeReaper{listResponse: nil}
// No DB query expected — if sweepOnce makes one anyway the
// sqlmock will fail "unexpected query".
// First two passes short-circuit on empty container lists. The
// third pass (stale-token sweep) DOES query — that's its whole
// reason for existing in the no-containers case (operator nuked
// everything). Mock it returning no stale tokens.
expectStaleTokenSweepNoOp(mock)
sweepOnce(context.Background(), reaper)
if len(reaper.stopCalls) != 0 {
@@ -145,6 +167,7 @@ func TestSweepOnce_StopFailureLeavesVolume(t *testing.T) {
mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`).
WillReturnRows(sqlmock.NewRows([]string{"id"}).
AddRow("abc123def456-0000-0000-0000-000000000000"))
expectStaleTokenSweepNoOp(mock)
sweepOnce(context.Background(), reaper)
@@ -173,6 +196,7 @@ func TestSweepOnce_VolumeRemoveErrorIsNonFatal(t *testing.T) {
WillReturnRows(sqlmock.NewRows([]string{"id"}).
AddRow("aaa111bbb222-0000-0000-0000-000000000000").
AddRow("ccc333ddd444-0000-0000-0000-000000000000"))
expectStaleTokenSweepNoOp(mock)
sweepOnce(context.Background(), reaper)
@@ -205,9 +229,12 @@ func TestSweepOnce_FiltersNonWorkspacePrefixes(t *testing.T) {
},
}
// No DB query expected — every prefix is rejected before the
// query builds, so we short-circuit. sqlmock fails on any
// unexpected query.
// First-pass query is skipped — every prefix is rejected before
// the query builds. Third-pass query still runs (filtered prefixes
// + non-empty input list still produces an empty likes array,
// which the third-pass treats the same as "no containers running"
// → stale-token candidates with no LIKE filter). Mock it empty.
expectStaleTokenSweepNoOp(mock)
sweepOnce(context.Background(), reaper)
if len(reaper.stopCalls) != 0 {
@@ -289,6 +316,7 @@ func TestSweepOnce_WipedDBReapsLabeledOrphans(t *testing.T) {
// returns no rows — both prefixes are unknown.
mock.ExpectQuery(`SELECT lk\s+FROM unnest`).
WillReturnRows(sqlmock.NewRows([]string{"lk"}))
expectStaleTokenSweepNoOp(mock)
sweepOnce(context.Background(), reaper)
@@ -328,6 +356,7 @@ func TestSweepOnce_WipedDBSkipsLabeledContainersWithRows(t *testing.T) {
WillReturnRows(sqlmock.NewRows([]string{"lk"}).
AddRow("abc123def456%").
AddRow("ee0011223344%"))
expectStaleTokenSweepNoOp(mock)
sweepOnce(context.Background(), reaper)
@@ -355,6 +384,7 @@ func TestSweepOnce_WipedDBReapsOnlyTheUnknownOnes(t *testing.T) {
mock.ExpectQuery(`SELECT lk\s+FROM unnest`).
WillReturnRows(sqlmock.NewRows([]string{"lk"}).
AddRow(keep + "%"))
expectStaleTokenSweepNoOp(mock)
sweepOnce(context.Background(), reaper)
@@ -380,7 +410,10 @@ func TestSweepOnce_WipedDBSkippedOnDockerError(t *testing.T) {
}
// No DB query expected for the second pass since we error out
// before reaching SQL.
// before reaching SQL. The third pass (stale-token sweep) uses
// ListWorkspaceContainerIDPrefixes (which succeeded with empty
// here, not the same call that errored), so it DOES query.
expectStaleTokenSweepNoOp(mock)
sweepOnce(context.Background(), reaper)
if len(reaper.stopCalls) != 0 {
@@ -410,6 +443,7 @@ func TestSweepOnce_WipedDBSkipsNonUUIDPrefixes(t *testing.T) {
// that should appear in the unnest array.
mock.ExpectQuery(`SELECT lk\s+FROM unnest`).
WillReturnRows(sqlmock.NewRows([]string{"lk"}))
expectStaleTokenSweepNoOp(mock)
sweepOnce(context.Background(), reaper)
@@ -420,3 +454,204 @@ func TestSweepOnce
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// =============================================================================
// Third pass: sweepStaleTokensWithoutContainer
//
// Heals the user-reported "auth token conflict after volume wipe" failure mode.
// Scenario: operator runs `docker compose down -v` (or any out-of-band volume
// removal); DB still has tokens for workspaces whose recreated containers boot
// with empty /configs and 401 forever on /registry/register.
// =============================================================================
// TestSweepOnce_StaleTokenRevokeFiresWhenNoContainer — the headline
// case. A workspace has live tokens in the DB but no live container
// matches it (volume-wipe scenario). The third pass revokes.
func TestSweepOnce_StaleTokenRevokeFiresWhenNoContainer(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
// One name-shaped container is running and has a status='removed'
// row, so the first pass reaps it. Second pass finds nothing
// (managed list empty in this scenario). Third pass: even though
// the running container covers that prefix, an unrelated
// workspace ID (no live container, no name prefix match) has
// stale tokens — revoke it.
const orphanedID = "deadbeef-0000-0000-0000-000000000000"
reaper := &fakeReaper{listResponse: []string{"abc123def456"}}
mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`).
WillReturnRows(sqlmock.NewRows([]string{"id"}).
AddRow("abc123def456-0000-0000-0000-000000000000"))
// Third-pass query returns the orphaned workspace.
// Tight regex pins the safety guards: status-filter excludes
// 'removed' and 'provisioning' (R2 + the C1 fix), and the
// staleness predicate appears in the SELECT.
mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\).*COALESCE\(t\.last_used_at, t\.created_at\) < now\(\) - make_interval`).
WillReturnRows(sqlmock.NewRows([]string{"workspace_id"}).
AddRow(orphanedID))
// Revoke executes one UPDATE — and the UPDATE itself MUST also
// carry the staleness predicate (closes the C1 TOCTOU race
// against issueAndInjectToken inserting a fresh token between
// our SELECT and our UPDATE).
mock.ExpectExec(`(?s)UPDATE workspace_auth_tokens\s+SET revoked_at = now\(\)\s+WHERE workspace_id = \$1\s+AND revoked_at IS NULL\s+AND COALESCE\(last_used_at, created_at\) < now\(\) - make_interval`).
WithArgs(orphanedID, 300).
WillReturnResult(sqlmock.NewResult(0, 1))
sweepOnce(context.Background(), reaper)
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestSweepOnce_StaleTokenSkippedWhenContainerExists — pin the safety
// guarantee: a workspace with both live tokens AND a live container
// must NOT be revoked. The query's NOT LIKE clause is the gate; this
// test exercises that gate by having the third-pass query return zero
// rows (the live-container workspace is filtered out).
func TestSweepOnce_StaleTokenSkippedWhenContainerExists(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
// One running container; first pass returns no removed-row matches.
reaper := &fakeReaper{listResponse: []string{"abc123def456"}}
mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`).
WillReturnRows(sqlmock.NewRows([]string{"id"}))
// Third-pass query: the live workspace has live tokens but its
// prefix matches the running container, so the NOT LIKE excludes
// it. Result: zero stale tokens.
expectStaleTokenSweepNoOp(mock)
sweepOnce(context.Background(), reaper)
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestSweepOnce_StaleTokenRevokeFailureBailsLoop — a transient DB
// error during revoke must not spam the log on every iteration.
// Bail out of the loop; next 60s cycle retries.
func TestSweepOnce_StaleTokenRevokeFailureBailsLoop(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
reaper := &fakeReaper{listResponse: nil}
// Third-pass returns two stale-token workspaces; the first revoke
// errors. Loop must bail without attempting the second.
mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`).
WillReturnRows(sqlmock.NewRows([]string{"workspace_id"}).
AddRow("aaaa1111-0000-0000-0000-000000000000").
AddRow("bbbb2222-0000-0000-0000-000000000000"))
mock.ExpectExec(`(?s)UPDATE workspace_auth_tokens\s+SET revoked_at = now\(\)\s+WHERE workspace_id = \$1\s+AND revoked_at IS NULL\s+AND COALESCE\(last_used_at, created_at\) < now\(\) - make_interval`).
WithArgs("aaaa1111-0000-0000-0000-000000000000", 300).
WillReturnError(errors.New("connection reset"))
// No second ExpectExec: if the loop tries it, sqlmock fails
// "unexpected call".
sweepOnce(context.Background(), reaper)
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestSweepOnce_StaleTokenQueryErrorIsNonFatal — a transient DB error
// on the SELECT must not prevent the rest of sweepOnce from making
// progress. (In this test there's no other progress to make either,
// just verifying no panic + the cycle completes.)
func TestSweepOnce_StaleTokenQueryErrorIsNonFatal(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
reaper := &fakeReaper{listResponse: nil}
mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`).
WillReturnError(errors.New("connection reset"))
sweepOnce(context.Background(), reaper)
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestSweepOnce_StaleTokenRevokeUsesStalenessPredicate — pin the C1
// race fix: the per-workspace UPDATE must carry the staleness
// predicate so a token inserted by issueAndInjectToken between our
// SELECT and our UPDATE is automatically excluded (its created_at is
// fresh and won't satisfy `< now() - grace`).
//
// This test asserts the SHAPE of the UPDATE (predicate present, grace
// argument bound). A real-Postgres integration test would prove the
// race resolution end-to-end; this catches the regression where
// someone "simplifies" the UPDATE back to a predicate-only revoke.
func TestSweepOnce_StaleTokenRevokeUsesStalenessPredicate(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
const orphanedID = "deadbeef-0000-0000-0000-000000000000"
reaper := &fakeReaper{listResponse: nil}
mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`).
WillReturnRows(sqlmock.NewRows([]string{"workspace_id"}).
AddRow(orphanedID))
// The UPDATE regex requires every guard: workspace_id binding,
// revoked_at IS NULL, AND the staleness predicate using the SAME
// COALESCE expression as the SELECT. Loosening any of these
// would re-open the C1 race, and this regex would no longer match.
mock.ExpectExec(`(?s)UPDATE workspace_auth_tokens\s+SET revoked_at = now\(\)\s+WHERE workspace_id = \$1\s+AND revoked_at IS NULL\s+AND COALESCE\(last_used_at, created_at\) < now\(\) - make_interval\(secs => \$2\)`).
WithArgs(orphanedID, 300).
WillReturnResult(sqlmock.NewResult(0, 1))
sweepOnce(context.Background(), reaper)
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestSweepStaleTokens_NilReaperEarlyExit — defence-in-depth (F2):
// even though StartOrphanSweeper short-circuits on nil reaper, the
// individual pass also early-exits. Protects against future refactors
// that wire the pass without the outer guard.
func TestSweepStaleTokens_NilReaperEarlyExit(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
// No DB queries expected. If the early-return is removed, sqlmock
// fails on the unexpected SELECT.
sweepStaleTokensWithoutContainer(context.Background(), nil)
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestSweepOnce_StaleTokenSkippedWhenDockerListFails — if the third
// pass can't enumerate containers (Docker hiccup), it must skip the
// query entirely. Otherwise it would query with empty likes and
// revoke every stale-token workspace based on stale information.
func TestSweepOnce_StaleTokenSkippedWhenDockerListFails(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
reaper := &fakeReaper{listErr: errors.New("daemon unreachable")}
// No DB queries expected: first pass bails on listErr, second
// pass uses managedList (also fails because we never set it),
// third pass also bails on listErr. Verify by NOT registering
// expectStaleTokenSweepNoOp — sqlmock fails on any unexpected
// query.
sweepOnce(context.Background(), reaper)
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}

View File

@@ -437,13 +437,23 @@ async def main(): # pragma: no cover
)
async def _send_initial_prompt():
"""Wait for server to be ready, then send initial_prompt as self-message."""
# Wait for the A2A server to accept connections
# Wait for the A2A server to accept connections.
# Use the SDK's own constant for the well-known path so this
# probe and the route mounted by create_agent_card_routes()
# never drift apart. Before this fix, the probe hardcoded the pre-1.x
# well-known path string; a2a-sdk 1.x renamed it (the
# canonical value lives in a2a.utils.constants now), so
# the probe got 404 every attempt and fell through to
# "server not ready after 30s, skipping" even though the
# server was actually serving fine. Net effect: every
# workspace silently dropped its `initial_prompt`.
from a2a.utils.constants import AGENT_CARD_WELL_KNOWN_PATH
ready = False
for attempt in range(30):
await asyncio.sleep(1)
try:
async with httpx.AsyncClient(timeout=5.0) as client:
resp = await client.get(f"http://127.0.0.1:{port}/.well-known/agent.json")
resp = await client.get(f"http://127.0.0.1:{port}{AGENT_CARD_WELL_KNOWN_PATH}")
if resp.status_code == 200:
ready = True
break

View File

@@ -0,0 +1,84 @@
"""Pin the agent-card readiness probe to the SDK's canonical path.
main.py's _send_initial_prompt() polls the local A2A server's
well-known agent-card URL to know when it's safe to send the initial
prompt as a self-message. Before this fix, the URL was hardcoded to the pre-1.x
literal; a2a-sdk 1.x renamed the well-known path (the canonical value
lives in `a2a.utils.constants.AGENT_CARD_WELL_KNOWN_PATH`), so the
probe got 404 every attempt and silently fell through to "server not
ready after 30s, skipping" — dropping every workspace's
`initial_prompt` from config.yaml.
The fix is to import the SDK's `AGENT_CARD_WELL_KNOWN_PATH` constant
and use it directly in the probe URL. These tests pin the static
invariants of that fix:
1. No hardcoded `/.well-known/agent.json` literal anywhere in
main.py (catches a future contributor reverting to a literal).
2. The probe URL fstring interpolates `AGENT_CARD_WELL_KNOWN_PATH`
(catches a "fix" that imports the constant for show but still
uses a literal in the actual GET).
Note: we deliberately do not assert the constant's value or compare
it against `create_agent_card_routes()` here. The runtime SDK is
mocked in this directory's conftest for the executor-test path, so
any test that imports the real `a2a.utils.constants` would either
collide with the mock or require running in a separate pytest session.
The two static invariants are sufficient: by always following whatever
the SDK constant says, we travel through any rename automatically. The
SDK's own contract that `create_agent_card_routes` mounts at the
constant's value is the SDK's responsibility, not ours.
"""
from __future__ import annotations
import re
from pathlib import Path
WORKSPACE_ROOT = Path(__file__).resolve().parents[1]
def test_main_uses_sdk_constant_for_agent_card_probe():
"""No hardcoded `/.well-known/agent.json` literal anywhere in main.py.
The SDK constant (AGENT_CARD_WELL_KNOWN_PATH) is the single source
of truth; string-literal probes drift the moment the SDK renames it.
"""
main = (WORKSPACE_ROOT / "main.py").read_text()
bad_literal = "/.well-known/agent.json"
offenders = [
(lineno, line)
for lineno, line in enumerate(main.splitlines(), 1)
if bad_literal in line
]
assert not offenders, (
f"Found pre-1.x literal {bad_literal!r} in main.py — must use "
f"the SDK's AGENT_CARD_WELL_KNOWN_PATH constant instead. "
f"Offending lines: {offenders}"
)
assert (
"AGENT_CARD_WELL_KNOWN_PATH" in main
), "main.py must import a2a.utils.constants.AGENT_CARD_WELL_KNOWN_PATH"
def test_probe_loop_uses_constant_in_url_format():
"""Spot-check that the URL fstring in main.py interpolates the
constant, not a literal. Catches a future "fix" that imports the
constant for show but still uses a literal in the actual GET."""
main = (WORKSPACE_ROOT / "main.py").read_text()
# The probe pattern: `client.get(f"http://127.0.0.1:{port}{...}")`
# where `{...}` must be `{AGENT_CARD_WELL_KNOWN_PATH}`, not a
# hardcoded path.
pattern = re.compile(
r'client\.get\(f"http://127\.0\.0\.1:\{port\}\{(?P<expr>[^}]+)\}"\)'
)
matches = pattern.findall(main)
assert matches, "no readiness probe pattern found in main.py"
for expr in matches:
assert "AGENT_CARD_WELL_KNOWN_PATH" in expr, (
f"readiness probe URL uses {expr!r} instead of "
f"AGENT_CARD_WELL_KNOWN_PATH"
)