diff --git a/.github/workflows/publish-runtime.yml b/.github/workflows/publish-runtime.yml index e1fd659d..516f8f98 100644 --- a/.github/workflows/publish-runtime.yml +++ b/.github/workflows/publish-runtime.yml @@ -79,6 +79,7 @@ jobs: id-token: write # PyPI Trusted Publisher (OIDC) — no PYPI_TOKEN needed outputs: version: ${{ steps.version.outputs.version }} + wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }} steps: - uses: actions/checkout@v4 @@ -129,6 +130,28 @@ jobs: working-directory: ${{ runner.temp }}/runtime-build run: python -m build + - name: Capture wheel SHA256 for cascade content-verification + # Recorded BEFORE upload so the cascade probe can verify the + # bytes Fastly serves under the new version's URL match what + # we built. Closes a hole left by #2197: that probe verified + # pip can resolve the version (catches propagation lag) but + # not that the wheel content matches (would silently pass a + # Fastly stale-content scenario where the new version's URL + # serves an old wheel binary). + id: wheel_hash + working-directory: ${{ runner.temp }}/runtime-build + run: | + set -eu + WHEEL=$(ls dist/*.whl 2>/dev/null | head -1) + if [ -z "$WHEEL" ]; then + echo "::error::No .whl in dist/ — `python -m build` must have failed silently" + exit 1 + fi + HASH=$(sha256sum "$WHEEL" | awk '{print $1}') + echo "wheel_sha256=${HASH}" >> "$GITHUB_OUTPUT" + echo "Local wheel SHA256 (pre-upload): ${HASH}" + echo "Wheel filename: $(basename "$WHEEL")" + - name: Verify package contents (sanity) working-directory: ${{ runner.temp }}/runtime-build run: | @@ -212,6 +235,56 @@ jobs: default_output_modes=['text/plain', 'application/json'], ) print('✓ AgentCard call-shape smoke passed') + + # Well-known agent-card path probe alignment. main.py's + # _send_initial_prompt() polls AGENT_CARD_WELL_KNOWN_PATH + # to know when the local A2A server is ready. If the SDK + # ever splits the constant value from the path that + # create_agent_card_routes() actually mounts at, every + # workspace silently drops its initial_prompt: + # - Probe gets 404 every attempt. + # - Falls through to 'server not ready after 30s, + # skipping' even though the server is fine. + # - The user hits a fresh chat with no kickoff context. + # This was the #2193 incident class — the v0.x → v1.x + # rename of /.well-known/agent.json → /.well-known/agent-card.json + # plus the constant itself moving to a2a.utils.constants. + # source-tree pytest (test_agent_card_well_known_path.py) + # catches main.py-side regressions; this catches the + # SDK-side ones BEFORE PyPI upload. + from a2a.utils.constants import AGENT_CARD_WELL_KNOWN_PATH + from a2a.server.routes import create_agent_card_routes + mounted_paths = [ + getattr(r, 'path', None) + for r in create_agent_card_routes( + AgentCard( + name='wk-smoke', + description='well-known mount alignment', + version='0.0.0-smoke', + ) + ) + ] + assert AGENT_CARD_WELL_KNOWN_PATH in mounted_paths, ( + f'AGENT_CARD_WELL_KNOWN_PATH ({AGENT_CARD_WELL_KNOWN_PATH!r}) ' + f'is NOT among paths mounted by create_agent_card_routes ' + f'({mounted_paths!r}). The SDK constant and its own route ' + f'factory have drifted — workspace probes will 404 forever, ' + f'silently dropping every workspace initial_prompt.' + ) + print(f'✓ well-known mount alignment OK ({AGENT_CARD_WELL_KNOWN_PATH})') + + # Message helper smoke. a2a-sdk renamed + # new_agent_text_message → new_text_message in the v1.x + # protobuf-flat migration (per the v0→v1 cheat sheet). 
main.py + # and a2a_executor.py call new_text_message in hot paths; if + # the import breaks, every reply errors with ImportError before + # the message even leaves the workspace. Importing here + # catches a future v2.x rename at publish time. + from a2a.helpers import new_text_message + msg = new_text_message('smoke') + assert msg is not None, 'new_text_message returned None' + print('✓ message helper import + call OK') + print('✓ smoke import passed') " @@ -239,28 +312,98 @@ jobs: runs-on: ubuntu-latest steps: - name: Wait for PyPI to propagate the new version - # PyPI accepts the upload, then takes a few seconds to make it - # available via the package index. If the cascade fires too - # fast, downstream template builds run `pip install` against - # an index that hasn't seen the new version yet — they resolve - # to the previous one, and docker layer cache then locks that - # in for subsequent rebuilds (the cache trap that bit us five - # times tonight). + # PyPI accepts the upload, then takes a few seconds to make the + # new version visible across all THREE surfaces pip touches: + # 1. /pypi///json — metadata endpoint + # 2. /simple// — pip's primary download index + # 3. files.pythonhosted.org — CDN-fronted wheel binary + # Each has its own cache. The previous check polled only (1) + # and would let the cascade fire while (2) or (3) still served + # the previous version, so downstream `pip install` resolved + # to the old wheel. Docker layer cache then locked that stale + # resolution in for subsequent rebuilds (the cache trap that + # bit us five times in one night). # - # Poll PyPI's JSON API for up to 60s. Cheap (~50ms per poll), - # avoids over-trusting "publish job said success." + # Two-stage probe per poll: + # (a) `pip install --no-cache-dir PACKAGE==VERSION` — succeeds + # only when the version is resolvable. Catches surface (1) + # and (2) propagation lag. + # (b) `pip download` of the same wheel + SHA256 compare against + # the just-built dist's hash. Catches surface (3) lag AND + # Fastly serving stale content under the new version's URL + # (a separate Fastly-corruption mode that pip-install alone + # can't see, since pip install resolves+unpacks against + # whatever bytes Fastly returns and never inspects them). + # Both must pass before the cascade fans out. + # + # The venv is reused across polls; only `pip install`/`pip + # download` run in the loop, with --force-reinstall + + # --no-cache-dir so the previous poll's cached state doesn't + # mask propagation lag. env: RUNTIME_VERSION: ${{ needs.publish.outputs.version }} + EXPECTED_SHA256: ${{ needs.publish.outputs.wheel_sha256 }} run: | set -eu + if [ -z "$EXPECTED_SHA256" ]; then + echo "::error::publish job did not expose wheel_sha256 — cannot verify wheel content. Refusing to fan out cascade." + exit 1 + fi + python -m venv /tmp/propagation-probe + PROBE=/tmp/propagation-probe/bin + $PROBE/pip install --upgrade --quiet pip + # Poll budget: 30 attempts × (~3-5s pip install + ~3s pip + # download + 4s sleep) ≈ 5-6 min wall on a slow GH runner. + # Generous vs PyPI's typical few-seconds propagation; + # failures past this are signal of a real PyPI / Fastly + # issue, not just lag. for i in $(seq 1 30); do - if curl -fsS "https://pypi.org/pypi/molecule-ai-workspace-runtime/${RUNTIME_VERSION}/json" >/dev/null 2>&1; then - echo "::notice::✓ PyPI serving ${RUNTIME_VERSION} after ${i} polls" - exit 0 + # Stage (a): can pip resolve and install the version? 
+ if $PROBE/pip install \ + --quiet \ + --no-cache-dir \ + --force-reinstall \ + --no-deps \ + "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \ + >/dev/null 2>&1; then + INSTALLED=$($PROBE/pip show molecule-ai-workspace-runtime 2>/dev/null \ + | awk -F': ' '/^Version:/{print $2}') + if [ "$INSTALLED" = "$RUNTIME_VERSION" ]; then + # Stage (b): does Fastly serve the bytes we uploaded? + # `pip download` writes the actual .whl file to disk so + # we can sha256sum it (vs `pip install` which unpacks + # and discards). + rm -rf /tmp/probe-dl + mkdir -p /tmp/probe-dl + if $PROBE/pip download \ + --quiet \ + --no-cache-dir \ + --no-deps \ + --dest /tmp/probe-dl \ + "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \ + >/dev/null 2>&1; then + WHEEL=$(ls /tmp/probe-dl/*.whl 2>/dev/null | head -1) + if [ -n "$WHEEL" ]; then + ACTUAL=$(sha256sum "$WHEEL" | awk '{print $1}') + if [ "$ACTUAL" = "$EXPECTED_SHA256" ]; then + echo "::notice::✓ pip resolves AND wheel content matches after ${i} poll(s) (sha256=${EXPECTED_SHA256})" + exit 0 + fi + # Hash mismatch: PyPI accepted our upload but Fastly + # is serving different bytes under the version's URL. + # Most often this is propagation lag of the BINARY + # surface — the version is resolvable but the wheel + # cache hasn't caught up. Retry. + echo "::warning::poll ${i}: wheel content mismatch (got ${ACTUAL:0:12}…, want ${EXPECTED_SHA256:0:12}…) — Fastly likely still serving stale binary, retrying" + fi + fi + fi fi - sleep 2 + sleep 4 done - echo "::error::PyPI never propagated ${RUNTIME_VERSION} within 60s — refusing to fan out cascade against stale index" + echo "::error::pip never resolved molecule-ai-workspace-runtime==${RUNTIME_VERSION} with matching wheel content within ~5 min." + echo "::error::Expected wheel SHA256: ${EXPECTED_SHA256}" + echo "::error::Refusing to fan out cascade against stale or corrupt PyPI surfaces." exit 1 - name: Fan out repository_dispatch diff --git a/.github/workflows/runtime-pin-compat.yml b/.github/workflows/runtime-pin-compat.yml index 976c3194..2672f355 100644 --- a/.github/workflows/runtime-pin-compat.yml +++ b/.github/workflows/runtime-pin-compat.yml @@ -10,15 +10,29 @@ name: Runtime Pin Compatibility # 4. Every tenant workspace crashed; the canary tenant caught it but # only after 5 hours of degraded staging # -# This workflow installs the runtime in a fresh Python venv from PyPI -# and tries the same import the EC2 user-data does. If pip resolution -# silently produces a broken combo, this gate fails before the tenant -# image gets published. +# This workflow installs the CURRENTLY PUBLISHED runtime from PyPI on +# top of `workspace/requirements.txt` and smoke-imports. Catches: +# - Upstream PyPI yanks +# - Bad re-releases of molecule-ai-workspace-runtime +# - Already-shipped wheels that stop importing because a transitive +# dep moved underneath +# +# This is the "PyPI artifact health" half of pin compatibility. The +# companion workflow `runtime-prbuild-compat.yml` covers the +# "PR-introduced breakage" half by building the wheel from THIS PR's +# workspace/ source. Splitting the two means each gets a narrow +# `paths:` filter — the pypi-latest job no longer fires on doc-only +# workspace/ edits whose content can't change what's currently on PyPI. on: push: branches: [main, staging] paths: + # Narrow filter: pypi-latest is sensitive only to changes that + # affect what we're INSTALLING (requirements.txt) or WHAT THE + # CHECK ITSELF DOES (this workflow file). 
Edits to workspace/ + # source code don't change what's on PyPI right now, so they + # don't change this gate's verdict. - 'workspace/requirements.txt' - '.github/workflows/runtime-pin-compat.yml' pull_request: @@ -42,8 +56,8 @@ concurrency: cancel-in-progress: true jobs: - default-install: - name: Default install + import smoke + pypi-latest-install: + name: PyPI-latest install + import smoke runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/runtime-prbuild-compat.yml b/.github/workflows/runtime-prbuild-compat.yml new file mode 100644 index 00000000..41f8332a --- /dev/null +++ b/.github/workflows/runtime-prbuild-compat.yml @@ -0,0 +1,100 @@ +name: Runtime PR-Built Compatibility + +# Companion to `runtime-pin-compat.yml`. That workflow tests what's +# CURRENTLY PUBLISHED on PyPI; this workflow tests what WOULD BE +# PUBLISHED if THIS PR merges. +# +# Why two workflows: the chicken-and-egg #128 fix added a "PR-built +# wheel" job to the original runtime-pin-compat.yml, but both jobs +# shared a `paths:` filter that was the union of their needs +# (`workspace/**`). That meant the PyPI-latest job ran on every doc +# edit even though the upstream PyPI artifact can't change with our +# workspace/ source. Splitting the two means each gets a narrow +# `paths:` filter that matches the inputs it actually depends on. +# +# Catches the failure mode where a PR adds an import requiring a newer +# SDK than `workspace/requirements.txt` pins: +# 1. Pip resolves the existing PyPI wheel + the old SDK pin → smoke +# passes (it imports the OLD main.py from the wheel, not the PR's +# new main.py). +# 2. Merge → publish-runtime.yml ships a wheel WITH the new import. +# 3. Tenant images redeploy → all crash on first boot with +# ImportError. +# +# By building from the PR's source and smoke-importing THAT wheel, we +# fail at PR-time instead of after publish. + +on: + push: + branches: [main, staging] + paths: + # Broad filter: this workflow's verdict can change whenever any + # workspace/ source file changes (because the wheel we build is + # produced from those files), or when the build script itself + # changes (it controls the wheel layout). + - 'workspace/**' + - 'scripts/build_runtime_package.py' + - '.github/workflows/runtime-prbuild-compat.yml' + pull_request: + branches: [main, staging] + paths: + - 'workspace/**' + - 'scripts/build_runtime_package.py' + - '.github/workflows/runtime-prbuild-compat.yml' + workflow_dispatch: + # Required-check support: when this becomes a branch-protection gate, + # merge_group runs let the queue green-check this in addition to PRs. + merge_group: + types: [checks_requested] + # No cron: the same pre-merge run already covered the commit, and + # re-running daily wouldn't surface anything new (workspace/ doesn't + # change between cron firings unless a PR already passed this gate). + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + local-build-install: + # Builds the wheel from THIS PR's workspace/ + scripts/ and tests + # IT — the artifact that WOULD be published if this PR merges. 
+ name: PR-built wheel + import smoke + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: pip + cache-dependency-path: workspace/requirements.txt + - name: Install build tooling + run: pip install build + - name: Build wheel from PR source (mirrors publish-runtime.yml) + # Use a fixed test version so the wheel filename is predictable. + # Doesn't reach PyPI — this build is local-only for the smoke. + # Use the SAME build script with the SAME args as + # publish-runtime.yml's build step. The temp dir path differs + # (`/tmp/runtime-build` here vs `${{ runner.temp }}/runtime-build` + # in publish-runtime.yml — they coincide on ubuntu-latest but + # the call sites are not byte-identical). The smoke import is + # also intentionally narrower than publish's: this gate exists + # to catch SDK-version-import drift specifically; full invariant + # coverage lives in publish-runtime.yml's own pre-PyPI smoke. + run: | + python scripts/build_runtime_package.py \ + --version "0.0.0.dev0+pin-compat" \ + --out /tmp/runtime-build + cd /tmp/runtime-build && python -m build + - name: Install built wheel + workspace requirements + run: | + python -m venv /tmp/venv-built + /tmp/venv-built/bin/pip install --upgrade pip + /tmp/venv-built/bin/pip install /tmp/runtime-build/dist/*.whl + /tmp/venv-built/bin/pip install -r workspace/requirements.txt + /tmp/venv-built/bin/pip show molecule-ai-workspace-runtime a2a-sdk \ + | grep -E '^(Name|Version):' + - name: Smoke import the PR-built wheel + env: + WORKSPACE_ID: 00000000-0000-0000-0000-000000000001 + run: | + /tmp/venv-built/bin/python -c "from molecule_runtime.main import main_sync; print('PR-built runtime imports OK')" diff --git a/docs/ecosystem-watch.md b/docs/ecosystem-watch.md index 8f6894df..674c5d46 100644 --- a/docs/ecosystem-watch.md +++ b/docs/ecosystem-watch.md @@ -1,6 +1,6 @@ # Ecosystem Watch — Phase 30 Competitive Tracking **Created by:** PMM -**Date:** 2026-04-21 +**Date:** 2026-04-27 **Status:** ACTIVE — competitor monitoring in progress **Phase:** 30 — Remote Workspaces + Cross-Network Federation @@ -118,7 +118,7 @@ Track competitor releases and market events that affect Phase 30 positioning. En - **Check frequency:** Every marketing cycle - **Trigger:** Any competitor shipping something that invalidates a Phase 30 positioning claim - **File location:** `docs/ecosystem-watch.md` (origin/main) -- **Last updated by:** PMM | 2026-04-23 (LangGraph PRs verified OPEN; new feat PRs #1730/#1702/#1731 logged; release note written) +- **Last updated by:** PMM | 2026-04-27 (weekly refresh — all competitor versions, stars, and PRs verified current; Clawith v1.9.0 added) --- diff --git a/docs/quickstart.md b/docs/quickstart.md index a0483d74..4f0f2ff7 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -7,20 +7,46 @@ This path is aligned to the current repository and current UI. It gets you from - Docker + Docker Compose v2 - Node.js 20+ - Go 1.25+ +- `jq` (for the template-registry clone in `setup.sh`) - One model/API key for the runtime you want to use - `ANTHROPIC_API_KEY` - `OPENAI_API_KEY` - `GOOGLE_API_KEY` - or another provider routed through LiteLLM -## Step 1: Clone the repository +## The one-command path + +```bash +git clone https://github.com/Molecule-AI/molecule-monorepo.git +cd molecule-monorepo +./scripts/dev-start.sh +``` + +That single script: + +1. Generates an `ADMIN_TOKEN` into `.env` (first run only — preserved on re-runs) +2. 
Brings up Postgres, Redis, Langfuse, ClickHouse, and Temporal via `infra/scripts/setup.sh` +3. Populates the workspace template + plugin registry from `manifest.json` +4. Builds and starts the platform on `http://localhost:8080` +5. Installs canvas deps (first run) and starts the canvas on `http://localhost:3000` +6. Prints next-step instructions and tails both processes — `Ctrl-C` tears everything down + +Total wall-clock: ~30 seconds for a re-run, ~2 minutes for a first run (npm install + docker pulls). + +Once the canvas is up: open it, add your model API key in **Config → Secrets & API Keys → Global**, then click a template card or **+ Create blank workspace**. + +## Manual setup (advanced) + +If you'd rather run each component yourself — useful when you're iterating on the platform binary or the canvas in isolation — follow the steps below. Each section is what `dev-start.sh` does internally; running them by hand gives you per-component logs and lets you keep one piece running while you restart another. + +### Step 1: Clone the repository ```bash git clone https://github.com/Molecule-AI/molecule-monorepo.git cd molecule-monorepo ``` -## Step 2: Start the shared infrastructure +### Step 2: Start the shared infrastructure Recommended: @@ -28,7 +54,7 @@ Recommended: ./infra/scripts/setup.sh ``` -That brings up Postgres, Redis, and Langfuse. +That brings up Postgres, Redis, Langfuse, ClickHouse, and Temporal. If you only want the raw compose flow: @@ -36,7 +62,7 @@ If you only want the raw compose flow: docker compose -f docker-compose.infra.yml up -d ``` -## Step 3: Start the platform +### Step 3: Start the platform ```bash cd workspace-server @@ -45,7 +71,7 @@ go run ./cmd/server The control plane listens on `http://localhost:8080`. -## Step 4: Start the canvas +### Step 4: Start the canvas In a new terminal: diff --git a/scripts/dev-start.sh b/scripts/dev-start.sh index 8eda6dd4..b99d1b4f 100755 --- a/scripts/dev-start.sh +++ b/scripts/dev-start.sh @@ -1,69 +1,182 @@ #!/bin/sh # dev-start.sh — one-command local development environment. # -# Starts: Postgres, Redis, Platform (Go :8080), Canvas (Next.js :3000) -# Stops all on Ctrl-C. +# What it does (in order): +# 1. Generates ADMIN_TOKEN into .env if missing (closes #684 fail-open) +# 2. Runs infra/scripts/setup.sh (postgres + redis + langfuse + clickhouse +# + temporal + populates template/plugin registry from manifest.json) +# 3. Starts the platform (Go :8080), waits for /health +# 4. Starts the canvas (Next.js :3000), waits for HTTP 200 +# 5. Prints a readiness banner with API-key add instructions +# 6. On Ctrl-C, kills both background processes and tears down infra # # Prerequisites: -# - Docker (for Postgres + Redis) -# - Go 1.25+ (for platform) -# - Node.js 20+ (for canvas) +# - Docker + Docker Compose v2 (for postgres/redis/langfuse/etc) +# - Go 1.25+ (for the platform binary) +# - Node.js 20+ (for the canvas) +# - jq (for setup.sh's manifest clone — optional; +# without it, template palette will be +# empty until you run clone-manifest.sh +# manually) # # Usage: # ./scripts/dev-start.sh -# # Open http://localhost:3000 +# # Open http://localhost:3000, add your model API key in +# # Config → Secrets & API Keys, then create your first workspace. +# +# Idempotent: re-running picks up where the last run left off (existing +# .env is preserved, npm install skipped if node_modules present, etc). set -e ROOT="$(cd "$(dirname "$0")/.." && pwd)" +ENV_FILE="$ROOT/.env" cleanup() { echo "" - echo "Shutting down..." 
+ echo "==> Shutting down..." kill $PLATFORM_PID $CANVAS_PID 2>/dev/null || true + # Use setup.sh's compose file (full infra) since that's what we + # brought up. `down` keeps named volumes by default — call with + # --volumes here only if you want a clean slate (we don't, since + # idempotent re-runs are the usual case). docker compose -f "$ROOT/docker-compose.infra.yml" down 2>/dev/null || true - echo "Done." + echo " Done." } trap cleanup EXIT INT TERM -echo "==> Starting infrastructure (Postgres, Redis)..." -docker compose -f "$ROOT/docker-compose.infra.yml" up -d +# ─────────────────────────────────────────────── 1. dev-mode auth posture -echo "==> Waiting for Postgres..." -until docker compose -f "$ROOT/docker-compose.infra.yml" exec -T postgres pg_isready -q 2>/dev/null; do - sleep 1 -done -echo " Postgres ready." +# The AdminAuth middleware closes its fail-open the moment the first +# workspace token lands in the DB — at which point /workspaces and +# other admin routes 401 unless the caller has either ADMIN_TOKEN or +# the dev-mode escape hatch. The canvas at localhost:3000 has no +# bearer token to send, so without one of those two paths it can't +# call admin endpoints after a workspace exists. +# +# For local dev the right posture is the dev-mode escape hatch: +# +# MOLECULE_ENV=development AND ADMIN_TOKEN unset +# +# That makes middleware.isDevModeFailOpen() return true and lets the +# canvas keep working without a bearer. Setting ADMIN_TOKEN here +# would BREAK the canvas (it has no way to read that token in dev). +# +# For SaaS the platform is provisioned with ADMIN_TOKEN set AND +# MOLECULE_ENV=production — either one closes the hatch. So the dev +# mode signal here is safe (it's only active when both other knobs +# are absent). +if [ -f "$ENV_FILE" ] && grep -q '^MOLECULE_ENV=' "$ENV_FILE"; then + echo "==> Reusing MOLECULE_ENV from existing .env" +else + echo "==> Setting MOLECULE_ENV=development in .env (dev-mode auth hatch)" + { + if [ -f "$ENV_FILE" ]; then + cat "$ENV_FILE" + echo "" + fi + echo "# Generated by scripts/dev-start.sh on $(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "# Local-dev auth posture: dev-mode fail-open lets the canvas at" + echo "# localhost:3000 call admin endpoints without a bearer token." + echo "# DO NOT set ADMIN_TOKEN here in dev — it would close the hatch" + echo "# and the canvas would 401 on every admin call." + echo "MOLECULE_ENV=development" + } > "$ENV_FILE.tmp" + mv "$ENV_FILE.tmp" "$ENV_FILE" + echo " Saved to $ENV_FILE" +fi -echo "==> Starting Platform (Go :8080)..." +# Source .env so the platform inherits ADMIN_TOKEN (and anything else +# the user has added — e.g. ANTHROPIC_API_KEY for skipping the canvas +# Secrets UI). `set -a` exports every assignment in the sourced file +# without us having to know the var names. +set -a +# shellcheck disable=SC1090 +. "$ENV_FILE" +set +a + +# ─────────────────────────────────────────────── 2. infra + templates + +# Use setup.sh (not raw docker-compose) so the template registry gets +# populated from manifest.json. Without that, the canvas template +# palette is empty and the user has to manually clone repos — exactly +# the friction this script exists to eliminate. +echo "==> Running infra/scripts/setup.sh (infra + template registry)" +"$ROOT/infra/scripts/setup.sh" + +# ─────────────────────────────────────────────── 3. platform + +echo "==> Starting Platform (Go :8080)" cd "$ROOT/workspace-server" -go run ./cmd/server & +go run ./cmd/server > /tmp/molecule-platform.log 2>&1 & PLATFORM_PID=$! 
-echo "==> Waiting for Platform health..." -until curl -sf http://localhost:8080/health >/dev/null 2>&1; do +echo " Waiting for Platform /health..." +PLATFORM_READY=0 +for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \ + 21 22 23 24 25 26 27 28 29 30; do + if curl -sf http://localhost:8080/health >/dev/null 2>&1; then + echo " Platform ready (t+${i}s)" + PLATFORM_READY=1 + break + fi sleep 1 done -echo " Platform ready." +if [ "$PLATFORM_READY" -ne 1 ]; then + echo " ✗ Platform did not respond in 30s — check /tmp/molecule-platform.log" + exit 1 +fi -echo "==> Starting Canvas (Next.js :3000)..." +# ─────────────────────────────────────────────── 4. canvas + +echo "==> Starting Canvas (Next.js :3000)" cd "$ROOT/canvas" if [ ! -d node_modules ]; then + echo " First-run: npm install (~30-60s)" npm install fi -npm run dev & +npm run dev > /tmp/molecule-canvas.log 2>&1 & CANVAS_PID=$! -echo "" -echo "============================================" -echo " Molecule AI dev environment running" -echo "" -echo " Canvas: http://localhost:3000" -echo " Platform: http://localhost:8080" -echo " Postgres: localhost:5432" -echo " Redis: localhost:6379" -echo "" -echo " Press Ctrl-C to stop all services" -echo "============================================" +echo " Waiting for Canvas HTTP 200..." +CANVAS_READY=0 +for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \ + 21 22 23 24 25 26 27 28 29 30; do + code=$(curl -sf -o /dev/null -w "%{http_code}" http://localhost:3000/ 2>/dev/null || echo "0") + if [ "$code" = "200" ]; then + echo " Canvas ready (t+${i}s)" + CANVAS_READY=1 + break + fi + sleep 1 +done +if [ "$CANVAS_READY" -ne 1 ]; then + echo " ✗ Canvas did not respond in 30s — check /tmp/molecule-canvas.log" + exit 1 +fi + +# ─────────────────────────────────────────────── 5. readiness banner + +cat </dev/null 2>&1 \ && ok "Teardown request accepted" \ || log "Teardown returned non-2xx (may already be gone)" - sleep 10 - local leak_count - leak_count=$(curl "${CURL_COMMON[@]}" "$CP_URL/cp/admin/orgs" \ - -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \ - | python3 -c "import json,sys; d=json.load(sys.stdin); print(sum(1 for o in d.get('orgs', []) if o.get('slug')=='$SLUG' and o.get('status') != 'purged'))" \ - 2>/dev/null || echo 0) + local leak_count=1 + local elapsed=0 + while [ "$elapsed" -lt 60 ]; do + leak_count=$(curl "${CURL_COMMON[@]}" "$CP_URL/cp/admin/orgs" \ + -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \ + | python3 -c "import json,sys; d=json.load(sys.stdin); print(sum(1 for o in d.get('orgs', []) if o.get('slug')=='$SLUG' and o.get('status') != 'purged'))" \ + 2>/dev/null || echo 1) + if [ "$leak_count" = "0" ]; then + break + fi + sleep 5 + elapsed=$((elapsed + 5)) + done + if [ "$leak_count" != "0" ]; then - echo "⚠️ LEAK: org $SLUG still present post-teardown (count=$leak_count)" >&2 + echo "⚠️ LEAK: org $SLUG still present post-teardown after ${elapsed}s (count=$leak_count)" >&2 exit 4 fi - ok "Teardown clean — no orphan resources for $SLUG" + ok "Teardown clean — no orphan resources for $SLUG (${elapsed}s)" # Normalize unexpected upstream exit codes to 1 (generic failure). 
The # script's documented contract (header "Exit codes" section) only emits diff --git a/workspace-server/internal/registry/orphan_sweeper.go b/workspace-server/internal/registry/orphan_sweeper.go index 03c14722..85c07d96 100644 --- a/workspace-server/internal/registry/orphan_sweeper.go +++ b/workspace-server/internal/registry/orphan_sweeper.go @@ -112,12 +112,15 @@ func sweepOnce(parent context.Context, reaper OrphanReaper) { ctx, cancel := context.WithTimeout(parent, orphanSweepDeadline) defer cancel() - // Two independent passes. Each handles its own short-circuit; an - // empty result or transient error in one must NOT stop the other, + // Three independent passes. Each handles its own short-circuit; an + // empty result or transient error in one must NOT stop the others, // since the wiped-DB pass exists precisely for cases where the - // removed-row pass finds zero candidates (DB has been dropped). + // removed-row pass finds zero candidates (DB has been dropped) and + // the stale-token pass exists for the mirror case (DB persists but + // /configs volume has been wiped). sweepRemovedRows(ctx, reaper) sweepLabeledOrphansWithoutRows(ctx, reaper) + sweepStaleTokensWithoutContainer(ctx, reaper) } // sweepRemovedRows is the original sweep: ws-* containers (by name @@ -290,3 +293,181 @@ func sweepLabeledOrphansWithoutRows(ctx context.Context, reaper OrphanReaper) { } } } + +// staleTokenGrace bounds how recently a token must have been used (or +// issued, if never used) for it to be considered "potentially live". +// Anything quieter than this is fair game for the stale-token revoke +// pass when there's no matching container. +// +// Sized vs the heartbeat cadence (30s) and provisioning latency: a +// healthy workspace touches `last_used_at` every heartbeat, so 5min is +// 10× the heartbeat interval — enough headroom that brief container +// restarts (Stop → Start) don't trip the pass. A workspace that's been +// silent past this window AND has no container is either a wiped-volume +// orphan or a workspace nobody is using; either way, revoking is safe +// because the next /registry/register mints a fresh token via the +// no-live-tokens bootstrap branch in registry.go. +const staleTokenGrace = 5 * time.Minute + +// sweepStaleTokensWithoutContainer revokes workspace_auth_tokens rows +// for workspaces whose /configs volume must have been wiped — detected +// as "live token in DB whose owning workspace has no live Docker +// container". This heals the user-reported failure mode where +// `docker compose down -v` (or any out-of-band volume removal) leaves +// stale tokens in the DB while the recreated container has an empty +// `/configs/.auth_token`. Without this pass, /registry/register on the +// fresh container 401s forever (requireWorkspaceToken sees live tokens, +// container can't present one), and the workspace is permanently +// wedged until an operator manually revokes via SQL. +// +// The platform's restart endpoint already handles this case correctly +// via wsauth.RevokeAllForWorkspace inside issueAndInjectToken — this +// pass is the safety net for the equivalent action taken outside the +// API (operator did `docker compose down -v`, host crashed mid-restart, +// disk pressure evicted a volume, etc). +// +// Safety filters that bound the revoke radius: +// +// 1. Only runs in single-tenant Docker mode. 
The orphan sweeper is +// wired only when prov != nil (see cmd/server/main.go) — in CP/SaaS +// mode there is no Docker daemon and the sweeper doesn't run, so an +// empty container list cannot be confused with "no Docker at all" +// here (which would otherwise revoke every workspace's tokens). +// The function also short-circuits on a nil reaper as a belt-and- +// braces guard against a future refactor wiring it incorrectly. +// +// 2. staleTokenGrace skips tokens that were issued or used in the +// last 5 minutes. Bounds the race with mid-provisioning (token +// issued moments before docker run completes) and brief restart +// windows. +// +// 3. CRITICAL: the staleness predicate is enforced AT THE UPDATE, +// not just at the SELECT. This closes a TOCTOU race against +// workspace_provision.go:issueAndInjectToken — the platform's +// restart endpoint Stops the container synchronously then dispatches +// re-provisioning to a goroutine, so a stale-on-SELECT workspace +// can have a fresh token inserted by issueAndInjectToken between +// our SELECT and our UPDATE. A predicate-only `WHERE workspace_id +// = $1 AND revoked_at IS NULL` UPDATE would catch that fresh token +// too. Carrying COALESCE(last_used_at, created_at) < now() - grace +// in the UPDATE makes the operation idempotent against fresh +// inserts: a token created within the grace window cannot match. +// +// 4. The DB query joins on workspaces.status NOT IN ('removed', +// 'provisioning') so deleted and mid-restart workspaces are not +// revoked here — those are handled at delete time and by +// issueAndInjectToken respectively. (`status = 'provisioning'` is +// set synchronously in workspace_restart.go before the async +// re-provision begins, so it's a reliable in-flight signal.) +// +// 5. Each revocation is logged with the workspace ID so operators can +// correlate "workspace just lost auth" with this sweeper, not blame +// a network blip. +// +// Failure mode: revoke fails for some reason (transient DB error). The +// next sweep cycle (60s out) retries. Worst case: a workspace stays +// 401-blocked an extra minute. +func sweepStaleTokensWithoutContainer(ctx context.Context, reaper OrphanReaper) { + // Defence-in-depth (F2): a future refactor that wires the sweeper + // in CP/SaaS mode without checking prov would otherwise hit this + // pass with a nil reaper. The StartOrphanSweeper entry point + // already short-circuits on nil, but we don't want to depend on + // every future caller doing the same. + if reaper == nil { + return + } + + prefixes, err := reaper.ListWorkspaceContainerIDPrefixes(ctx) + if err != nil { + log.Printf("Orphan sweeper: ListWorkspaceContainerIDPrefixes failed: %v — skipping stale-token pass", err) + return + } + + // Same hex-and-dash filter as the other passes — anything that + // can't be a workspace UUID prefix doesn't belong in a SQL LIKE + // pattern. + // + // NOTE: an empty `likes` array is intentionally NOT a short-circuit. + // "No workspace containers" is the load-bearing case for this pass + // (operator nuked everything). The `cardinality($1) = 0` clause in + // the SELECT below treats empty likes as "no LIKE filter" → every + // stale-token workspace becomes a candidate. The first two passes' + // early-return-on-empty-prefixes pattern would defeat this entire + // pass's purpose. 
+ likes := make([]string, 0, len(prefixes)) + for _, p := range prefixes { + if !isLikelyWorkspaceID(p) { + continue + } + likes = append(likes, p+"%") + } + + // Find workspaces with live tokens whose most-recent activity is + // past the grace window AND whose ID does NOT match any live + // container prefix. When `likes` is empty (no workspace containers + // running at all), every stale-activity workspace is a candidate — + // expressed via the `cardinality($1) = 0` short-circuit so the + // query has a single shape regardless of container count. + // + // make_interval(secs => $2) avoids the time.Duration.String() → + // `"5m0s"` mismatch with Postgres interval grammar; passing seconds + // as an int keeps the binding portable. + graceSeconds := int(staleTokenGrace.Seconds()) + rows, qErr := db.DB.QueryContext(ctx, ` + SELECT DISTINCT t.workspace_id::text + FROM workspace_auth_tokens t + JOIN workspaces w ON w.id = t.workspace_id + WHERE t.revoked_at IS NULL + AND w.status NOT IN ('removed', 'provisioning') + AND COALESCE(t.last_used_at, t.created_at) < now() - make_interval(secs => $2) + AND ( + cardinality($1::text[]) = 0 + OR NOT (t.workspace_id::text LIKE ANY($1::text[])) + ) + `, pq.Array(likes), graceSeconds) + if qErr != nil { + log.Printf("Orphan sweeper: stale-token query failed: %v — skipping stale-token pass", qErr) + return + } + defer rows.Close() + + var staleWorkspaceIDs []string + for rows.Next() { + var id string + if scanErr := rows.Scan(&id); scanErr != nil { + log.Printf("Orphan sweeper: stale-token row scan failed: %v", scanErr) + continue + } + staleWorkspaceIDs = append(staleWorkspaceIDs, id) + } + if iterErr := rows.Err(); iterErr != nil { + log.Printf("Orphan sweeper: stale-token rows iteration failed: %v", iterErr) + return + } + + // Per-workspace UPDATE with the SAME staleness predicate as the + // SELECT, so any token inserted between SELECT and UPDATE (e.g. + // issueAndInjectToken racing during a user-triggered restart of a + // long-idle workspace) is automatically excluded — its created_at + // is fresh and won't satisfy `< now() - grace`. + // + // We deliberately bypass wsauth.RevokeAllForWorkspace here because + // that helper revokes EVERY live token for the workspace; we want + // "every STALE live token", which is a different (safer) operation. + for _, wsID := range staleWorkspaceIDs { + log.Printf("Orphan sweeper: revoking stale tokens for workspace %s (no live container; volume likely wiped)", wsID) + _, revokeErr := db.DB.ExecContext(ctx, ` + UPDATE workspace_auth_tokens + SET revoked_at = now() + WHERE workspace_id = $1 + AND revoked_at IS NULL + AND COALESCE(last_used_at, created_at) < now() - make_interval(secs => $2) + `, wsID, graceSeconds) + if revokeErr != nil { + // Non-fatal — next sweep retries. Bail on the loop so a + // systemic DB error doesn't spam the log on every iteration. + log.Printf("Orphan sweeper: stale-token revoke for %s failed: %v — will retry next cycle", wsID, revokeErr) + return + } + } +} diff --git a/workspace-server/internal/registry/orphan_sweeper_test.go b/workspace-server/internal/registry/orphan_sweeper_test.go index e28e51f6..9ce0c292 100644 --- a/workspace-server/internal/registry/orphan_sweeper_test.go +++ b/workspace-server/internal/registry/orphan_sweeper_test.go @@ -10,6 +10,24 @@ import ( "github.com/DATA-DOG/go-sqlmock" ) +// expectStaleTokenSweepNoOp registers the third-pass query +// (sweepStaleTokensWithoutContainer) returning zero rows. 
The third +// pass runs unconditionally on every sweepOnce, so every test that +// doesn't specifically exercise stale-token revocation must register +// this expectation or sqlmock will fail "unexpected query". +// +// Centralising the regex here keeps the existing test suite readable — +// individual tests don't have to spell out a query they're not actually +// asserting against. +// +// The regex is anchored at the start of the query AND requires the +// status-filter to keep us from accidentally matching a future query +// that opens with the same column name. R3 from the review. +func expectStaleTokenSweepNoOp(mock sqlmock.Sqlmock) { + mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`). + WillReturnRows(sqlmock.NewRows([]string{"workspace_id"})) +} + // fakeReaper is a hand-rolled OrphanReaper for the sweeper tests. // Records every Stop / RemoveVolume call so tests can assert which // workspace IDs got reconciled. @@ -73,6 +91,7 @@ func TestSweepOnce_ReconcilesRunningRemovedRows(t *testing.T) { mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`). WillReturnRows(sqlmock.NewRows([]string{"id"}). AddRow("abc123def456-0000-0000-0000-000000000000")) + expectStaleTokenSweepNoOp(mock) sweepOnce(context.Background(), reaper) @@ -96,8 +115,11 @@ func TestSweepOnce_NoRunningContainers(t *testing.T) { reaper := &fakeReaper{listResponse: nil} - // No DB query expected — if sweepOnce makes one anyway the - // sqlmock will fail "unexpected query". + // First two passes short-circuit on empty container lists. The + // third pass (stale-token sweep) DOES query — that's its whole + // reason for existing in the no-containers case (operator nuked + // everything). Mock it returning no stale tokens. + expectStaleTokenSweepNoOp(mock) sweepOnce(context.Background(), reaper) if len(reaper.stopCalls) != 0 { @@ -145,6 +167,7 @@ func TestSweepOnce_StopFailureLeavesVolume(t *testing.T) { mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`). WillReturnRows(sqlmock.NewRows([]string{"id"}). AddRow("abc123def456-0000-0000-0000-000000000000")) + expectStaleTokenSweepNoOp(mock) sweepOnce(context.Background(), reaper) @@ -173,6 +196,7 @@ func TestSweepOnce_VolumeRemoveErrorIsNonFatal(t *testing.T) { WillReturnRows(sqlmock.NewRows([]string{"id"}). AddRow("aaa111bbb222-0000-0000-0000-000000000000"). AddRow("ccc333ddd444-0000-0000-0000-000000000000")) + expectStaleTokenSweepNoOp(mock) sweepOnce(context.Background(), reaper) @@ -205,9 +229,12 @@ func TestSweepOnce_FiltersNonWorkspacePrefixes(t *testing.T) { }, } - // No DB query expected — every prefix is rejected before the - // query builds, so we short-circuit. sqlmock fails on any - // unexpected query. + // First-pass query is skipped — every prefix is rejected before + // the query builds. Third-pass query still runs (filtered prefixes + // + non-empty input list still produces an empty likes array, + // which the third-pass treats the same as "no containers running" + // → stale-token candidates with no LIKE filter). Mock it empty. + expectStaleTokenSweepNoOp(mock) sweepOnce(context.Background(), reaper) if len(reaper.stopCalls) != 0 { @@ -289,6 +316,7 @@ func TestSweepOnce_WipedDBReapsLabeledOrphans(t *testing.T) { // returns no rows — both prefixes are unknown. mock.ExpectQuery(`SELECT lk\s+FROM unnest`). 
WillReturnRows(sqlmock.NewRows([]string{"lk"})) + expectStaleTokenSweepNoOp(mock) sweepOnce(context.Background(), reaper) @@ -328,6 +356,7 @@ func TestSweepOnce_WipedDBSkipsLabeledContainersWithRows(t *testing.T) { WillReturnRows(sqlmock.NewRows([]string{"lk"}). AddRow("abc123def456%"). AddRow("ee0011223344%")) + expectStaleTokenSweepNoOp(mock) sweepOnce(context.Background(), reaper) @@ -355,6 +384,7 @@ func TestSweepOnce_WipedDBReapsOnlyTheUnknownOnes(t *testing.T) { mock.ExpectQuery(`SELECT lk\s+FROM unnest`). WillReturnRows(sqlmock.NewRows([]string{"lk"}). AddRow(keep + "%")) + expectStaleTokenSweepNoOp(mock) sweepOnce(context.Background(), reaper) @@ -380,7 +410,10 @@ func TestSweepOnce_WipedDBSkippedOnDockerError(t *testing.T) { } // No DB query expected for the second pass since we error out - // before reaching SQL. + // before reaching SQL. The third pass (stale-token sweep) uses + // ListWorkspaceContainerIDPrefixes (which succeeded with empty + // here, not the same call that errored), so it DOES query. + expectStaleTokenSweepNoOp(mock) sweepOnce(context.Background(), reaper) if len(reaper.stopCalls) != 0 { @@ -410,6 +443,7 @@ func TestSweepOnce_WipedDBSkipsNonUUIDPrefixes(t *testing.T) { // that should appear in the unnest array. mock.ExpectQuery(`SELECT lk\s+FROM unnest`). WillReturnRows(sqlmock.NewRows([]string{"lk"})) + expectStaleTokenSweepNoOp(mock) sweepOnce(context.Background(), reaper) @@ -420,3 +454,204 @@ func TestSweepOnce_WipedDBSkipsNonUUIDPrefixes(t *testing.T) { t.Errorf("unmet sqlmock expectations: %v", err) } } + +// ============================================================================= +// Third pass: sweepStaleTokensWithoutContainer +// +// Heals the user-reported "auth token conflict after volume wipe" failure mode. +// Scenario: operator runs `docker compose down -v` (or any out-of-band volume +// removal); DB still has tokens for workspaces whose recreated containers boot +// with empty /configs and 401 forever on /registry/register. +// ============================================================================= + +// TestSweepOnce_StaleTokenRevokeFiresWhenNoContainer — the headline +// case. A workspace has live tokens in the DB but no live container +// matches it (volume-wipe scenario). The third pass revokes. +func TestSweepOnce_StaleTokenRevokeFiresWhenNoContainer(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + // Two name-shaped containers running, both have status='removed' + // rows so first pass reaps them. Second pass finds nothing + // (managed list empty in this scenario). Third pass: even though + // the running containers cover those two prefixes, an unrelated + // workspace ID (no live container, no name prefix match) has + // stale tokens — revoke it. + const orphanedID = "deadbeef-0000-0000-0000-000000000000" + reaper := &fakeReaper{listResponse: []string{"abc123def456"}} + + mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`). + WillReturnRows(sqlmock.NewRows([]string{"id"}). + AddRow("abc123def456-0000-0000-0000-000000000000")) + + // Third-pass query returns the orphaned workspace. + // Tight regex pins the safety guards: status-filter excludes + // 'removed' and 'provisioning' (R2 + the C1 fix), and the + // staleness predicate appears in the SELECT. + mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\).*COALESCE\(t\.last_used_at, t\.created_at\) < now\(\) - make_interval`). 
+ WillReturnRows(sqlmock.NewRows([]string{"workspace_id"}). + AddRow(orphanedID)) + + // Revoke executes one UPDATE — and the UPDATE itself MUST also + // carry the staleness predicate (closes the C1 TOCTOU race + // against issueAndInjectToken inserting a fresh token between + // our SELECT and our UPDATE). + mock.ExpectExec(`(?s)UPDATE workspace_auth_tokens\s+SET revoked_at = now\(\)\s+WHERE workspace_id = \$1\s+AND revoked_at IS NULL\s+AND COALESCE\(last_used_at, created_at\) < now\(\) - make_interval`). + WithArgs(orphanedID, 300). + WillReturnResult(sqlmock.NewResult(0, 1)) + + sweepOnce(context.Background(), reaper) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestSweepOnce_StaleTokenSkippedWhenContainerExists — pin the safety +// guarantee: a workspace with both live tokens AND a live container +// must NOT be revoked. The query's NOT LIKE clause is the gate; this +// test exercises that gate by having the third-pass query return zero +// rows (the live-container workspace is filtered out). +func TestSweepOnce_StaleTokenSkippedWhenContainerExists(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + // One running container; first pass returns no removed-row matches. + reaper := &fakeReaper{listResponse: []string{"abc123def456"}} + mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`). + WillReturnRows(sqlmock.NewRows([]string{"id"})) + + // Third-pass query: the live workspace has live tokens but its + // prefix matches the running container, so the NOT LIKE excludes + // it. Result: zero stale tokens. + expectStaleTokenSweepNoOp(mock) + + sweepOnce(context.Background(), reaper) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestSweepOnce_StaleTokenRevokeFailureBailsLoop — a transient DB +// error during revoke must not spam the log on every iteration. +// Bail out of the loop; next 60s cycle retries. +func TestSweepOnce_StaleTokenRevokeFailureBailsLoop(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + reaper := &fakeReaper{listResponse: nil} + + // Third-pass returns two stale-token workspaces; the first revoke + // errors. Loop must bail without attempting the second. + mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`). + WillReturnRows(sqlmock.NewRows([]string{"workspace_id"}). + AddRow("aaaa1111-0000-0000-0000-000000000000"). + AddRow("bbbb2222-0000-0000-0000-000000000000")) + mock.ExpectExec(`(?s)UPDATE workspace_auth_tokens\s+SET revoked_at = now\(\)\s+WHERE workspace_id = \$1\s+AND revoked_at IS NULL\s+AND COALESCE\(last_used_at, created_at\) < now\(\) - make_interval`). + WithArgs("aaaa1111-0000-0000-0000-000000000000", 300). + WillReturnError(errors.New("connection reset")) + // No second ExpectExec: if the loop tries it, sqlmock fails + // "unexpected call". + + sweepOnce(context.Background(), reaper) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestSweepOnce_StaleTokenQueryErrorIsNonFatal — a transient DB error +// on the SELECT must not prevent the rest of sweepOnce from making +// progress. (In this test there's no other progress to make either, +// just verifying no panic + the cycle completes.) 
+func TestSweepOnce_StaleTokenQueryErrorIsNonFatal(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + reaper := &fakeReaper{listResponse: nil} + + mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`). + WillReturnError(errors.New("connection reset")) + + sweepOnce(context.Background(), reaper) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestSweepOnce_StaleTokenRevokeUsesStalenessPredicate — pin the C1 +// race fix: the per-workspace UPDATE must carry the staleness +// predicate so a token inserted by issueAndInjectToken between our +// SELECT and our UPDATE is automatically excluded (its created_at is +// fresh and won't satisfy `< now() - grace`). +// +// This test asserts the SHAPE of the UPDATE (predicate present, grace +// argument bound). A real-Postgres integration test would prove the +// race resolution end-to-end; this catches the regression where +// someone "simplifies" the UPDATE back to a predicate-only revoke. +func TestSweepOnce_StaleTokenRevokeUsesStalenessPredicate(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + const orphanedID = "deadbeef-0000-0000-0000-000000000000" + reaper := &fakeReaper{listResponse: nil} + + mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`). + WillReturnRows(sqlmock.NewRows([]string{"workspace_id"}). + AddRow(orphanedID)) + + // The UPDATE regex requires every guard: workspace_id binding, + // revoked_at IS NULL, AND the staleness predicate using the SAME + // COALESCE expression as the SELECT. Loosening any of these + // would re-open the C1 race, and this regex would no longer match. + mock.ExpectExec(`(?s)UPDATE workspace_auth_tokens\s+SET revoked_at = now\(\)\s+WHERE workspace_id = \$1\s+AND revoked_at IS NULL\s+AND COALESCE\(last_used_at, created_at\) < now\(\) - make_interval\(secs => \$2\)`). + WithArgs(orphanedID, 300). + WillReturnResult(sqlmock.NewResult(0, 1)) + + sweepOnce(context.Background(), reaper) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestSweepStaleTokens_NilReaperEarlyExit — defence-in-depth (F2): +// even though StartOrphanSweeper short-circuits on nil reaper, the +// individual pass also early-exits. Protects against future refactors +// that wire the pass without the outer guard. +func TestSweepStaleTokens_NilReaperEarlyExit(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + // No DB queries expected. If the early-return is removed, sqlmock + // fails on the unexpected SELECT. + sweepStaleTokensWithoutContainer(context.Background(), nil) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestSweepOnce_StaleTokenSkippedWhenDockerListFails — if the third +// pass can't enumerate containers (Docker hiccup), it must skip the +// query entirely. Otherwise it would query with empty likes and +// revoke every stale-token workspace based on stale information. 
+func TestSweepOnce_StaleTokenSkippedWhenDockerListFails(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + reaper := &fakeReaper{listErr: errors.New("daemon unreachable")} + + // No DB queries expected: first pass bails on listErr, second + // pass uses managedList (also fails because we never set it), + // third pass also bails on listErr. Verify by NOT registering + // ExpectStaleTokenSweepNoOp — sqlmock fails on any unexpected + // query. + sweepOnce(context.Background(), reaper) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} diff --git a/workspace/main.py b/workspace/main.py index 5c09b134..85e891e2 100644 --- a/workspace/main.py +++ b/workspace/main.py @@ -437,13 +437,23 @@ async def main(): # pragma: no cover ) async def _send_initial_prompt(): """Wait for server to be ready, then send initial_prompt as self-message.""" - # Wait for the A2A server to accept connections + # Wait for the A2A server to accept connections. + # Use the SDK's own constant for the well-known path so this + # probe and the route mounted by create_agent_card_routes() + # never drift apart. Pre-fix this hardcoded the pre-1.x + # well-known path string; a2a-sdk 1.x renamed it (the + # canonical value lives in a2a.utils.constants now), so + # the probe got 404 every attempt and fell through to + # "server not ready after 30s, skipping" even though the + # server was actually serving fine. Net effect: every + # workspace silently dropped its `initial_prompt`. + from a2a.utils.constants import AGENT_CARD_WELL_KNOWN_PATH ready = False for attempt in range(30): await asyncio.sleep(1) try: async with httpx.AsyncClient(timeout=5.0) as client: - resp = await client.get(f"http://127.0.0.1:{port}/.well-known/agent.json") + resp = await client.get(f"http://127.0.0.1:{port}{AGENT_CARD_WELL_KNOWN_PATH}") if resp.status_code == 200: ready = True break diff --git a/workspace/tests/test_agent_card_well_known_path.py b/workspace/tests/test_agent_card_well_known_path.py new file mode 100644 index 00000000..fe06c9fd --- /dev/null +++ b/workspace/tests/test_agent_card_well_known_path.py @@ -0,0 +1,84 @@ +"""Pin the agent-card readiness probe to the SDK's canonical path. + +main.py's _send_initial_prompt() polls the local A2A server's +well-known agent-card URL to know when it's safe to send the initial +prompt as a self-message. Pre-fix the URL was hardcoded to the pre-1.x +literal; a2a-sdk 1.x renamed the well-known path (the canonical value +lives in `a2a.utils.constants.AGENT_CARD_WELL_KNOWN_PATH`), so the +probe got 404 every attempt and silently fell through to "server not +ready after 30s, skipping" — dropping every workspace's +`initial_prompt` from config.yaml. + +The fix is to import the SDK's `AGENT_CARD_WELL_KNOWN_PATH` constant +and use it directly in the probe URL. These tests pin the static +invariants of that fix: + + 1. No hardcoded `/.well-known/agent.json` literal anywhere in + main.py (catches a future contributor reverting to a literal). + 2. The probe URL fstring interpolates `AGENT_CARD_WELL_KNOWN_PATH` + (catches a "fix" that imports the constant for show but still + uses a literal in the actual GET). + +Note: we deliberately do not assert the constant's value or compare +it against `create_agent_card_routes()` here. 
The runtime SDK is +mocked in this directory's conftest for the executor-test path, so +any test that imports the real `a2a.utils.constants` would either +collide with the mock or require running in a separate pytest session. +The two static invariants are sufficient: by always following whatever +the SDK constant says, we travel through any rename automatically. The +SDK's own contract that `create_agent_card_routes` mounts at the +constant's value is the SDK's responsibility, not ours. +""" + +from __future__ import annotations + +import re +from pathlib import Path + +WORKSPACE_ROOT = Path(__file__).resolve().parents[1] + + +def test_main_uses_sdk_constant_for_agent_card_probe(): + """No hardcoded `/.well-known/agent.json` literal anywhere in main.py. + + The SDK constant (AGENT_CARD_WELL_KNOWN_PATH) is the single source + of truth — string-literal probes drift the moment the SDK renames. + """ + main = (WORKSPACE_ROOT / "main.py").read_text() + + bad_literal = "/.well-known/agent.json" + offenders = [ + (lineno, line) + for lineno, line in enumerate(main.splitlines(), 1) + if bad_literal in line + ] + assert not offenders, ( + f"Found pre-1.x literal {bad_literal!r} in main.py — must use " + f"the SDK's AGENT_CARD_WELL_KNOWN_PATH constant instead. " + f"Offending lines: {offenders}" + ) + + assert ( + "AGENT_CARD_WELL_KNOWN_PATH" in main + ), "main.py must import a2a.utils.constants.AGENT_CARD_WELL_KNOWN_PATH" + + +def test_probe_loop_uses_constant_in_url_format(): + """Spot-check that the URL fstring in main.py interpolates the + constant, not a literal. Catches a future "fix" that imports the + constant for show but still uses a literal in the actual GET.""" + main = (WORKSPACE_ROOT / "main.py").read_text() + + # The probe pattern: `client.get(f"http://127.0.0.1:{port}{...}")` + # where `{...}` must be `{AGENT_CARD_WELL_KNOWN_PATH}`, not a + # hardcoded path. + pattern = re.compile( + r'client\.get\(f"http://127\.0\.0\.1:\{port\}\{(?P[^}]+)\}"\)' + ) + matches = pattern.findall(main) + assert matches, "no readiness probe pattern found in main.py" + for expr in matches: + assert "AGENT_CARD_WELL_KNOWN_PATH" in expr, ( + f"readiness probe URL uses {expr!r} instead of " + f"AGENT_CARD_WELL_KNOWN_PATH" + )