From 0af4012f7914496fa9c7df26907f3141306632a9 Mon Sep 17 00:00:00 2001
From: Hongming Wang
Date: Thu, 30 Apr 2026 11:57:27 -0700
Subject: [PATCH] feat(tests): add run-all-replays.sh harness runner

Boots the harness, runs every script under replays/, tracks pass/fail,
and tears down on exit. Closes the README's TODO for the harness runner
that the per-replay-registration comment referenced.

Usage:
  ./run-all-replays.sh            # boot, run, teardown
  KEEP_UP=1 ./run-all-replays.sh  # leave harness running on exit
  REBUILD=1 ./run-all-replays.sh  # rebuild images before booting

Trap-on-EXIT teardown ensures partial-failure runs don't leak Docker
resources. Returns non-zero if any replay failed; CI can adopt this as
a single command without per-replay registration.

Phase 2 picks this up to wire harness-based E2E as a required check.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 tests/harness/README.md          | 17 ++++--
 tests/harness/run-all-replays.sh | 90 ++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+), 4 deletions(-)
 create mode 100755 tests/harness/run-all-replays.sh

diff --git a/tests/harness/README.md b/tests/harness/README.md
index d586d36b..1306d8ae 100644
--- a/tests/harness/README.md
+++ b/tests/harness/README.md
@@ -44,6 +44,15 @@ cd tests/harness
 ./down.sh # tear down + remove volumes
 ```
 
+To run every replay in one shot (boot, seed, run-all, teardown):
+
+```bash
+cd tests/harness
+./run-all-replays.sh # full lifecycle; non-zero exit if any replay fails
+KEEP_UP=1 ./run-all-replays.sh # leave harness up for debugging
+REBUILD=1 ./run-all-replays.sh # rebuild images before booting
+```
+
 First-time setup needs an `/etc/hosts` entry so
 `harness-tenant.localhost` resolves to the local cf-proxy:
 
@@ -71,8 +80,8 @@ To add a new replay:
 2. The script's purpose: reproduce the production failure mode against
    the harness, then assert the fix is present. PASS criterion is the
    post-fix behavior.
-3. 
Wire it into the `tests/harness/run-all-replays.sh` runner (TODO,
-   Phase 2).
+3. The `run-all-replays.sh` runner picks up every `replays/*.sh` script
+   automatically — no per-replay registration needed.
 
 ## Extending the cp-stub
 
@@ -102,9 +111,9 @@ its mandate of "exercise the tenant binary in production-shape topology."
 
 ## Roadmap
 
-- **Phase 1 (this PR):** harness + cp-stub + cf-proxy + 2 replays.
+- **Phase 1 (shipped):** harness + cp-stub + cf-proxy + 2 replays + `run-all-replays.sh` runner.
 - **Phase 2:** convert `tests/e2e/test_api.sh` to run against the harness
   instead of localhost. Make harness-based E2E a required CI
-  check.
+  check (a workflow that invokes `run-all-replays.sh` on every PR).
 - **Phase 3:** config-coherence lint that diffs harness env list against
   production CP's env list, fails CI on drift.
diff --git a/tests/harness/run-all-replays.sh b/tests/harness/run-all-replays.sh
new file mode 100755
index 00000000..092158c3
--- /dev/null
+++ b/tests/harness/run-all-replays.sh
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+# Run every replay under tests/harness/replays/ against a fresh harness.
+#
+# Boots the harness (up.sh + seed.sh), runs each `replays/*.sh` in
+# alphabetical order, tracks pass/fail, and tears down on exit. Returns
+# non-zero if any replay failed.
+#
+# Usage:
+#   ./run-all-replays.sh            # boot, run, teardown
+#   KEEP_UP=1 ./run-all-replays.sh  # leave harness running on exit (debug)
+#   REBUILD=1 ./run-all-replays.sh  # rebuild images before booting
+#
+# CI usage: invoke without flags. The trap-on-EXIT teardown ensures we
+# don't leak Docker resources when a replay fails partway through.
+
+set -euo pipefail
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$HERE"
+
+REPLAYS_DIR="$HERE/replays"
+if [ ! -d "$REPLAYS_DIR" ]; then
+  echo "[run-all] no replays/ directory at $REPLAYS_DIR — nothing to run" >&2
+  exit 1
+fi
+
+# nullglob only for this expansion, so an empty replays/ yields an empty
+# array instead of the literal pattern.
+shopt -s nullglob
+REPLAYS=("$REPLAYS_DIR"/*.sh)
+shopt -u nullglob
+if [ "${#REPLAYS[@]}" -eq 0 ]; then
+  echo "[run-all] replays/ is empty — nothing to run" >&2
+  exit 1
+fi
+
+# Teardown handler; runs once, from the EXIT trap, and re-exits with the
+# status the script was already terminating with.
+cleanup() {
+  local exit_code=$?
+  # Disarm the traps first so neither the `exit` below nor a signal that
+  # arrives mid-teardown can re-enter this function and run down.sh twice.
+  trap - EXIT INT TERM
+  if [ "${KEEP_UP:-0}" = "1" ]; then
+    echo ""
+    echo "[run-all] KEEP_UP=1 — leaving harness up. Tear down manually with ./down.sh"
+  else
+    echo ""
+    echo "[run-all] tearing down harness..."
+    ./down.sh >/dev/null 2>&1 || echo "[run-all] WARN: ./down.sh exited non-zero" >&2
+  fi
+  exit "$exit_code"
+}
+trap cleanup EXIT
+# Signals go through `exit` so the EXIT trap stays the single teardown path
+# and an interrupted run reports 128+signal rather than a stale $?.
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+echo "[run-all] booting harness..."
+if [ "${REBUILD:-0}" = "1" ]; then
+  ./up.sh --rebuild
+else
+  ./up.sh
+fi
+
+echo "[run-all] seeding workspaces..."
+./seed.sh
+
+PASS_COUNT=0
+FAIL_COUNT=0
+FAILED_NAMES=()
+
+for replay in "${REPLAYS[@]}"; do
+  name=$(basename "$replay" .sh)
+  echo ""
+  echo "[run-all] ━━━ $name ━━━"
+  if bash "$replay"; then
+    # Replays signal "skip" by exiting 0 with a __SKIP__ marker in stdout —
+    # but we capture that as a pass here since the script exited 0. The
+    # skip is documented in the script's own output. CI uses pass/fail.
+    PASS_COUNT=$((PASS_COUNT + 1))
+    echo "[run-all] PASS: $name"
+  else
+    FAIL_COUNT=$((FAIL_COUNT + 1))
+    FAILED_NAMES+=("$name")
+    echo "[run-all] FAIL: $name"
+  fi
+done
+
+echo ""
+echo "[run-all] ============================="
+echo "[run-all] Replay summary: ${PASS_COUNT} passed, ${FAIL_COUNT} failed (of ${#REPLAYS[@]} total)"
+if [ "${FAIL_COUNT}" -gt 0 ]; then
+  echo "[run-all] Failed:"
+  for name in "${FAILED_NAMES[@]}"; do
+    echo "[run-all] - $name"
+  done
+  exit 1
+fi
+echo "[run-all] All replays passed."