diff --git a/.gitea/workflows/e2e-api.yml b/.gitea/workflows/e2e-api.yml
index c1b9eea6e..d23572fa2 100644
--- a/.gitea/workflows/e2e-api.yml
+++ b/.gitea/workflows/e2e-api.yml
@@ -327,7 +327,12 @@ jobs:
           # start-redis steps point at this run's per-run host ports.
           ./platform-server > platform.log 2>&1 &
           echo $! > platform.pid
-      - name: Wait for /health
+      - name: Wait for /health (with migration completion gate)
+        # Issue #2205: probing /health alone is insufficient when the migration
+        # chain is still running; /health can flip true before migrations
+        # finish, so subsequent steps that touch the DB fail. Hybrid fix:
+        # bump timeout from 180s to 300s AND gate exit on the same
+        # workspaces-table existence check "Assert migrations applied" uses.
         if: needs.detect-changes.outputs.api == 'true'
         run: |
           # Readiness signal: the platform binds /health only AFTER the full
@@ -343,13 +348,26 @@ jobs:
           # background platform-server process has exited (e.g. a broken
           # migration crashed it), we stop and fail loudly at once instead of
           # waiting out the whole budget.
-          DEADLINE_SECS=180 # cold-start + full migration chain headroom
+          #
+          # Issue #2205: /health can flip true before migrations finish on a
+          # growing chain, so we gate exit on the workspaces-table existence
+          # check the downstream "Assert migrations applied" uses.
+          # NOTE(review): table existence is a proxy - it shows the migration
+          # that creates `workspaces` ran, not that the whole chain finished.
+          DEADLINE_SECS=300 # cold-start + full migration chain headroom
           PLATFORM_PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"
           start=$(date +%s)
           while :; do
+            # Track which gate is still failing so a timeout names the cause.
+            gate_state="/health not responding"
             if curl -sf "$BASE/health" > /dev/null; then
-              echo "Platform healthy after $(( $(date +%s) - start ))s"
-              exit 0
+              tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc \
+                "SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'" 2>/dev/null || echo "0")
+              if [ "$tables" = "1" ]; then
+                echo "Platform healthy + migrations applied after $(( $(date +%s) - start ))s"
+                exit 0
+              fi
+              gate_state="/health OK but workspaces table not found (count=${tables})"
             fi
             # Fast-fail: if the platform process died, /health will never come.
             if [ -n "$PLATFORM_PID" ] && ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
@@ -358,12 +376,13 @@ jobs:
               exit 1
             fi
             if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
-              echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
+              echo "::error::Platform did not become healthy with migrations applied within ${DEADLINE_SECS}s (last state: ${gate_state}) — see log below"
               cat workspace-server/platform.log || true
               exit 1
             fi
             sleep 1
           done
+
       - name: Assert migrations applied
         if: needs.detect-changes.outputs.api == 'true'
         run: |