Merge pull request #2411 from Molecule-AI/auto/prod-version-check-script
ops: check-prod-versions.sh — one-line tenant version status
This commit is contained in:
commit
be44e54b77
112
scripts/ops/check-prod-versions.sh
Executable file
112
scripts/ops/check-prod-versions.sh
Executable file
@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env bash
|
||||
# Check whether production tenants and canvas are running latest main.
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/ops/check-prod-versions.sh # production
|
||||
# ENV=staging ./scripts/ops/check-prod-versions.sh # staging tenants
|
||||
#
|
||||
# Outputs a table of {surface, current_sha, expected_sha, status}. Returns
|
||||
# non-zero if any surface is stale so this can be wired into a periodic
|
||||
# alert.
|
||||
#
|
||||
# Why this exists: every time someone hits a "is the fix live?" question,
|
||||
# they have to remember the curl pattern + cross-reference with
|
||||
# `git rev-parse origin/main`. This script does that check uniformly across
|
||||
# every public surface (workspace tenants + canvas) and gives a one-line
|
||||
# verdict instead of a stack of one-off curls.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
ENV="${ENV:-production}"
|
||||
EXPECTED_REF="${EXPECTED_REF:-main}"
|
||||
|
||||
case "$ENV" in
|
||||
production)
|
||||
TENANT_DOMAIN="moleculesai.app"
|
||||
CANVAS_URL="https://canvas.moleculesai.app"
|
||||
# Default canary tenant for production. Override via TENANT_SLUGS=
|
||||
# to cover a custom set.
|
||||
DEFAULT_TENANTS="hongmingwang reno-stars"
|
||||
;;
|
||||
staging)
|
||||
TENANT_DOMAIN="staging.moleculesai.app"
|
||||
CANVAS_URL="https://canvas-staging.moleculesai.app"
|
||||
DEFAULT_TENANTS="" # staging tenants are ephemeral; user must specify
|
||||
;;
|
||||
*)
|
||||
echo "Unknown ENV=$ENV (expected: production | staging)" >&2
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
|
||||
TENANT_SLUGS="${TENANT_SLUGS:-$DEFAULT_TENANTS}"
|
||||
|
||||
# Pull EXPECTED_SHA from GitHub. Falls back to local git if gh isn't
|
||||
# logged in — local main may lag origin but is usually close enough for
|
||||
# debugging, and we still report the comparison clearly.
|
||||
EXPECTED_SHA=""
|
||||
if command -v gh >/dev/null 2>&1; then
|
||||
EXPECTED_SHA=$(gh api "repos/Molecule-AI/molecule-core/commits/${EXPECTED_REF}" --jq '.sha' 2>/dev/null || true)
|
||||
fi
|
||||
if [ -z "$EXPECTED_SHA" ]; then
|
||||
if git rev-parse "origin/${EXPECTED_REF}" >/dev/null 2>&1; then
|
||||
EXPECTED_SHA=$(git rev-parse "origin/${EXPECTED_REF}")
|
||||
echo "[check-prod-versions] WARN: gh unavailable, using local origin/${EXPECTED_REF}=${EXPECTED_SHA:0:7} (may lag)"
|
||||
else
|
||||
echo "[check-prod-versions] ERROR: cannot resolve expected SHA — gh not logged in and origin/${EXPECTED_REF} not fetched" >&2
|
||||
exit 2
|
||||
fi
|
||||
fi
|
||||
EXPECTED_SHORT="${EXPECTED_SHA:0:7}"
|
||||
|
||||
echo "Checking ${ENV} surfaces against ${EXPECTED_REF}=${EXPECTED_SHORT}"
|
||||
echo ""
|
||||
printf "%-25s %-9s %-9s %s\n" "Surface" "Live" "Expected" "Status"
|
||||
printf "%-25s %-9s %-9s %s\n" "-------" "----" "--------" "------"
|
||||
|
||||
STALE_COUNT=0
|
||||
UNREACHABLE_COUNT=0
|
||||
|
||||
# Tenant surfaces — workspace-server /buildinfo (added in PR #2398).
|
||||
for slug in $TENANT_SLUGS; do
|
||||
URL="https://${slug}.${TENANT_DOMAIN}/buildinfo"
|
||||
BODY=$(curl -sS --max-time 15 "$URL" 2>/dev/null || echo "")
|
||||
ACTUAL_SHA=$(echo "$BODY" | jq -r '.git_sha // ""' 2>/dev/null || echo "")
|
||||
if [ -z "$ACTUAL_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ⚠ unreachable\n" "tenant: $slug" "—" "$EXPECTED_SHORT"
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
elif [ "$ACTUAL_SHA" = "$EXPECTED_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ✓ current\n" "tenant: $slug" "${ACTUAL_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
else
|
||||
printf "%-25s %-9s %-9s ✗ stale\n" "tenant: $slug" "${ACTUAL_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
STALE_COUNT=$((STALE_COUNT + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
# Canvas — Next.js /api/buildinfo (PR #2407). Vercel injects
|
||||
# VERCEL_GIT_COMMIT_SHA at build time so this reflects the deployed
|
||||
# commit, not the request time.
|
||||
CANVAS_BODY=$(curl -sS --max-time 15 "${CANVAS_URL}/api/buildinfo" 2>/dev/null || echo "")
|
||||
CANVAS_SHA=$(echo "$CANVAS_BODY" | jq -r '.git_sha // ""' 2>/dev/null || echo "")
|
||||
if [ -z "$CANVAS_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ⚠ unreachable (route may not be deployed yet)\n" "canvas" "—" "$EXPECTED_SHORT"
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
elif [ "$CANVAS_SHA" = "dev" ]; then
|
||||
printf "%-25s %-9s %-9s ⚠ dev sentinel (Vercel env not injected — check VERCEL_GIT_COMMIT_SHA)\n" "canvas" "dev" "$EXPECTED_SHORT"
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
elif [ "$CANVAS_SHA" = "$EXPECTED_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ✓ current\n" "canvas" "${CANVAS_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
else
|
||||
printf "%-25s %-9s %-9s ✗ stale\n" "canvas" "${CANVAS_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
STALE_COUNT=$((STALE_COUNT + 1))
|
||||
fi
|
||||
|
||||
echo ""
|
||||
if [ $STALE_COUNT -eq 0 ] && [ $UNREACHABLE_COUNT -eq 0 ]; then
|
||||
echo "All surfaces current."
|
||||
exit 0
|
||||
fi
|
||||
echo "Summary: ${STALE_COUNT} stale, ${UNREACHABLE_COUNT} unreachable."
|
||||
# Stale is a deploy gap; unreachable is operational (DNS, CF, route absent).
|
||||
# Both are signal — exit non-zero so cron / CI can alert.
|
||||
exit 1
|
||||
Loading…
Reference in New Issue
Block a user