forked from molecule-ai/molecule-core
ops: scripts/ops/check-prod-versions.sh — one-line "is each tenant on latest?"
Iterates a list of tenant slugs (default canary set on production,
operator-supplied on staging), curls each tenant's /buildinfo plus
canvas's /api/buildinfo, compares to origin/main's HEAD SHA, prints a
table with one of {current, stale, unreachable} per surface. Returns
non-zero if any surface is stale, so it can be wired into a periodic
alert later.
Why this exists: every "is the fix live?" question used to be
answered with a one-off curl + git rev-parse + manual diff. This
script does that uniformly across every public surface (workspace
tenants + canvas) and is parseable. The redeploy verifier (#2398)
covers the deploy moment; this covers any-time-after.
Reads EXPECTED_SHA from `gh api repos/Molecule-AI/molecule-core/
commits/main` so it always reflects the actual upstream tip, not
local working-copy state. Falls back to local origin/main with a
WARN if `gh` isn't logged in — debugging is still useful even if
the comparison may lag.
Depends on:
- #2409 (TenantGuard /buildinfo allowlist) — without it every
tenant looks "unreachable" because the route 404s before the
handler. Already merged on staging; will hit production after
the next staging→main fast-forward + redeploy.
- #2407 (canvas /api/buildinfo) — already on main + Vercel.
Usage:
./scripts/ops/check-prod-versions.sh # production canary set
TENANT_SLUGS="a b c" ./scripts/ops/check-prod-versions.sh # custom set
ENV=staging TENANT_SLUGS="..." ./scripts/ops/check-prod-versions.sh
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
9c7b14923d
commit
41d5f9558f
112
scripts/ops/check-prod-versions.sh
Executable file
112
scripts/ops/check-prod-versions.sh
Executable file
@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env bash
|
||||
# Check whether production tenants and canvas are running latest main.
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/ops/check-prod-versions.sh # production
|
||||
# ENV=staging ./scripts/ops/check-prod-versions.sh # staging tenants
|
||||
#
|
||||
# Outputs a table of {surface, current_sha, expected_sha, status}. Returns
|
||||
# non-zero if any surface is stale so this can be wired into a periodic
|
||||
# alert.
|
||||
#
|
||||
# Why this exists: every time someone hits a "is the fix live?" question,
|
||||
# they have to remember the curl pattern + cross-reference with
|
||||
# `git rev-parse origin/main`. This script does that check uniformly across
|
||||
# every public surface (workspace tenants + canvas) and gives a one-line
|
||||
# verdict instead of a stack of one-off curls.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
ENV="${ENV:-production}"
|
||||
EXPECTED_REF="${EXPECTED_REF:-main}"
|
||||
|
||||
case "$ENV" in
|
||||
production)
|
||||
TENANT_DOMAIN="moleculesai.app"
|
||||
CANVAS_URL="https://canvas.moleculesai.app"
|
||||
# Default canary tenant for production. Override via TENANT_SLUGS=
|
||||
# to cover a custom set.
|
||||
DEFAULT_TENANTS="hongmingwang reno-stars"
|
||||
;;
|
||||
staging)
|
||||
TENANT_DOMAIN="staging.moleculesai.app"
|
||||
CANVAS_URL="https://canvas-staging.moleculesai.app"
|
||||
DEFAULT_TENANTS="" # staging tenants are ephemeral; user must specify
|
||||
;;
|
||||
*)
|
||||
echo "Unknown ENV=$ENV (expected: production | staging)" >&2
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
|
||||
TENANT_SLUGS="${TENANT_SLUGS:-$DEFAULT_TENANTS}"
|
||||
|
||||
# Pull EXPECTED_SHA from GitHub. Falls back to local git if gh isn't
|
||||
# logged in — local main may lag origin but is usually close enough for
|
||||
# debugging, and we still report the comparison clearly.
|
||||
EXPECTED_SHA=""
|
||||
if command -v gh >/dev/null 2>&1; then
|
||||
EXPECTED_SHA=$(gh api "repos/Molecule-AI/molecule-core/commits/${EXPECTED_REF}" --jq '.sha' 2>/dev/null || true)
|
||||
fi
|
||||
if [ -z "$EXPECTED_SHA" ]; then
|
||||
if git rev-parse "origin/${EXPECTED_REF}" >/dev/null 2>&1; then
|
||||
EXPECTED_SHA=$(git rev-parse "origin/${EXPECTED_REF}")
|
||||
echo "[check-prod-versions] WARN: gh unavailable, using local origin/${EXPECTED_REF}=${EXPECTED_SHA:0:7} (may lag)"
|
||||
else
|
||||
echo "[check-prod-versions] ERROR: cannot resolve expected SHA — gh not logged in and origin/${EXPECTED_REF} not fetched" >&2
|
||||
exit 2
|
||||
fi
|
||||
fi
|
||||
EXPECTED_SHORT="${EXPECTED_SHA:0:7}"
|
||||
|
||||
echo "Checking ${ENV} surfaces against ${EXPECTED_REF}=${EXPECTED_SHORT}"
|
||||
echo ""
|
||||
printf "%-25s %-9s %-9s %s\n" "Surface" "Live" "Expected" "Status"
|
||||
printf "%-25s %-9s %-9s %s\n" "-------" "----" "--------" "------"
|
||||
|
||||
STALE_COUNT=0
|
||||
UNREACHABLE_COUNT=0
|
||||
|
||||
# Tenant surfaces — workspace-server /buildinfo (added in PR #2398).
|
||||
for slug in $TENANT_SLUGS; do
|
||||
URL="https://${slug}.${TENANT_DOMAIN}/buildinfo"
|
||||
BODY=$(curl -sS --max-time 15 "$URL" 2>/dev/null || echo "")
|
||||
ACTUAL_SHA=$(echo "$BODY" | jq -r '.git_sha // ""' 2>/dev/null || echo "")
|
||||
if [ -z "$ACTUAL_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ⚠ unreachable\n" "tenant: $slug" "—" "$EXPECTED_SHORT"
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
elif [ "$ACTUAL_SHA" = "$EXPECTED_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ✓ current\n" "tenant: $slug" "${ACTUAL_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
else
|
||||
printf "%-25s %-9s %-9s ✗ stale\n" "tenant: $slug" "${ACTUAL_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
STALE_COUNT=$((STALE_COUNT + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
# Canvas — Next.js /api/buildinfo (PR #2407). Vercel injects
|
||||
# VERCEL_GIT_COMMIT_SHA at build time so this reflects the deployed
|
||||
# commit, not the request time.
|
||||
CANVAS_BODY=$(curl -sS --max-time 15 "${CANVAS_URL}/api/buildinfo" 2>/dev/null || echo "")
|
||||
CANVAS_SHA=$(echo "$CANVAS_BODY" | jq -r '.git_sha // ""' 2>/dev/null || echo "")
|
||||
if [ -z "$CANVAS_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ⚠ unreachable (route may not be deployed yet)\n" "canvas" "—" "$EXPECTED_SHORT"
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
elif [ "$CANVAS_SHA" = "dev" ]; then
|
||||
printf "%-25s %-9s %-9s ⚠ dev sentinel (Vercel env not injected — check VERCEL_GIT_COMMIT_SHA)\n" "canvas" "dev" "$EXPECTED_SHORT"
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
elif [ "$CANVAS_SHA" = "$EXPECTED_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ✓ current\n" "canvas" "${CANVAS_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
else
|
||||
printf "%-25s %-9s %-9s ✗ stale\n" "canvas" "${CANVAS_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
STALE_COUNT=$((STALE_COUNT + 1))
|
||||
fi
|
||||
|
||||
echo ""
|
||||
if [ $STALE_COUNT -eq 0 ] && [ $UNREACHABLE_COUNT -eq 0 ]; then
|
||||
echo "All surfaces current."
|
||||
exit 0
|
||||
fi
|
||||
echo "Summary: ${STALE_COUNT} stale, ${UNREACHABLE_COUNT} unreachable."
|
||||
# Stale is a deploy gap; unreachable is operational (DNS, CF, route absent).
|
||||
# Both are signal — exit non-zero so cron / CI can alert.
|
||||
exit 1
|
||||
Loading…
Reference in New Issue
Block a user