diff --git a/.github/workflows-disabled/README.md b/.github/workflows-disabled/README.md new file mode 100644 index 0000000..56a2805 --- /dev/null +++ b/.github/workflows-disabled/README.md @@ -0,0 +1,22 @@ +# Disabled upptime workflows + +These five workflows (`graphs.yml`, `response-time.yml`, +`static-site.yml`, `summary.yml`, `uptime.yml`) are upptime-driven +and call `api.github.com` for release lookups, issue management, and +result commits. + +Since the 2026-05-06 GitHub org suspension, no token in our org +authenticates against api.github.com, so every scheduled run failed +with HTTP 401 "Bad credentials". See `molecule-ai-status#2` for the full +diagnosis and the replacement plan. + +Workflows here will not be re-enabled — they're moved to +`workflows-disabled/` so the failed-run noise stops while the +replacement (Gitea-native uptime probe at +`molecule-ai/molecule-ai-uptime-probe`) is built. The new probe runs +under `.github/workflows/uptime-probe.yml`. + +Delete this directory after the replacement has run cleanly for ~7 days +and the existing history is either migrated or marked archived. 
+ +Tracked: molecule-ai-status#2 diff --git a/.github/workflows/graphs.yml b/.github/workflows-disabled/graphs.yml similarity index 100% rename from .github/workflows/graphs.yml rename to .github/workflows-disabled/graphs.yml diff --git a/.github/workflows/response-time.yml b/.github/workflows-disabled/response-time.yml similarity index 100% rename from .github/workflows/response-time.yml rename to .github/workflows-disabled/response-time.yml diff --git a/.github/workflows/static-site.yml b/.github/workflows-disabled/static-site.yml similarity index 100% rename from .github/workflows/static-site.yml rename to .github/workflows-disabled/static-site.yml diff --git a/.github/workflows/summary.yml b/.github/workflows-disabled/summary.yml similarity index 100% rename from .github/workflows/summary.yml rename to .github/workflows-disabled/summary.yml diff --git a/.github/workflows/uptime.yml b/.github/workflows-disabled/uptime.yml similarity index 100% rename from .github/workflows/uptime.yml rename to .github/workflows-disabled/uptime.yml diff --git a/.github/workflows/uptime-probe.yml b/.github/workflows/uptime-probe.yml new file mode 100644 index 0000000..601da19 --- /dev/null +++ b/.github/workflows/uptime-probe.yml @@ -0,0 +1,101 @@ +name: Uptime probe (Gitea-native — replaces upptime) +# +# Runs the molecule-ai-uptime-probe binary on a 5-minute cadence, +# appends per-site JSONL results to history/, and commits the changes +# back to main. Replaces the five upptime workflows that lived in this +# repo before they were moved to .github/workflows-disabled/ (because +# every upptime call to api.github.com 401s post-2026-05-06 GitHub +# org suspension). +# +# See molecule-ai/molecule-ai-status#2 for the design rationale + +# molecule-ai/molecule-ai-uptime-probe for the probe binary itself. +# +# Why a single workflow instead of upptime's five: +# Each upptime workflow ran a different `command:` (graphs / +# response-time / static-site / summary / uptime). 
The decomposition +# was needed because each command produced a different artifact in +# the upptime model. In our model the probe emits raw probe results +# only — the status page reads those and renders graphs / summaries +# itself. One concern per tool. One workflow. + +on: + schedule: + # Every 5 minutes — matches the upptime default cadence. + - cron: "*/5 * * * *" + # Manual trigger for ad-hoc checks. + workflow_dispatch: + # Re-run when probe-list config changes so a new endpoint gets a + # baseline immediately, not at the next /5 mark. + push: + branches: [main] + paths: [".upptimerc.yml"] + +permissions: + contents: write # required to commit history/ updates + +jobs: + probe: + name: Probe + commit + runs-on: ubuntu-latest + # Concurrency: at most one probe run at a time per branch. Two + # cron firings overlapping would race on history/ commits. + concurrency: + group: uptime-probe-${{ github.ref }} + cancel-in-progress: false + steps: + - name: Checkout repo + uses: actions/checkout@v4 + with: + fetch-depth: 1 + persist-credentials: true + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: '1.23' + token: ${{ secrets.GITEA_TOKEN }} # see molecule-ai/internal#75 + + - name: Install probe + # Build directly from the probe's repo at GOPROBE_REF. NOTE: the + # ref below is the moving `main` branch, not a pinned commit, so + # builds are not reproducible. Pin it to a release tag (updated + # explicitly here per probe release) to avoid supply-chain ambiguity. + run: | + set -euo pipefail + GOPROBE_REPO=https://git.moleculesai.app/molecule-ai/molecule-ai-uptime-probe.git + GOPROBE_REF=main + tmp=$(mktemp -d) + git clone --depth 1 --branch "$GOPROBE_REF" "$GOPROBE_REPO" "$tmp/probe" + (cd "$tmp/probe" && go build -o /usr/local/bin/uptime-probe ./cmd/probe) + /usr/local/bin/uptime-probe -h 2>&1 | head -5 + + - name: Run probes + # Exit 1 from the probe when any site fails — but we don't + # want a single failing site to abort the workflow before the + # commit step. 
`|| true` swallows the non-zero exit; the + # failure shows up as success=false in the JSONL history, + # where the status page picks it up. + run: | + mkdir -p history + /usr/local/bin/uptime-probe \ + -config .upptimerc.yml \ + -history-dir history \ + -timeout 30s \ + > /tmp/run.json || true + echo "== run summary ==" + jq -r '.[] | "\(.name): \(.status_code) \(.latency_ms)ms success=\(.success)"' /tmp/run.json || cat /tmp/run.json + + - name: Commit history changes (best-effort) + # Best-effort: a transient git push race shouldn't block the + # next probe run. The next /5 firing will commit again. + run: | + set +e + git config user.name "uptime-probe[bot]" + git config user.email "uptime-probe@bots.moleculesai.app" + git add history/ + if git diff --cached --quiet; then + echo "no history changes to commit" + exit 0 + fi + git commit -m "chore(uptime): probe results $(date -u +%Y-%m-%dT%H:%M:%SZ)" + git push origin HEAD:main || echo "push failed; next run will retry" diff --git a/site/app.js b/site/app.js new file mode 100644 index 0000000..a4b383e --- /dev/null +++ b/site/app.js @@ -0,0 +1,244 @@ +// status.moleculesai.app — read-only status page for Molecules AI services. +// +// Pulls the probe-list config + per-site history JSONL from the +// molecule-ai-status repo on Gitea, renders a one-row-per-service +// dashboard with current state + a 24h-history sparkline. +// +// Why no framework: this page is plain DOM + fetch. Zero build step, +// zero dependencies, zero supply-chain surface. The thing it MUST do +// well is "load fast, show correct status, never lie." React/Vue +// would be cargo-culting at this scale. +// +// Data source: same-origin /data/* paths, Vercel-rewritten to +// git.moleculesai.app raw URLs. The rewrite avoids cross-origin +// browser fetches (Gitea doesn't send Access-Control-Allow-Origin +// on raw file responses). vercel.json owns the rewrite map. 

// Same-origin URL of the per-site history file for a given slug.
const HISTORY_URL = (slug) => `/data/history/${slug}.jsonl`;
// Same-origin URL of the probe-list config.
const CONFIG_URL = `/data/.upptimerc.yml`;
// Human-browsable repo URL (used in user-facing messages).
const REPO_BROWSE = "https://git.moleculesai.app/molecule-ai/molecule-ai-status";

// Window of history we render in the sparkline (24h of probes at one
// per 5 minutes ≈ 288). Cap to keep the DOM bounded if a site has
// been probing for years.
const SPARKLINE_LIMIT = 288;

// Slugify must match the probe binary's slugify() in cmd/probe/main.go
// — the page reads files the probe writes, so the slugging rule is
// load-bearing. Mirror in tests if/when this gets a follow-up.
/**
 * Convert a site name into the slug used for its history file name.
 *
 * Rule: lowercase the input, turn every run of characters outside
 * [a-z0-9] into a single "-", then strip leading/trailing "-".
 *
 * @param {string} s - Site name. Non-string values are coerced with
 *   String(); null/undefined yield "".
 * @returns {string} Slug made only of [a-z0-9-]; may be empty.
 */
function slugify(s) {
  const text = s == null ? "" : String(s);
  return text
    .toLowerCase()
    // Collapse each run of non-alphanumerics into one separator.
    .replace(/[^a-z0-9]+/g, "-")
    // Drop separators left at either end.
    .replace(/^-+|-+$/g, "");
}

// Minimal YAML parser for the subset of .upptimerc.yml we read:
// only the `sites:` list of `{name, url}`. Anything more elaborate
// (anchors, multiline strings, etc.) is overkill — the upstream
// upptime config schema is intentionally simple.
+function parseSites(yamlText) { + const sites = []; + let inSites = false; + let current = null; + for (const rawLine of yamlText.split("\n")) { + const line = rawLine.replace(/\r$/, ""); + if (line.startsWith("#")) continue; + if (/^\s*$/.test(line)) continue; + + if (/^sites:\s*$/.test(line)) { + inSites = true; + continue; + } + if (inSites && /^[a-zA-Z]/.test(line)) { + // hit a top-level key after sites: — bail + inSites = false; + } + if (!inSites) continue; + + const itemStart = line.match(/^\s*-\s+name:\s*(.+)$/); + if (itemStart) { + if (current) sites.push(current); + current = { name: itemStart[1].trim().replace(/^["']|["']$/g, "") }; + continue; + } + const urlMatch = line.match(/^\s+url:\s*(.+)$/); + if (urlMatch && current) { + current.url = urlMatch[1].trim().replace(/^["']|["']$/g, ""); + } + } + if (current) sites.push(current); + return sites.filter((s) => s.name && s.url); +} + +// Parse a JSONL response into an array of Result objects. Tolerant of +// trailing newlines + (rarely) blank lines from a partial-write race. +function parseJSONL(text) { + const out = []; + for (const line of text.split("\n")) { + if (!line.trim()) continue; + try { + out.push(JSON.parse(line)); + } catch { + // skip malformed line — better than the whole page erroring + } + } + return out; +} + +// Best-effort fetch — returns null on failure (no exceptions). +async function fetchText(url) { + try { + const resp = await fetch(url, { cache: "no-cache" }); + if (!resp.ok) return null; + return await resp.text(); + } catch { + return null; + } +} + +// Render a row for one site given its latest results. +function renderRow(site, results) { + const last = results[results.length - 1]; + const status = !last ? "unknown" : last.success ? "up" : "down"; + const latency = last && last.success ? `${last.latency_ms} ms` : "—"; + + // Sparkline: last SPARKLINE_LIMIT entries, one bar per. Bar height + // proportional to latency (clamped). 
Failing checks render red and + // taller (so eye is drawn to outages). + const recent = results.slice(-SPARKLINE_LIMIT); + const succ = recent.filter((r) => r.success); + const maxLat = Math.max(50, ...succ.map((r) => r.latency_ms)); + + const spark = recent + .map((r) => { + const cls = r.success ? "" : "fail"; + const h = !r.success ? 20 : Math.max(2, Math.round((r.latency_ms / maxLat) * 18)); + return ``; + }) + .join(""); + + return ` +
+ `; +} + +function escape(s) { + return String(s).replace(/[&<>"']/g, (c) => ({ + "&": "&", "<": "<", ">": ">", '"': """, "'": "'", + })[c]); +} + +function renderSummary(rows) { + const total = rows.length; + const up = rows.filter((r) => r.status === "up").length; + const down = rows.filter((r) => r.status === "down").length; + const unknown = rows.filter((r) => r.status === "unknown").length; + + let dot, text, sub; + if (total === 0) { + dot = "var(--ink-soft)"; + text = "No services configured"; + sub = "Add `.upptimerc.yml` entries."; + } else if (down === 0 && unknown === 0) { + dot = "var(--green)"; + text = "All systems operational"; + sub = `${up} of ${total} services responding normally.`; + } else if (down === 0) { + dot = "var(--amber)"; + text = "Status partially unknown"; + sub = `${up} up · ${unknown} no recent data.`; + } else if (up === 0) { + dot = "var(--red)"; + text = "Major outage"; + sub = `${down} services failing.`; + } else { + dot = "var(--amber)"; + text = "Partial outage"; + sub = `${up} up · ${down} down · ${unknown} unknown.`; + } + return ` + +${CONFIG_URL} is reachable (Vercel rewrites /data/* to ${REPO_BROWSE}/raw/branch/main/$1)..upptimerc.yml.