[migrate] Replace upptime with Gitea-native uptime probe (closes #2) #4

Open
claude-ceo-assistant wants to merge 3 commits from feat/uptime-probe-cron-issue2 into main
10 changed files with 537 additions and 0 deletions

22
.github/workflows-disabled/README.md vendored Normal file
View File

@ -0,0 +1,22 @@
# Disabled upptime workflows
These five workflows (`graphs.yml`, `response-time.yml`,
`static-site.yml`, `summary.yml`, `uptime.yml`) are upptime-driven
and call `api.github.com` for releases lookup, issue management, and
result commits.
Since the 2026-05-06 GitHub org suspension, no token in our org
authenticates against api.github.com, so every scheduled run fails
with HTTP 401 "Bad credentials". See `molecule-ai-status#2` for the full
diagnosis and the replacement plan.
Workflows here will not be re-enabled — they're moved to
`workflows-disabled/` so the failed-run noise stops while the
replacement (Gitea-native uptime probe at
`molecule-ai/molecule-ai-uptime-probe`) is built. The new probe runs
under `.github/workflows/uptime-probe.yml`.
Delete this directory once the replacement has run cleanly for ~7 days
and the existing history has been either migrated or marked archived.
Tracked: molecule-ai-status#2

101
.github/workflows/uptime-probe.yml vendored Normal file
View File

@ -0,0 +1,101 @@
name: Uptime probe (Gitea-native — replaces upptime)
#
# Runs the molecule-ai-uptime-probe binary on a 5-minute cadence,
# appends per-site JSONL results to history/, and commits the changes
# back to main. Replaces the five upptime workflows that lived in this
# repo before they were moved to .github/workflows-disabled/ (because
# every upptime call to api.github.com 401s post-2026-05-06 GitHub
# org suspension).
#
# See molecule-ai/molecule-ai-status#2 for the design rationale +
# molecule-ai/molecule-ai-uptime-probe for the probe binary itself.
#
# Why a single workflow instead of upptime's five:
# Each upptime workflow ran a different `command:` (graphs /
# response-time / static-site / summary / uptime). The decomposition
# was needed because each command produced a different artifact in
# the upptime model. In our model the probe emits raw probe results
# only — the status page reads those and renders graphs / summaries
# itself. One concern per tool. One workflow.
on:
schedule:
# Every 5 minutes — matches the upptime default cadence.
- cron: "*/5 * * * *"
# Manual trigger for ad-hoc checks.
workflow_dispatch:
# Re-run when probe-list config changes so a new endpoint gets a
# baseline immediately, not at the next /5 mark.
push:
branches: [main]
paths: [".upptimerc.yml"]
permissions:
contents: write # required to commit history/ updates
jobs:
probe:
name: Probe + commit
runs-on: ubuntu-latest
# Concurrency: at most one probe run at a time per branch. Two
# cron firings overlapping would race on history/ commits.
concurrency:
group: uptime-probe-${{ github.ref }}
cancel-in-progress: false
steps:
- name: Checkout repo
uses: actions/checkout@v4
with:
fetch-depth: 1
persist-credentials: true
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: '1.23'
token: ${{ secrets.GITEA_TOKEN }} # see molecule-ai/internal#75
- name: Install probe
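# Build directly from the probe's repo at GOPROBE_REF. NOTE: the ref
# below is currently the `main` branch, not a pinned commit, so every
# run builds whatever is at the branch tip. To avoid supply-chain
# ambiguity, point GOPROBE_REF at a release tag (`git clone --branch`
# accepts branch and tag names, not bare commit SHAs) and bump it
# explicitly in this workflow file when the probe ships a new
# behaviour-changing version.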
run: |
set -euo pipefail
GOPROBE_REPO=https://git.moleculesai.app/molecule-ai/molecule-ai-uptime-probe.git
GOPROBE_REF=main
tmp=$(mktemp -d)
git clone --depth 1 --branch "$GOPROBE_REF" "$GOPROBE_REPO" "$tmp/probe"
(cd "$tmp/probe" && go build -o /usr/local/bin/uptime-probe ./cmd/probe)
/usr/local/bin/uptime-probe -h 2>&1 | head -5
- name: Run probes
# The probe exits 1 when any site fails, but a single failing site
# must not abort the workflow before the commit step. `|| true`
# swallows the non-zero exit; the failure shows up as success=false
# in the JSONL history, where the status page picks it up.
# NOTE: `|| true` swallows every non-zero exit, not only "a site is
# down" — a probe crash or config error also leaves this step green.
run: |
mkdir -p history
/usr/local/bin/uptime-probe \
-config .upptimerc.yml \
-history-dir history \
-timeout 30s \
> /tmp/run.json || true
echo "== run summary =="
jq -r '.[] | "\(.name): \(.status_code) \(.latency_ms)ms success=\(.success)"' /tmp/run.json || cat /tmp/run.json
- name: Commit history changes (best-effort)
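# Best-effort: a transient git push race shouldn't block the
# next probe run. A failed push is only logged; nothing in this
# workflow re-pushes that commit, and the next /5 firing commits
# the results of its own probe run.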
run: |
set +e
git config user.name "uptime-probe[bot]"
git config user.email "uptime-probe@bots.moleculesai.app"
git add history/
if git diff --cached --quiet; then
echo "no history changes to commit"
exit 0
fi
git commit -m "chore(uptime): probe results $(date -u +%Y-%m-%dT%H:%M:%SZ)"
git push origin HEAD:main || echo "push failed; next run will retry"

244
site/app.js Normal file
View File

@ -0,0 +1,244 @@
// status.moleculesai.app — read-only status page for Molecules AI services.
//
// Pulls the probe-list config + per-site history JSONL from the
// molecule-ai-status repo on Gitea, renders a one-row-per-service
// dashboard with current state + a 24h-history sparkline.
//
// Why no framework: this page is plain DOM + fetch. Zero build step,
// zero dependencies, zero supply-chain surface. The thing it MUST do
// well is "load fast, show correct status, never lie." React/Vue
// would be cargo-culting at this scale.
//
// Data source: same-origin /data/* paths, Vercel-rewritten to
// git.moleculesai.app raw URLs. The rewrite avoids cross-origin
// browser fetches (Gitea doesn't send Access-Control-Allow-Origin
// on raw file responses). vercel.json owns the rewrite map.
// Same-origin path of the history JSONL file for one site; `slug` is the
// slugified site name (see slugify below).
const HISTORY_URL = (slug) => `/data/history/${slug}.jsonl`;
// Same-origin path of the probe-list config that parseSites() reads.
const CONFIG_URL = `/data/.upptimerc.yml`;
// Base URL of the repo on Gitea; load() uses it to build the links it renders.
const REPO_BROWSE = "https://git.moleculesai.app/molecule-ai/molecule-ai-status";
// Maximum number of most-recent results drawn in each sparkline: 24h of
// probes at one per 5 minutes is 24 * 12 = 288. The cap keeps the per-row
// DOM bounded however long a site has been probed.
const SPARKLINE_LIMIT = 288;
// Turn a site name into the slug used for its history file name.
//
// Load-bearing: the result must match the probe binary's slugify() in
// cmd/probe/main.go, because this page reads the files the probe writes.
// Rule: lowercase the input, keep runs of [a-z0-9], and join those runs
// with a single "-" (so there is never a leading, trailing or doubled dash).
function slugify(s) {
  return s
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((run) => run !== "")
    .join("-");
}
// Minimal YAML reader for the one part of .upptimerc.yml this page needs:
// the top-level `sites:` list of `{name, url}` entries. Anything more
// elaborate (anchors, multiline strings, flow lists, etc.) is deliberately
// out of scope — the upstream upptime config schema is intentionally simple.
//
// Recognised shape — each item starts with `- name:` and its `url:` sits on
// a following indented line:
//
//   sites:                      # a trailing comment on the header is fine
//     - name: API               # inline comments are stripped
//       url: "https://api.example.com"
//
// Parameters: yamlText — the raw config text (LF or CRLF line endings).
// Returns: an array of `{name, url}` objects in file order; entries that
// lack a name or a url are dropped.
function parseSites(yamlText) {
  // Reduce a raw scalar to its value. A fully quoted scalar yields the text
  // between the quotes (a `#` inside the quotes is data). Otherwise a
  // trailing comment is cut: YAML only starts a comment at a `#` that opens
  // the value or follows whitespace, so URL fragments like `/#top` survive.
  const cleanScalar = (raw) => {
    const value = raw.trim();
    const quoted = value.match(/^(["'])(.*?)\1(?:\s+#.*)?$/);
    if (quoted) return quoted[2];
    return value
      .replace(/(?:^|\s+)#.*$/, "")
      .replace(/^["']|["']$/g, "");
  };

  const sites = [];
  let inSites = false;
  let current = null;
  for (const rawLine of yamlText.split("\n")) {
    const line = rawLine.replace(/\r$/, "");
    // Skip blank lines and whole-line comments at any indentation.
    if (/^\s*(?:#.*)?$/.test(line)) continue;
    if (/^sites:\s*(?:#.*)?$/.test(line)) {
      inSites = true;
      continue;
    }
    if (inSites && /^[a-zA-Z]/.test(line)) {
      // hit a top-level key after sites: — bail
      inSites = false;
    }
    if (!inSites) continue;
    const itemStart = line.match(/^\s*-\s+name:\s*(.+)$/);
    if (itemStart) {
      // A new item begins: flush the previous one before starting over.
      if (current) sites.push(current);
      current = { name: cleanScalar(itemStart[1]) };
      continue;
    }
    const urlMatch = line.match(/^\s+url:\s*(.+)$/);
    if (urlMatch && current) {
      current.url = cleanScalar(urlMatch[1]);
    }
  }
  if (current) sites.push(current);
  return sites.filter((s) => s.name && s.url);
}
// Parse a JSONL response into an array of Result objects. Tolerant of
// trailing newlines + (rarely) blank lines from a partial-write race.
//
// Parameters: text — the raw JSONL body, one JSON document per line.
// Returns: the parsed records in file order. Lines that are not valid JSON
// are skipped, and so are lines that parse to something other than a plain
// object (`null`, numbers, strings, arrays): callers read properties such as
// `.success` off every entry, which would throw on `null`.
function parseJSONL(text) {
  const out = [];
  for (const line of text.split("\n")) {
    if (!line.trim()) continue;
    let record;
    try {
      record = JSON.parse(line);
    } catch {
      // skip malformed line — better than the whole page erroring
      continue;
    }
    if (record !== null && typeof record === "object" && !Array.isArray(record)) {
      out.push(record);
    }
  }
  return out;
}
// Best-effort GET of `url` as text. Never throws: a network error, a
// non-2xx response, or a failure while reading the body all resolve to
// null. `cache: "no-cache"` is passed through to fetch().
async function fetchText(url) {
  try {
    const response = await fetch(url, { cache: "no-cache" });
    return response.ok ? await response.text() : null;
  } catch {
    return null;
  }
}
// Render the HTML for one site's row.
//
// Parameters:
//   site    — `{name, url}` from the probe-list config.
//   results — that site's parsed history records, oldest first. The fields
//             read here are `success`, `latency_ms`, `timestamp`, `error`.
// Returns: an HTML string with the status dot, the linked name + URL, the
// sparkline and the latest latency.
//
// Every value that originates in the config or the history files is passed
// through escape() before it is interpolated, including attribute values:
// probe error strings routinely contain double quotes, which would
// otherwise terminate the `title` attribute.
function renderRow(site, results) {
  // A record only has a usable latency when it is a finite number; anything
  // else (missing, null, string) must not reach the height arithmetic.
  const hasLatency = (r) => Number.isFinite(r.latency_ms);

  const last = results[results.length - 1];
  const status = !last ? "unknown" : last.success ? "up" : "down";
  const latency = last && last.success && hasLatency(last) ? `${last.latency_ms} ms` : "—";
  // Sparkline: last SPARKLINE_LIMIT entries, one bar per. Bar height
  // proportional to latency (clamped). Failing checks render red and
  // taller (so eye is drawn to outages).
  const recent = results.slice(-SPARKLINE_LIMIT);
  const latencies = recent
    .filter((r) => r.success && hasLatency(r))
    .map((r) => r.latency_ms);
  // Floor of 50 ms so a uniformly fast site does not render full-height bars.
  const maxLat = Math.max(50, ...latencies);
  const spark = recent
    .map((r) => {
      const cls = r.success ? "" : "fail";
      let h = 20;
      if (r.success) {
        h = hasLatency(r) ? Math.max(2, Math.round((r.latency_ms / maxLat) * 18)) : 2;
      }
      let detail;
      if (!r.success) {
        detail = `FAIL: ${r.error || ""}`;
      } else {
        detail = hasLatency(r) ? `${r.latency_ms}ms` : "latency n/a";
      }
      const title = escape(`${r.timestamp} · ${detail}`);
      return `<span class="${cls}" style="height:${h}px" title="${title}"></span>`;
    })
    .join("");
  return `
    <div class="row" data-status="${status}">
      <div class="dot ${status}" title="${status}"></div>
      <div class="row-name">
        <a href="${escape(site.url)}" target="_blank" rel="noopener noreferrer">${escape(site.name)}</a>
        <span class="url">${escape(site.url)}</span>
      </div>
      <div class="row-spark" title="last ${recent.length} checks (newest right)">${spark}</div>
      <div class="row-latency">${latency}</div>
    </div>
  `;
}
// HTML-escape a value for use in element text or a quoted attribute.
// The argument is stringified first, then each of & < > " ' is replaced
// with its entity in a single pass (so "&" in the output is never
// re-escaped).
function escape(s) {
  const entities = new Map([
    ["&", "&amp;"],
    ["<", "&lt;"],
    [">", "&gt;"],
    ['"', "&quot;"],
    ["'", "&#39;"],
  ]);
  return String(s).replace(/[&<>"']/g, (ch) => entities.get(ch));
}
// Render the inner HTML of the overall-status card.
//
// Parameters: rows — array of `{status}` objects, where status is "up",
// "down" or "unknown" (any other value is counted in the total only).
// Returns: an HTML string with a coloured dot, a headline and a sub-line.
//
// Headline selection:
//   no rows                 -> "No services configured"
//   something down, none up -> "Major outage"
//   something down, some up -> "Partial outage"
//   nothing down or unknown -> "All systems operational"
//   nothing down, some unknown -> "Status partially unknown"
function renderSummary(rows) {
  const countOf = (wanted) => rows.filter((row) => row.status === wanted).length;
  const total = rows.length;
  const up = countOf("up");
  const down = countOf("down");
  const unknown = countOf("unknown");

  // Resolve to [dot colour, headline, sub-line].
  const pick = () => {
    if (total === 0) {
      return ["var(--ink-soft)", "No services configured", "Add `.upptimerc.yml` entries."];
    }
    if (down > 0) {
      return up === 0
        ? ["var(--red)", "Major outage", `${down} services failing.`]
        : ["var(--amber)", "Partial outage", `${up} up · ${down} down · ${unknown} unknown.`];
    }
    return unknown === 0
      ? ["var(--green)", "All systems operational", `${up} of ${total} services responding normally.`]
      : ["var(--amber)", "Status partially unknown", `${up} up · ${unknown} no recent data.`];
  };

  const [dot, text, sub] = pick();
  return [
    "",
    `<div class="summary-dot" style="background:${dot}"></div>`,
    `<div class="summary-text">`,
    `<strong>${text}</strong>`,
    `<small>${sub}</small>`,
    `</div>`,
    "",
  ].join("\n");
}
// Fetch the probe-list config and every site's history, then render the
// summary card, the per-site grid and the "updated" line.
//
// Takes no arguments and returns a promise that resolves to undefined. It
// writes to the #summary, #grid and #updated elements. It is called once on
// script load and again on every auto-refresh, so each exit path updates all
// three elements — otherwise a failed refresh would leave the summary from
// the previous successful load on screen next to an error in the grid.
async function load() {
  const grid = document.getElementById("grid");
  const summary = document.getElementById("summary");
  const updated = document.getElementById("updated");

  // 1. Fetch + parse the probe-list config.
  const yaml = await fetchText(CONFIG_URL);
  if (!yaml) {
    summary.innerHTML = `
      <div class="summary-dot" style="background:var(--ink-soft)"></div>
      <div class="summary-text">
        <strong>Status unavailable</strong>
        <small>Could not load the probe-list config.</small>
      </div>
    `;
    grid.innerHTML =
      `<div class="empty">Failed to load probe-list config. Check that <code>${CONFIG_URL}</code> is reachable (Vercel rewrites <code>/data/*</code> to ${REPO_BROWSE}/raw/branch/main/<em>$1</em>).</div>`;
    updated.textContent = "load failed";
    return;
  }
  const sites = parseSites(yaml);
  if (sites.length === 0) {
    // renderSummary([]) yields its "No services configured" state.
    summary.innerHTML = renderSummary([]);
    grid.innerHTML =
      `<div class="empty">No sites declared in <code>.upptimerc.yml</code>.</div>`;
    updated.textContent = "no sites configured";
    return;
  }
  // 2. For each site, fetch its history JSONL in parallel. A site whose
  //    history cannot be fetched gets an empty result list ("unknown").
  const enriched = await Promise.all(
    sites.map(async (site) => {
      const slug = slugify(site.name);
      const text = await fetchText(HISTORY_URL(slug));
      const results = text ? parseJSONL(text) : [];
      return { site, slug, results };
    })
  );
  // 3. Render rows + summary. A site's status is that of its newest result.
  const rowSummaries = enriched.map(({ results }) => {
    const last = results[results.length - 1];
    return {
      status: !last ? "unknown" : last.success ? "up" : "down",
    };
  });
  summary.innerHTML = renderSummary(rowSummaries);
  grid.innerHTML = enriched
    .map(({ site, results }) => renderRow(site, results))
    .join("");
  // Updated-at timestamp: latest probe across all sites. Timestamps are
  // compared as parsed instants, not as strings, so differing sub-second
  // precision or UTC offsets cannot mis-order them; unparsable values are
  // ignored. reduce() is used instead of Math.max(...list) because the
  // combined history can be arbitrarily long.
  const latestMs = enriched
    .flatMap(({ results }) => results)
    .map((r) => Date.parse(r && r.timestamp))
    .filter((ms) => Number.isFinite(ms))
    .reduce((newest, ms) => Math.max(newest, ms), -Infinity);
  if (Number.isFinite(latestMs)) {
    // Clamp at 0 so viewer/probe clock skew never shows a negative age.
    const ago = Math.max(0, Math.round((Date.now() - latestMs) / 60000));
    updated.innerHTML =
      `last probe ${ago} min ago · <a href="${REPO_BROWSE}/src/branch/main/history">history</a>`;
  } else {
    updated.innerHTML =
      `no probe data yet · <a href="${REPO_BROWSE}">source</a>`;
  }
}
// Kick off the first render as soon as the script runs. The promise that
// load() returns is not awaited or given a rejection handler here.
load();
// Auto-refresh every 5 min — matches the probe cadence so the page
// catches up with new history without a hard reload. Each tick starts a
// new load() whether or not the previous one has settled.
setInterval(load, 5 * 60 * 1000);

137
site/index.html Normal file
View File

@ -0,0 +1,137 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="Live status for Molecules AI services. Probes refresh every 5 minutes.">
<title>Molecules AI · Status</title>
<style>
:root {
color-scheme: light dark;
--bg: #0a0a0a;
--card: #141414;
--line: #2a2a2a;
--ink: #e5e5e5;
--ink-soft: #999;
--green: #34d399;
--amber: #fbbf24;
--red: #f87171;
--blue: #60a5fa;
}
* { box-sizing: border-box; }
body {
margin: 0;
background: var(--bg);
color: var(--ink);
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
-webkit-font-smoothing: antialiased;
line-height: 1.5;
}
.wrap { max-width: 980px; margin: 0 auto; padding: 32px 24px; }
header {
display: flex; align-items: baseline; justify-content: space-between;
margin-bottom: 32px; padding-bottom: 20px; border-bottom: 1px solid var(--line);
}
h1 { font-size: 22px; margin: 0; font-weight: 600; }
.meta { font-size: 13px; color: var(--ink-soft); }
.meta a { color: var(--blue); text-decoration: none; }
.meta a:hover { text-decoration: underline; }
.summary {
background: var(--card); border: 1px solid var(--line);
border-radius: 12px; padding: 24px; margin-bottom: 24px;
display: flex; align-items: center; gap: 20px;
}
.summary-dot {
width: 18px; height: 18px; border-radius: 50%;
flex-shrink: 0;
}
.summary-text strong { font-size: 18px; display: block; margin-bottom: 2px; }
.summary-text small { color: var(--ink-soft); font-size: 13px; }
.grid {
display: grid; gap: 12px;
}
.row {
background: var(--card); border: 1px solid var(--line);
border-radius: 10px; padding: 16px 20px;
display: grid;
grid-template-columns: 28px 1fr auto auto;
align-items: center; gap: 16px;
}
.dot {
width: 12px; height: 12px; border-radius: 50%;
box-shadow: 0 0 8px currentColor;
}
.dot.up { background: var(--green); color: var(--green); }
.dot.down { background: var(--red); color: var(--red); }
.dot.unknown { background: var(--ink-soft); color: var(--ink-soft); box-shadow: none; }
.row-name { font-weight: 500; min-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
.row-name a { color: var(--ink); text-decoration: none; }
.row-name a:hover { color: var(--blue); }
.row-name .url { display: block; font-size: 11px; color: var(--ink-soft); font-family: ui-monospace, 'SF Mono', monospace; }
.row-latency {
font-size: 13px; color: var(--ink-soft); font-variant-numeric: tabular-nums;
text-align: right; min-width: 70px;
}
.row-spark {
display: flex; gap: 2px; align-items: flex-end; height: 20px;
}
.row-spark span {
width: 3px; background: var(--green); display: block;
border-radius: 1px; opacity: 0.85;
}
.row-spark span.fail { background: var(--red); }
footer {
margin-top: 40px; padding-top: 20px; border-top: 1px solid var(--line);
font-size: 12px; color: var(--ink-soft); text-align: center;
}
footer a { color: var(--ink-soft); text-decoration: underline; }
.empty {
text-align: center; padding: 48px 24px; color: var(--ink-soft);
}
.skel {
height: 60px; background: var(--card); border: 1px solid var(--line);
border-radius: 10px; margin-bottom: 12px;
animation: pulse 1.6s ease-in-out infinite;
}
@keyframes pulse { 0%, 100% { opacity: 0.5; } 50% { opacity: 0.9; } }
@media (prefers-color-scheme: light) {
:root {
--bg: #fafafa; --card: #fff; --line: #e5e5e5; --ink: #1a1a1a; --ink-soft: #666;
}
}
</style>
</head>
<body>
<div class="wrap">
<header>
<h1>Molecules AI · Status</h1>
<div class="meta" id="updated">checking…</div>
</header>
<div class="summary" id="summary">
<div class="summary-dot" style="background:var(--ink-soft)"></div>
<div class="summary-text">
<strong>Loading current status…</strong>
<small>Fetching latest probe results.</small>
</div>
</div>
<div class="grid" id="grid">
<div class="skel"></div><div class="skel"></div><div class="skel"></div>
</div>
<footer>
Probes run every 5 minutes via Gitea Actions cron.
Source: <a href="https://git.moleculesai.app/molecule-ai/molecule-ai-status">molecule-ai/molecule-ai-status</a> ·
Probe binary: <a href="https://git.moleculesai.app/molecule-ai/molecule-ai-uptime-probe">molecule-ai-uptime-probe</a>
</footer>
</div>
<script src="./app.js"></script>
</body>
</html>

33
site/vercel.json Normal file
View File

@ -0,0 +1,33 @@
{
"version": 2,
"name": "molecule-ai-status",
"rewrites": [
{
"source": "/data/(.*)",
"destination": "https://git.moleculesai.app/molecule-ai/molecule-ai-status/raw/branch/main/$1"
}
],
"headers": [
{
"source": "/(.*)",
"headers": [
{ "key": "X-Frame-Options", "value": "DENY" },
{ "key": "X-Content-Type-Options", "value": "nosniff" },
{ "key": "Referrer-Policy", "value": "strict-origin-when-cross-origin" },
{ "key": "Permissions-Policy", "value": "camera=(), microphone=(), geolocation=()" }
]
},
{
"source": "/(index.html|app.js)",
"headers": [
{ "key": "Cache-Control", "value": "public, max-age=60, s-maxage=60" }
]
},
{
"source": "/data/(.*)",
"headers": [
{ "key": "Cache-Control", "value": "public, max-age=60, s-maxage=60" }
]
}
]
}