From 2588b4ecbcbc03936ca439780d7a1d1e84a68aae Mon Sep 17 00:00:00 2001 From: claude-ceo-assistant Date: Mon, 11 May 2026 00:36:20 -0700 Subject: [PATCH] =?UTF-8?q?feat(ci):=20main-red=20watchdog=20(Option=20C?= =?UTF-8?q?=20of=20main-never-red=20directive)=20=E2=80=94=20closes=20#420?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a sentinel that detects post-merge CI red on `main` and files an idempotent `[main-red] {repo}: {SHA[:10]}` issue. Auto-closes the issue when main returns to green. Emits a Loki-shaped JSON event for the operator-host observability pipeline. Pattern source: CP `0adf2098` (ci-required-drift). Simpler scope here — one source surface (combined commit status of main HEAD) versus three in CP. Same `ApiError`-raises-on-non-2xx contract per `feedback_api_helper_must_raise_not_return_dict` so the duplicate-issue regression class stays closed. Does NOT auto-revert. Option B is explicitly rejected per `feedback_no_such_thing_as_flakes` + `feedback_fix_root_not_symptom`. The watchdog files an alarm; humans fix forward. Files: - .gitea/workflows/main-red-watchdog.yml — hourly `5 * * * *` cron + workflow_dispatch (no inputs, per `feedback_gitea_workflow_dispatch_inputs_unsupported`). - .gitea/scripts/main-red-watchdog.py — sidecar with `--dry-run`. - tests/test_main_red_watchdog.py — 26 pytest cases. 
Tests (26 / 26 passing): - is_red detector across failure/error/pending/success state combos - happy path: green main → no writes - red detected: POST issue with correct title + body listing each failed context + label apply - idempotent: existing issue PATCHed, NOT duplicated - auto-close: green at new SHA → close prior `[main-red]` w/ comment - auto-close skipped when main pending (don't lose the breadcrumb) - HTTP-failure: `api()` raises ApiError; `list_open_red_issues` and `find_open_issue_for_sha` and `run_once` ALL propagate (regression guards for `feedback_api_helper_must_raise_not_return_dict`) - JSON-decode failure raises when expect_json=True; opt-in raw OK - --dry-run skips all writes - title format `[main-red] {repo}: {SHA[:10]}` - Gitea branch response shape tolerance (`commit.id` OR `commit.sha`) - Loki emitter survives `logger` not installed / subprocess failure - runtime env guard exits when required vars missing Hostile self-review proven: 2 transient-error tests FAIL on a pre-fix implementation (verified by injecting `try: ... except ApiError: return []` into `list_open_red_issues` and running pytest — both transient-error guards flipped red with `DID NOT RAISE`). Live dry-run against molecule-ai/molecule-core main confirms the script parses the real Gitea combined-status response correctly (current main is in fact red at cb716f96). Replication to other repos (operator-config, internal, molecule-controlplane, hermes-agent, etc.) is out of scope for this PR — molecule-core pilot only, per task brief. Tracking: #420. 
#!/usr/bin/env python3
"""main-red-watchdog — Option C of the "main NEVER goes red" directive.

Tracking: molecule-core#420.

What it does (one cron tick):
  1. GET /api/v1/repos/{owner}/{repo}/branches/{watch_branch}
       → current HEAD SHA on the watched branch.
  2. GET /api/v1/repos/{owner}/{repo}/commits/{SHA}/status
       → combined status + per-context statuses.
  3. If combined state is `failure` or `error` (or any individual
     status is `failure` or `error`): open or PATCH an idempotent
     `[main-red] {repo}: {SHA[:10]}` issue. Body lists each failed
     status context with `target_url` + `description`.
  4. If combined state is `success`: close any open `[main-red]
     {repo}: ...` issue on a previous SHA with a
     "main returned to green at SHA {current_SHA}" comment.
  5. Emit one Loki-shaped JSON line to stdout and via
     `logger -t main-red-watchdog` so `reference_obs_stack_phase1`'s
     Vector → Loki path ingests an alert event (queryable in Grafana as
     `{tenant="operator-host"} |~ "main-red-watchdog"`).

What it does NOT do:
  - Auto-revert anything. Option B is explicitly rejected per
    `feedback_no_such_thing_as_flakes` + `feedback_fix_root_not_symptom`.
  - Page on its own failures. If api() raises ApiError (transient
    Gitea outage), the workflow run fails LOUDLY by re-raise — exactly
    the contract `feedback_api_helper_must_raise_not_return_dict`
    enforces. Silent fallthrough would re-introduce the duplicate-issue
    regression class.
  - Exit non-zero on RED. The issue IS the alarm; failing the watchdog
    on red would double-page (red workflow + open issue) and create
    silent-loop risk if the watchdog itself flakes.

Idempotency strategy:
  Title is keyed on `{SHA[:10]}` (commit-scoped), NOT just `main`.
  Rationale:
    - A fix-forward changes HEAD → next cron tick sees a new SHA. If
      the new HEAD is green, auto-close closes the prior
      `[main-red] OLD_SHA` issue. If the new HEAD is also red (e.g. a
      different test fails) a fresh `[main-red] NEW_SHA` is filed and
      the old issue stays open until main is green again (auto-close
      only runs on combined state `success`). Lineage is preserved.
    - A revert that happens to land back on a previously-red SHA
      (rare) would refer to a CLOSED issue; the watchdog never reopens.
      That's a deliberate trade-off — the operator will see the latest
      open issue's `closed` event in the activity feed.

This module is import-safe: tests import individual functions without
invoking main(), so module-level reads use env-with-default and the
runtime contract enforcement lives in `_require_runtime_env()`.

Run locally (dry-run, no API mutation):
  GITEA_TOKEN=... GITEA_HOST=git.moleculesai.app REPO=owner/repo \\
    WATCH_BRANCH=main RED_LABEL=tier:high \\
    python3 .gitea/scripts/main-red-watchdog.py --dry-run
"""
from __future__ import annotations

import argparse
import json
import os
import shutil
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request
from typing import Any


# --------------------------------------------------------------------------
# Environment
# --------------------------------------------------------------------------
def _env(key: str, *, default: str = "") -> str:
    """Read an env var with a default. Module-import-safe — tests can
    import this script without setting the full env contract."""
    return os.environ.get(key, default)


GITEA_TOKEN = _env("GITEA_TOKEN")
GITEA_HOST = _env("GITEA_HOST")
REPO = _env("REPO")
WATCH_BRANCH = _env("WATCH_BRANCH", default="main")
RED_LABEL = _env("RED_LABEL", default="tier:high")

# `REPO` is "owner/name"; a value without a slash yields NAME == "".
OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""

# Title prefix — kept short and stable so the idempotency search can
# match by exact title without parsing.
TITLE_PREFIX = "[main-red]"

# Page size requested from the issue listing, and a hard ceiling on the
# number of pages walked. The ceiling exists so a misbehaving server
# (or a stub that always returns a full page) cannot loop forever.
_ISSUES_PAGE_SIZE = 50
_MAX_ISSUE_PAGES = 100

_REQUIRED_ENV = ("GITEA_TOKEN", "GITEA_HOST", "REPO", "WATCH_BRANCH", "RED_LABEL")


def _require_runtime_env() -> None:
    """Enforce env contract — called from `main()` only.

    Tests import individual functions without setting the full env
    contract. Mirrors the CP `ci-required-drift.py` pattern so the
    runtime guard is a single chokepoint.

    Writes one `::error::` line per missing variable (so a misconfigured
    workflow is fixed in one round-trip) and exits with status 2.
    """
    missing = [key for key in _REQUIRED_ENV if not os.environ.get(key)]
    if not missing:
        return
    for key in missing:
        sys.stderr.write(f"::error::missing required env var: {key}\n")
    sys.exit(2)


# --------------------------------------------------------------------------
# Tiny HTTP helper — raises on non-2xx + on JSON-decode-of-expected-JSON.
# --------------------------------------------------------------------------
class ApiError(RuntimeError):
    """Raised when a Gitea API call cannot be trusted to have succeeded.

    Covers non-2xx HTTP status AND 2xx with an unparseable JSON body on
    endpoints documented to return JSON. Callers that swallow this and
    proceed risk e.g. creating duplicate `[main-red]` issues when a
    transient 500 hides an existing match. Per
    `feedback_api_helper_must_raise_not_return_dict`: soft-failure is
    opt-in via `expect_json=False`, never the default.
    """


def api(
    method: str,
    path: str,
    *,
    body: dict | None = None,
    query: dict[str, str] | None = None,
    expect_json: bool = True,
) -> tuple[int, Any]:
    """Tiny HTTP helper around urllib.

    Args:
        method: HTTP verb.
        path: Path appended to the module-level `API` base URL.
        body: Optional JSON-serialisable request body.
        query: Optional query-string parameters.
        expect_json: When True (default) a non-JSON 2xx body is an error.

    Returns:
        `(status, parsed_body)`; `parsed_body` is None for an empty body,
        or `{"_raw": text}` for a non-JSON body when `expect_json=False`.

    Raises:
        ApiError: on any non-2xx response, and on JSON-decode failure
            when `expect_json=True`. Mirrors the CP ci-required-drift.py
            contract exactly so behaviour is cross-checkable.
    """
    url = f"{API}{path}"
    if query:
        url = f"{url}?{urllib.parse.urlencode(query)}"
    data = None
    headers = {
        "Authorization": f"token {GITEA_TOKEN}",
        "Accept": "application/json",
    }
    if body is not None:
        data = json.dumps(body).encode("utf-8")
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(url, method=method, data=data, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
            status = resp.status
    except urllib.error.HTTPError as e:
        # urllib raises on 4xx/5xx; fold it back into (status, raw) so
        # the single non-2xx check below produces the ApiError.
        raw = e.read()
        status = e.code

    if not (200 <= status < 300):
        snippet = raw[:500].decode("utf-8", errors="replace") if raw else ""
        raise ApiError(f"{method} {path} → HTTP {status}: {snippet}")

    if not raw:
        return status, None
    try:
        return status, json.loads(raw)
    except json.JSONDecodeError as e:
        if expect_json:
            raise ApiError(
                f"{method} {path} → HTTP {status} but body is not JSON: {e}"
            ) from e
        # Opt-in raw fallthrough for endpoints with known echo-quirks
        # (`feedback_gitea_create_api_unparseable_response`). Caller
        # MUST verify success via a follow-up GET, not by trusting body.
        return status, {"_raw": raw.decode("utf-8", errors="replace")}


# --------------------------------------------------------------------------
# Gitea reads
# --------------------------------------------------------------------------
def get_head_sha(branch: str) -> str:
    """HEAD SHA of `branch`. Raises ApiError on non-2xx or when the
    response carries no usable commit SHA (`commit.id` or `commit.sha`)."""
    _, body = api("GET", f"/repos/{OWNER}/{NAME}/branches/{branch}")
    if not isinstance(body, dict):
        raise ApiError(f"branch {branch} response not a JSON object")
    commit = body.get("commit")
    if not isinstance(commit, dict):
        raise ApiError(f"branch {branch} response missing `commit` object")
    sha = commit.get("id") or commit.get("sha")
    if not isinstance(sha, str) or len(sha) < 7:
        raise ApiError(f"branch {branch} response has no usable commit SHA")
    return sha


def get_combined_status(sha: str) -> dict:
    """Combined commit status for `sha`. Gitea returns:
        {
          "state": "success" | "failure" | "pending" | "error",
          "statuses": [
            {"context": "...", "state": "success|failure|pending|error",
             "target_url": "...", "description": "..."},
            ...
          ],
          ...
        }
    Raises ApiError on non-2xx or when the body is not a JSON object.
    """
    _, body = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status")
    if not isinstance(body, dict):
        raise ApiError(f"status for {sha} response not a JSON object")
    return body


def is_red(status: dict) -> tuple[bool, list[dict]]:
    """Return (is_red, failed_statuses).

    A commit is "red" if combined state is in {`failure`, `error`} OR
    any individual status entry is in {`failure`, `error`}. `pending`
    and `success` do not trip the watchdog — pending means CI is still
    running, and that's the normal state immediately after a merge.

    `failed_statuses` is the list of per-context entries whose own
    `state` is in the red set; useful for the issue body.
    """
    combined = status.get("state")
    statuses = status.get("statuses") or []
    red_states = {"failure", "error"}
    failed = [
        s for s in statuses
        if isinstance(s, dict) and s.get("state") in red_states
    ]
    return (combined in red_states or bool(failed), failed)


# --------------------------------------------------------------------------
# Issue file / update / close
# --------------------------------------------------------------------------
def _commit_url(sha: str) -> str:
    """Web URL of commit `sha` on the watched repo."""
    return f"https://{GITEA_HOST}/{REPO}/commit/{sha}"


def title_for(sha: str) -> str:
    """Idempotency key — `[main-red] {repo}: {SHA[:10]}`.

    Commit-scoped. A fix-forward to a new SHA produces a new title; the
    prior issue auto-closes via `close_open_red_issues_for_other_shas`
    once main is green.
    """
    return f"{TITLE_PREFIX} {REPO}: {sha[:10]}"


def list_open_red_issues() -> list[dict]:
    """All open issues whose title starts with `[main-red] {repo}: `.

    Per Five-Axis review on CP#112 (`feedback_api_helper_must_raise_not_return_dict`):
    api() raises on non-2xx; we let it propagate. Returning [] on a
    transient 500 would cause auto-close to skip the cleanup AND the
    file-or-update path to POST a duplicate — exactly the regression
    class the helper-raises contract closes.

    The listing endpoint returns ALL open issues (the title filter is
    applied here, client-side), so the listing is walked page by page
    until a short page is seen. Reading only the first page would miss
    an existing `[main-red]` issue on a repo with more than one page of
    open issues and re-open the duplicate-issue regression.

    NOTE(review): a short page is taken as "last page". If the server
    caps page size below `_ISSUES_PAGE_SIZE` the walk stops after page
    1 — confirm the instance's API page-size limit is >= 50.

    Raises:
        ApiError: on non-2xx, on a non-list body, or when more than
            `_MAX_ISSUE_PAGES` full pages are returned (fail loudly
            rather than act on a partial listing).
    """
    prefix = f"{TITLE_PREFIX} {REPO}: "
    matches: list[dict] = []
    seen_numbers: set[Any] = set()
    for page in range(1, _MAX_ISSUE_PAGES + 1):
        _, results = api(
            "GET",
            f"/repos/{OWNER}/{NAME}/issues",
            query={
                "state": "open",
                "type": "issues",
                "limit": str(_ISSUES_PAGE_SIZE),
                "page": str(page),
            },
        )
        if not isinstance(results, list):
            raise ApiError(
                f"issue search returned non-list body (got {type(results).__name__})"
            )
        for issue in results:
            if not isinstance(issue, dict):
                continue
            title = issue.get("title")
            if not isinstance(title, str) or not title.startswith(prefix):
                continue
            # Issues can shift between pages while we paginate; never
            # return the same issue twice (would double-close it).
            number = issue.get("number")
            if number is not None:
                if number in seen_numbers:
                    continue
                seen_numbers.add(number)
            matches.append(issue)
        if len(results) < _ISSUES_PAGE_SIZE:
            return matches
    raise ApiError(
        f"open-issue listing exceeded {_MAX_ISSUE_PAGES} pages; "
        "refusing to act on a partial listing"
    )


def find_open_issue_for_sha(sha: str) -> dict | None:
    """Return the existing open `[main-red] {repo}: {SHA[:10]}` issue,
    or None if no such issue is open.

    `None` means "search succeeded, no match" — NOT "search failed".
    api() raises ApiError on any non-2xx; the caller can let that
    propagate so a transient outage fails loudly instead of silently
    duplicating.
    """
    target = title_for(sha)
    for issue in list_open_red_issues():
        if issue.get("title") == target:
            return issue
    return None


def render_body(sha: str, failed: list[dict], debug: dict) -> str:
    """Issue body. Markdown. Mirrors CP#112's render_body shape.

    Args:
        sha: Full SHA of the red commit.
        failed: Per-context status entries in a red state (may be empty
            when only the combined state is red).
        debug: JSON-serialisable dict rendered verbatim in a code fence.
    """
    lines = [
        f"# Main is RED on `{REPO}` at `{sha[:10]}`",
        "",
        f"Commit: <{_commit_url(sha)}>",
        "",
        "Auto-filed by `.gitea/workflows/main-red-watchdog.yml` (Option C "
        "of the [main-never-red directive]"
        f"(https://{GITEA_HOST}/molecule-ai/molecule-core/issues/420)). "
        "Per `feedback_no_such_thing_as_flakes` + "
        "`feedback_fix_root_not_symptom`: investigate the root cause; do "
        "NOT revert as a reflex. The watchdog itself never reverts.",
        "",
        "## Failed status contexts",
        "",
    ]
    if not failed:
        lines.append(
            "_(Combined state reported `failure`/`error` but no per-context "
            "entries were in a red state. This usually means a CI emitter "
            "set combined-status directly without a per-context status. "
            "Check the most recent workflow run for `main` and trace from "
            "there.)_"
        )
    else:
        for s in failed:
            ctx = s.get("context", "(no context)")
            state = s.get("state", "(no state)")
            url = s.get("target_url") or ""
            desc = (s.get("description") or "").strip()
            entry = f"- **{ctx}** — `{state}`"
            if url:
                entry += f" → [logs]({url})"
            if desc:
                entry += f"\n - {desc}"
            lines.append(entry)
    lines.extend([
        "",
        "## Resolution path",
        "",
        "1. Read the failed logs (links above).",
        "2. If reproducible locally, fix forward in a PR targeting `main`.",
        "3. If the failure is a real flake — STOP. Per "
        "`feedback_no_such_thing_as_flakes`, intermittent failures are "
        "real bugs. Investigate to root cause; do not mark as flake.",
        "4. If the failure is blocking unrelated work for >1 hour, file a "
        "follow-up issue and assign someone. Do NOT revert without a "
        "human GO per `feedback_prod_apply_needs_hongming_chat_go` "
        "(branch protection is a prod surface).",
        "",
        "## Debug",
        "",
        "```json",
        json.dumps(debug, indent=2, sort_keys=True),
        "```",
        "",
        "_This issue is idempotent: the watchdog runs hourly at `:05` "
        "and edits this body in place. When `main` returns to green, the "
        "watchdog will close this issue automatically with a "
        "\"main returned to green\" comment._",
    ])
    return "\n".join(lines)


def emit_loki_event(event_type: str, sha: str, failed_contexts: list[str]) -> None:
    """Emit a JSON line to syslog tag `main-red-watchdog` for
    `reference_obs_stack_phase1` (Vector → Loki).

    The line is always printed to stdout. The syslog call is
    best-effort: it is skipped when `logger` isn't on PATH (e.g. local
    dev macOS without util-linux logger), and a failing call only
    produces a `::warning::` on stderr.

    Loki labels: the workflow runs on the Ubuntu runner where Vector is
    NOT configured (Vector lives on the operator host + tenants per
    `reference_obs_stack_phase1`). The Loki line is still emitted as
    stdout JSON so the workflow log itself is parseable; treat the
    syslog call as belt-and-braces for the cases where this script is
    invoked from a host that DOES have Vector (e.g. operator-host cron
    fallback in a follow-up PR).
    """
    payload = {
        "event_type": event_type,
        "repo": REPO,
        "sha": sha,
        "failed_contexts": failed_contexts,
    }
    line = json.dumps(payload, sort_keys=True)
    # Always print to stdout so the workflow log captures it (machine-
    # readable; `gitea run logs` + Loki ingestion via the operator-host
    # journald → Vector → Loki path will see this from runners that
    # forward stdout). Loki query:
    #   {source="gitea-actions"} |~ "main_red_detected"
    print(f"main-red-watchdog event: {line}")
    # Best-effort syslog tag so a future "run from operator-host cron"
    # path picks it up directly via the existing Vector pipeline.
    if shutil.which("logger"):
        try:
            subprocess.run(
                ["logger", "-t", "main-red-watchdog", line],
                check=False,
                timeout=5,
            )
        except (OSError, subprocess.SubprocessError) as e:
            sys.stderr.write(f"::warning::logger call failed: {e}\n")


def file_or_update_red(
    sha: str,
    failed: list[dict],
    debug: dict,
    *,
    dry_run: bool = False,
) -> None:
    """Open a new `[main-red] {repo}: {SHA[:10]}` issue, or PATCH the
    existing one's body. Idempotent by title.

    With `dry_run=True` the title and body are printed and no API call
    is made. Label application after a create is best-effort: failures
    are logged as warnings and do not fail the run.

    Raises:
        ApiError: if the issue search, PATCH, or POST fails.
    """
    title = title_for(sha)
    body = render_body(sha, failed, debug)

    if dry_run:
        print(f"::notice::[dry-run] would file/update main-red issue for {sha[:10]}")
        print("::group::[dry-run] title")
        print(title)
        print("::endgroup::")
        print("::group::[dry-run] body")
        print(body)
        print("::endgroup::")
        return

    existing = find_open_issue_for_sha(sha)
    if existing:
        num = existing["number"]
        api("PATCH", f"/repos/{OWNER}/{NAME}/issues/{num}", body={"body": body})
        print(f"::notice::Updated existing main-red issue #{num} for {sha[:10]}")
        return

    _, created = api(
        "POST",
        f"/repos/{OWNER}/{NAME}/issues",
        body={"title": title, "body": body, "labels": []},
    )
    if not isinstance(created, dict):
        raise ApiError("POST issue response not a JSON object")
    new_num = created.get("number")
    print(f"::warning::Filed new main-red issue #{new_num} for {sha[:10]}")

    # Without an issue number there is nothing to label; say so rather
    # than misreporting the label as missing.
    if not new_num:
        sys.stderr.write(
            "::warning::created issue response has no number; "
            f"cannot apply label '{RED_LABEL}'\n"
        )
        return

    # Apply RED_LABEL by id. Gitea's add-labels endpoint takes IDs, not
    # names (`feedback_gitea_label_delete_by_id` — same rule for add).
    # Best-effort: label failure is logged but does not fail the run.
    try:
        _, labels = api("GET", f"/repos/{OWNER}/{NAME}/labels")
    except ApiError as e:
        sys.stderr.write(f"::warning::could not list labels: {e}\n")
        return
    label_id = None
    if isinstance(labels, list):
        for lbl in labels:
            if isinstance(lbl, dict) and lbl.get("name") == RED_LABEL:
                label_id = lbl.get("id")
                break
    if label_id is None:
        sys.stderr.write(f"::warning::label '{RED_LABEL}' not found on repo\n")
        return
    try:
        api(
            "POST",
            f"/repos/{OWNER}/{NAME}/issues/{new_num}/labels",
            body={"labels": [label_id]},
        )
    except ApiError as e:
        sys.stderr.write(
            f"::warning::could not apply label '{RED_LABEL}' to #{new_num}: {e}\n"
        )


def close_open_red_issues_for_other_shas(
    current_sha: str,
    *,
    dry_run: bool = False,
) -> int:
    """When main is green at current_sha, close any open `[main-red]`
    issues whose title references a different SHA. Returns the number
    of issues closed (or that would be closed, under `dry_run`).

    Lineage note: we only close issues whose title prefix matches; if
    a human renamed the issue or added a suffix this won't touch it.
    That's intentional — manual editorial state takes precedence.

    Raises:
        ApiError: if the issue listing, comment POST, or close PATCH fails.
    """
    target_title = title_for(current_sha)
    comment = (
        f"`main` returned to green at SHA `{current_sha}` "
        f"(<{_commit_url(current_sha)}>). "
        "Closing automatically. If the underlying root cause is "
        "not yet understood, reopen this issue and file a "
        "postmortem — green-by-flake is still a bug per "
        "`feedback_no_such_thing_as_flakes`."
    )
    closed = 0
    for issue in list_open_red_issues():
        if issue.get("title") == target_title:
            # Same SHA — caller should not have invoked this if main is
            # green. Skip defensively.
            continue
        num = issue.get("number")
        if not isinstance(num, int):
            continue
        if dry_run:
            print(f"::notice::[dry-run] would close issue #{num} ({issue.get('title')})")
            closed += 1
            continue
        # Comment first, then close. Order matters: a closed issue can
        # still receive comments, but the activity-feed ordering reads
        # better with the explanation arriving just before the close.
        api(
            "POST",
            f"/repos/{OWNER}/{NAME}/issues/{num}/comments",
            body={"body": comment},
        )
        api(
            "PATCH",
            f"/repos/{OWNER}/{NAME}/issues/{num}",
            body={"state": "closed"},
        )
        print(f"::notice::Closed main-red issue #{num} (green at {current_sha[:10]})")
        closed += 1
    return closed


# --------------------------------------------------------------------------
# Main
# --------------------------------------------------------------------------
def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse CLI arguments; the only flag is `--dry-run`."""
    p = argparse.ArgumentParser(
        prog="main-red-watchdog",
        description="Detect post-merge CI red on the watched branch and "
        "file an idempotent issue. Option C of the main-never-red directive.",
    )
    p.add_argument(
        "--dry-run",
        action="store_true",
        help="Detect + print the would-be issue title/body to stdout; do "
        "NOT POST/PATCH/close any issues. Useful for local testing.",
    )
    return p.parse_args(argv)


def run_once(*, dry_run: bool = False) -> int:
    """One watchdog tick. Returns 0 on green, pending, or
    red-issue-filed; lets ApiError propagate on transient outage
    (workflow run fails loudly, which is correct per the helper-raises
    contract)."""
    sha = get_head_sha(WATCH_BRANCH)
    status = get_combined_status(sha)
    red, failed = is_red(status)

    debug = {
        "branch": WATCH_BRANCH,
        "sha": sha,
        "combined_state": status.get("state"),
        "failed_contexts": [s.get("context") for s in failed],
        "all_contexts": [
            {"context": s.get("context"), "state": s.get("state")}
            for s in (status.get("statuses") or [])
            if isinstance(s, dict)
        ],
    }

    if red:
        failed_ctxs = [s.get("context") for s in failed if s.get("context")]
        emit_loki_event("main_red_detected", sha, failed_ctxs)
        print(f"::warning::main is RED at {sha[:10]} on {WATCH_BRANCH}: "
              f"{len(failed)} failed context(s)")
        file_or_update_red(sha, failed, debug, dry_run=dry_run)
    else:
        # Green (or pending — pending is treated as not-red so we don't
        # spam during the post-merge CI window). Close any stale issues
        # from earlier SHAs only when we're actually green; pending
        # means CI hasn't finished and the prior issue might still be
        # accurate.
        if status.get("state") == "success":
            closed = close_open_red_issues_for_other_shas(sha, dry_run=dry_run)
            if closed:
                emit_loki_event(
                    "main_returned_to_green", sha,
                    [],
                )
            print(f"::notice::main is GREEN at {sha[:10]} on {WATCH_BRANCH} "
                  f"(closed {closed} stale issue(s))")
        else:
            print(f"::notice::main is PENDING at {sha[:10]} on {WATCH_BRANCH} "
                  f"(combined state={status.get('state')!r}; no action)")
    return 0


def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse args, enforce the env contract, run one tick."""
    args = _parse_args(argv)
    _require_runtime_env()
    return run_once(dry_run=args.dry_run)


if __name__ == "__main__":
    sys.exit(main())
The issue IS the alarm — failing the +# watchdog would create a silent-loop where a flake in the watchdog +# itself hides actual main-red signal. Exit 0 unless api() raises +# ApiError (transient Gitea outage → fail loudly per +# `feedback_api_helper_must_raise_not_return_dict`). +# +# Pattern source: molecule-controlplane `0adf2098`'s ci-required-drift.yml +# (just merged 2026-05-11). Same shape (cron + dispatch + sidecar Python + +# idempotent-by-title issue), simpler scope (1 source, not 3). + +name: main-red-watchdog + +# IMPORTANT — Gitea 1.22.6 parser quirk per +# `feedback_gitea_workflow_dispatch_inputs_unsupported`: do NOT add an +# `inputs:` block here. Gitea 1.22.6 rejects the whole workflow as +# "unknown on type" when `workflow_dispatch.inputs.X` is present. Revisit +# when Gitea ≥ 1.23 is fleet-wide. +on: + schedule: + # Hourly at :05 — task spec calls for "off-zero" (`5 * * * *`), + # offset from :17 (ci-required-drift) and :00 (peak cron load). + - cron: '5 * * * *' + workflow_dispatch: + +# Read commit status + branch ref + issues; write issues (open/PATCH/close). +permissions: + contents: read + issues: write + +# Workflow-scoped serialisation — two simultaneous runs would race on the +# `[main-red] {SHA}` open/PATCH path. Idempotent by title, but parallel +# POSTs can produce duplicates before the title search dedup wins. +concurrency: + group: main-red-watchdog + cancel-in-progress: false + +jobs: + watchdog: + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Check out repo (script lives at .gitea/scripts/) + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Set up Python (stdlib only — no PyYAML needed here) + # The script uses stdlib urllib + json. No PyYAML required (CP's + # drift detector needs it for AST parsing; we don't). Pin to the + # same 3.12 hermetic interpreter CP uses so the test/runtime + # versions stay aligned across watchdog suites. 
+ uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: '3.12' + + - name: Run main-red watchdog + env: + # GITEA_TOKEN reads commit status + writes issues. Falls back + # to the auto-injected GITHUB_TOKEN if the org-level secret + # isn't set (transitional repos), matching the same pattern + # used by deploy-pipeline.yml + ci-required-drift.yml. + GITEA_TOKEN: ${{ secrets.GITEA_TOKEN || secrets.GITHUB_TOKEN }} + GITEA_HOST: git.moleculesai.app + REPO: ${{ github.repository }} + # Branch under watch. `main` per directive; staging not + # included here — staging green is a separate gate + # (`feedback_staging_e2e_merge_gate`). + WATCH_BRANCH: 'main' + # Issue label applied on file/open. `tier:high` exists in the + # molecule-core label set (verified 2026-05-11, label id 9). + # Rationale for high: main red blocks the promotion train and + # poisons every PR's auto-rebase base; treat as a fire even + # if intermittent. + RED_LABEL: 'tier:high' + run: python3 .gitea/scripts/main-red-watchdog.py diff --git a/tests/test_main_red_watchdog.py b/tests/test_main_red_watchdog.py new file mode 100644 index 00000000..1b14fe27 --- /dev/null +++ b/tests/test_main_red_watchdog.py @@ -0,0 +1,626 @@ +"""Tests for `.gitea/scripts/main-red-watchdog.py` — Option C of the +main-never-red directive (tracking: molecule-core#420). + +Covers: + - Happy path: main is green, no issue created. + - Red detected: issue opened with correct title/body containing each + failed context. + - Idempotent: existing `[main-red] {repo}: {SHA[:10]}` issue is + PATCHed in place, NOT duplicated. + - Auto-close: when main returns to green, prior `[main-red]` issues + for other SHAs are closed with a comment. + - HTTP-failure: api() raises ApiError on non-2xx, NOT silently + swallowed → `find_open_issue_for_sha` and `list_open_red_issues` + propagate, blocking the duplicate-write regression class per + `feedback_api_helper_must_raise_not_return_dict`. 
+ - --dry-run: no API mutation; rendered title/body to stdout. + - is_red detector logic across all combined/per-context state + combinations (failure, error, pending, success). + +Hostile self-review proof (`feedback_dev_sop_phase_1_to_4`): + - `test_find_open_issue_for_sha_raises_on_transient_error` exercises + the regression class — a pre-fix implementation that returned + `[]`/None on api() failure would fall through and POST a duplicate. + Verified by stashing the script's `raise ApiError` and re-running: + test FAILS as required. + - `test_file_or_update_patches_existing_issue` asserts NO POST when + an open issue exists. A pre-fix idempotency bug (always-POST) + would fail this. + +Run: + python3 -m pytest tests/test_main_red_watchdog.py -v + +Dependencies: stdlib + pytest. No network. No live Gitea calls. +""" +from __future__ import annotations + +import importlib.util +import json +import os +import sys +import urllib.error +from pathlib import Path +from unittest import mock + +import pytest + + +# -------------------------------------------------------------------------- +# Module-import fixture +# -------------------------------------------------------------------------- +SCRIPT_PATH = ( + Path(__file__).resolve().parent.parent + / ".gitea" + / "scripts" + / "main-red-watchdog.py" +) + + +@pytest.fixture(scope="module") +def wd_module(): + """Import the script as a module under a known env.""" + env = { + "GITEA_TOKEN": "test-token", + "GITEA_HOST": "git.example.test", + "REPO": "owner/repo", + "WATCH_BRANCH": "main", + "RED_LABEL": "tier:high", + } + with mock.patch.dict(os.environ, env, clear=False): + spec = importlib.util.spec_from_file_location( + "main_red_watchdog", SCRIPT_PATH + ) + m = importlib.util.module_from_spec(spec) + spec.loader.exec_module(m) + # Force-set globals from env (they were captured at import time + # before our patch.dict took effect on subsequent runs within + # the same pytest session — same pattern as CP#112 tests). 
+ m.GITEA_TOKEN = env["GITEA_TOKEN"] + m.GITEA_HOST = env["GITEA_HOST"] + m.REPO = env["REPO"] + m.WATCH_BRANCH = env["WATCH_BRANCH"] + m.RED_LABEL = env["RED_LABEL"] + m.OWNER, m.NAME = "owner", "repo" + m.API = f"https://{env['GITEA_HOST']}/api/v1" + yield m + + +# -------------------------------------------------------------------------- +# Stub api() helper — records calls + dispatches by (method, path). +# -------------------------------------------------------------------------- +def _make_stub_api(responses: dict): + """Build a fake `api()` callable. + + `responses` maps (method, path) tuples to either: + - (status_int, body) → returned as-is + - Exception instance → raised + Calls are recorded in `.calls` for assertion. + """ + class StubApi: + def __init__(self): + self.calls: list[tuple] = [] + + def __call__(self, method, path, *, body=None, query=None, expect_json=True): + self.calls.append((method, path, body, query)) + key = (method, path) + if key not in responses: + raise AssertionError( + f"unexpected api call: {method} {path} (no stub registered)" + ) + r = responses[key] + if isinstance(r, Exception): + raise r + return r + + return StubApi() + + +# Sample SHA used throughout. 40 chars per Gitea convention. 
SHA_RED = "deadbeefcafe1234567890abcdef000011112222"
SHA_GREEN = "ababababcdcdcdcd0000111122223333deadc0de"


def _branches_response(sha: str) -> dict:
    """Shape Gitea returns from /repos/{o}/{r}/branches/{name}."""
    return {"name": "main", "commit": {"id": sha}}


def _combined_status(state: str, statuses: list[dict] | None = None) -> dict:
    """Shape Gitea returns from /commits/{sha}/status."""
    return {"state": state, "statuses": statuses or []}


class _FakeResponse:
    """Minimal stand-in for the context manager returned by `urlopen()`.

    Exposes only what the api() tests below rely on: a `status`
    attribute, a `read()` returning the canned payload, and the
    context-manager protocol.
    """

    def __init__(self, status: int, payload: bytes) -> None:
        self.status = status
        self._payload = payload

    def read(self) -> bytes:
        return self._payload

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        # Never suppress exceptions raised inside the `with` body.
        return False


# --------------------------------------------------------------------------
# is_red detector
# --------------------------------------------------------------------------
def test_is_red_combined_failure(wd_module):
    """Combined `failure` with one failed context is red and reports it."""
    red, failed = wd_module.is_red(_combined_status("failure", [
        {"context": "ci/test", "state": "failure"},
    ]))
    assert red is True
    assert len(failed) == 1
    assert failed[0]["context"] == "ci/test"


def test_is_red_combined_error(wd_module):
    """`error` state (CI infra failed) is also red."""
    red, failed = wd_module.is_red(_combined_status("error", [
        {"context": "ci/test", "state": "error"},
    ]))
    assert red is True
    assert failed[0]["state"] == "error"


def test_is_red_combined_success(wd_module):
    """All-green combined status is not red and lists no failures."""
    red, failed = wd_module.is_red(_combined_status("success", [
        {"context": "ci/test", "state": "success"},
    ]))
    assert red is False
    assert failed == []


def test_is_red_combined_pending(wd_module):
    """Pending = CI still running. Not red, but not green either; the
    main flow handles green vs pending separately."""
    red, failed = wd_module.is_red(_combined_status("pending", [
        {"context": "ci/test", "state": "pending"},
    ]))
    assert red is False
    assert failed == []


def test_is_red_individual_failure_under_pending(wd_module):
    """A single failed context counts as red even if combined is `pending`
    (matrix half-failed, half-still-running). Catches the case where
    Gitea aggregator hasn't rolled up yet."""
    red, failed = wd_module.is_red(_combined_status("pending", [
        {"context": "ci/lint", "state": "success"},
        {"context": "ci/test", "state": "failure"},
        {"context": "ci/build", "state": "pending"},
    ]))
    assert red is True
    assert [s["context"] for s in failed] == ["ci/test"]


def test_is_red_no_statuses(wd_module):
    """No statuses at all (commit pre-CI or never reported) = not red."""
    red, failed = wd_module.is_red(_combined_status("pending", []))
    assert red is False
    assert failed == []


# --------------------------------------------------------------------------
# Happy path — main is green, no issue created
# --------------------------------------------------------------------------
def test_happy_path_no_issue_when_green(wd_module, monkeypatch):
    """main green + no existing red issues → only reads, no writes."""
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branches/main"): (200, _branches_response(SHA_GREEN)),
        ("GET", f"/repos/owner/repo/commits/{SHA_GREEN}/status"): (
            200, _combined_status("success", [
                {"context": "ci/test", "state": "success"},
            ]),
        ),
        ("GET", "/repos/owner/repo/issues"): (200, []),  # no open red issues
    })
    monkeypatch.setattr(wd_module, "api", stub)

    rc = wd_module.run_once(dry_run=False)
    assert rc == 0
    methods = [c[0] for c in stub.calls]
    assert "POST" not in methods, f"unexpected POST: {stub.calls}"
    assert "PATCH" not in methods, f"unexpected PATCH: {stub.calls}"


# --------------------------------------------------------------------------
# Red detected → issue opened with correct title + body
# --------------------------------------------------------------------------
def test_red_detected_opens_issue(wd_module, monkeypatch):
    """When main is red and no issue is open, POST a new one with the
    correct title; body lists each failed context."""
    failed_ctx = [
        {
            "context": "ci/test",
            "state": "failure",
            "target_url": "https://ci.example/run/42",
            "description": "1 test failed",
        },
        {
            "context": "ci/lint",
            "state": "error",
            "target_url": "https://ci.example/run/43",
            "description": "runner crashed",
        },
    ]
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branches/main"): (200, _branches_response(SHA_RED)),
        ("GET", f"/repos/owner/repo/commits/{SHA_RED}/status"): (
            200, _combined_status("failure", failed_ctx),
        ),
        ("GET", "/repos/owner/repo/issues"): (200, []),  # no existing issue
        ("POST", "/repos/owner/repo/issues"): (201, {"number": 555}),
        ("GET", "/repos/owner/repo/labels"): (
            200, [{"id": 9, "name": "tier:high"}],
        ),
        ("POST", "/repos/owner/repo/issues/555/labels"): (200, []),
    })
    monkeypatch.setattr(wd_module, "api", stub)

    wd_module.run_once(dry_run=False)

    # Find the POST call to create the issue and inspect its body.
    post_calls = [c for c in stub.calls if c[0] == "POST" and c[1] == "/repos/owner/repo/issues"]
    assert len(post_calls) == 1, post_calls
    posted_body = post_calls[0][2]
    expected_title = f"[main-red] owner/repo: {SHA_RED[:10]}"
    assert posted_body["title"] == expected_title
    body_text = posted_body["body"]
    assert "ci/test" in body_text
    assert "ci/lint" in body_text
    assert "1 test failed" in body_text
    assert "runner crashed" in body_text
    assert SHA_RED[:10] in body_text
    # Label apply attempted on the happy path:
    assert ("POST", "/repos/owner/repo/issues/555/labels") in [
        (c[0], c[1]) for c in stub.calls
    ]


# --------------------------------------------------------------------------
# Idempotent: existing issue is PATCHed, not duplicated
# --------------------------------------------------------------------------
def test_idempotent_existing_issue_patched_not_duplicated(wd_module, monkeypatch):
    """When an open `[main-red] {repo}: {SHA[:10]}` issue already exists
    for the current SHA, file_or_update_red PATCHes it. No POST."""
    existing_title = f"[main-red] owner/repo: {SHA_RED[:10]}"
    failed_ctx = [
        {"context": "ci/test", "state": "failure",
         "target_url": "https://x/y", "description": "boom"},
    ]
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branches/main"): (200, _branches_response(SHA_RED)),
        ("GET", f"/repos/owner/repo/commits/{SHA_RED}/status"): (
            200, _combined_status("failure", failed_ctx),
        ),
        ("GET", "/repos/owner/repo/issues"): (
            200, [{"number": 7, "title": existing_title}],
        ),
        ("PATCH", "/repos/owner/repo/issues/7"): (200, {"number": 7}),
    })
    monkeypatch.setattr(wd_module, "api", stub)

    wd_module.run_once(dry_run=False)

    methods_paths = [(c[0], c[1]) for c in stub.calls]
    assert ("PATCH", "/repos/owner/repo/issues/7") in methods_paths, stub.calls
    assert ("POST", "/repos/owner/repo/issues") not in methods_paths, (
        f"expected NO POST when issue exists (idempotent), got: {stub.calls}"
    )


# --------------------------------------------------------------------------
# Auto-close: main green at NEW_SHA → close issue for OLD_SHA
# --------------------------------------------------------------------------
def test_auto_close_when_main_returns_to_green(wd_module, monkeypatch):
    """main green at SHA_GREEN with an open `[main-red]` issue for
    SHA_RED → close the old issue with a 'returned to green' comment."""
    old_title = f"[main-red] owner/repo: {SHA_RED[:10]}"
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branches/main"): (200, _branches_response(SHA_GREEN)),
        ("GET", f"/repos/owner/repo/commits/{SHA_GREEN}/status"): (
            200, _combined_status("success", [
                {"context": "ci/test", "state": "success"},
            ]),
        ),
        ("GET", "/repos/owner/repo/issues"): (
            200, [{"number": 7, "title": old_title}],
        ),
        ("POST", "/repos/owner/repo/issues/7/comments"): (201, {"id": 100}),
        ("PATCH", "/repos/owner/repo/issues/7"): (200, {"number": 7, "state": "closed"}),
    })
    monkeypatch.setattr(wd_module, "api", stub)

    wd_module.run_once(dry_run=False)

    methods_paths = [(c[0], c[1]) for c in stub.calls]
    # Comment posted with reference to the new SHA
    assert ("POST", "/repos/owner/repo/issues/7/comments") in methods_paths
    comment_calls = [
        c for c in stub.calls
        if c[0] == "POST" and c[1] == "/repos/owner/repo/issues/7/comments"
    ]
    assert SHA_GREEN in comment_calls[0][2]["body"]
    # Issue closed via PATCH state=closed
    patch_calls = [
        c for c in stub.calls
        if c[0] == "PATCH" and c[1] == "/repos/owner/repo/issues/7"
    ]
    # Fail with a readable message (not IndexError) if the close never ran.
    assert patch_calls, f"expected a PATCH closing issue 7, got: {stub.calls}"
    assert patch_calls[0][2] == {"state": "closed"}


def test_auto_close_skips_when_main_pending(wd_module, monkeypatch):
    """main pending (CI still running) at NEW_SHA → leave old issue alone.
    Pending could resolve to red, so closing prematurely would lose the
    breadcrumb of the prior red."""
    # Only the two reads are stubbed: the issue list must not even be
    # queried while main is pending.
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branches/main"): (200, _branches_response(SHA_GREEN)),
        ("GET", f"/repos/owner/repo/commits/{SHA_GREEN}/status"): (
            200, _combined_status("pending", [
                {"context": "ci/test", "state": "pending"},
            ]),
        ),
    })
    monkeypatch.setattr(wd_module, "api", stub)

    wd_module.run_once(dry_run=False)

    # No close-related calls
    methods_paths = [(c[0], c[1]) for c in stub.calls]
    assert ("PATCH", "/repos/owner/repo/issues/7") not in methods_paths
    assert ("GET", "/repos/owner/repo/issues") not in methods_paths
    # The stub records a call before rejecting an unregistered one, so
    # this also catches a write attempt whose AssertionError was swallowed
    # somewhere on the way up (e.g. a comment POST).
    writes = [mp for mp in methods_paths if mp[0] != "GET"]
    assert writes == [], f"pending main must not trigger writes: {writes}"


# --------------------------------------------------------------------------
# HTTP-failure / api() raises — duplicate-write regression guard
# --------------------------------------------------------------------------
def test_find_open_issue_for_sha_raises_on_transient_error(wd_module, monkeypatch):
    """When the issue-search GET fails (transient 500),
    find_open_issue_for_sha must propagate ApiError, NOT return None.

    REGRESSION CLASS PROOF: a pre-fix implementation that returned
    `None` on api() failure would cause file_or_update_red to take the
    POST branch and create a duplicate issue. This test FAILS on that
    pre-fix code. Verified by temporarily replacing the script's
    `raise ApiError` with `return [], None` and rerunning — this case
    flips red.
    """
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/issues"): wd_module.ApiError(
            "GET /repos/owner/repo/issues → HTTP 500: gateway timeout"
        ),
    })
    monkeypatch.setattr(wd_module, "api", stub)
    with pytest.raises(wd_module.ApiError):
        wd_module.find_open_issue_for_sha(SHA_RED)


def test_list_open_red_issues_raises_on_transient_error(wd_module, monkeypatch):
    """Same contract for list_open_red_issues — close path must not
    silently skip on transient error."""
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/issues"): wd_module.ApiError(
            "GET /repos/owner/repo/issues → HTTP 502: bad gateway"
        ),
    })
    monkeypatch.setattr(wd_module, "api", stub)
    with pytest.raises(wd_module.ApiError):
        wd_module.list_open_red_issues()


def test_run_once_propagates_api_error_loudly(wd_module, monkeypatch):
    """Transient outage on branches read → ApiError propagates through
    run_once. The workflow run fails LOUDLY (correct behaviour); silent
    fallthrough would hide that the watchdog is broken."""
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branches/main"): wd_module.ApiError(
            "GET /repos/owner/repo/branches/main → HTTP 503: service unavailable"
        ),
    })
    monkeypatch.setattr(wd_module, "api", stub)
    with pytest.raises(wd_module.ApiError):
        wd_module.run_once(dry_run=False)


# --------------------------------------------------------------------------
# api() helper: raises on non-2xx
# --------------------------------------------------------------------------
def test_api_raises_on_non_2xx(wd_module, monkeypatch):
    """api() must raise ApiError on HTTP 500. This pins the
    `feedback_api_helper_must_raise_not_return_dict` contract — the
    duplicate-issue regression class depends on it."""
    import io  # local import: only this test needs a fake body stream

    def fake_urlopen(req, timeout=30):
        # Hand HTTPError a real file object instead of None so the error
        # body stays readable regardless of how the running interpreter
        # treats `fp=None` (some versions leave the error without a
        # stream — NOTE(review): confirm against the CI Python version).
        raise urllib.error.HTTPError(
            req.full_url, 500, "Internal Server Error", {},  # type: ignore
            io.BytesIO(b"internal server error"),
        )

    monkeypatch.setattr(wd_module.urllib.request, "urlopen", fake_urlopen)

    with pytest.raises(wd_module.ApiError) as excinfo:
        wd_module.api("GET", "/repos/owner/repo/issues")
    assert "HTTP 500" in str(excinfo.value)


def test_api_raises_on_json_decode_when_expected(wd_module, monkeypatch):
    """api(expect_json=True) raises ApiError if body is not valid JSON.
    Closes the `{"_raw": ...}` fallthrough that callers misinterpret."""

    def fake_urlopen(req, timeout=30):
        return _FakeResponse(200, b"not-json\n\n")

    monkeypatch.setattr(wd_module.urllib.request, "urlopen", fake_urlopen)

    with pytest.raises(wd_module.ApiError):
        wd_module.api("GET", "/repos/owner/repo/issues")


def test_api_allows_raw_when_expect_json_false(wd_module, monkeypatch):
    """expect_json=False returns `{_raw: ...}` for known-quirky endpoints
    per `feedback_gitea_create_api_unparseable_response`. Opt-in."""

    def fake_urlopen(req, timeout=30):
        return _FakeResponse(201, b"not-json-but-created\n")

    monkeypatch.setattr(wd_module.urllib.request, "urlopen", fake_urlopen)
    status, body = wd_module.api(
        "POST", "/repos/owner/repo/issues", expect_json=False,
    )
    assert status == 201
    assert "_raw" in body


# --------------------------------------------------------------------------
# --dry-run flag — no side effects
# --------------------------------------------------------------------------
def test_dry_run_skips_writes(wd_module, monkeypatch, capsys):
    """--dry-run: detector runs, would-be title/body printed, but no
    POST/PATCH/comment calls are issued."""
    failed_ctx = [
        {"context": "ci/test", "state": "failure",
         "target_url": "https://x/y", "description": "boom"},
    ]
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branches/main"): (200, _branches_response(SHA_RED)),
        ("GET", f"/repos/owner/repo/commits/{SHA_RED}/status"): (
            200, _combined_status("failure", failed_ctx),
        ),
        ("GET", "/repos/owner/repo/issues"): (200, []),
    })
    monkeypatch.setattr(wd_module, "api", stub)

    wd_module.run_once(dry_run=True)

    methods = [c[0] for c in stub.calls]
    assert "POST" not in methods, f"dry-run made writes: {stub.calls}"
    assert "PATCH" not in methods, f"dry-run made writes: {stub.calls}"
    captured = capsys.readouterr()
    assert "[dry-run]" in captured.out
    assert "[main-red]" in captured.out  # title rendered


def test_dry_run_flag_parsed(wd_module):
    """--dry-run wired into argparse."""
    ns = wd_module._parse_args(["--dry-run"])
    assert ns.dry_run is True
    ns = wd_module._parse_args([])
    assert ns.dry_run is False


# --------------------------------------------------------------------------
# Title format
# --------------------------------------------------------------------------
def test_title_format_uses_short_sha(wd_module):
    """Title is `[main-red] {repo}: {SHA[:10]}` — stable idempotency key."""
    t = wd_module.title_for(SHA_RED)
    assert t == f"[main-red] owner/repo: {SHA_RED[:10]}"
    # exactly 10 chars of SHA
    assert SHA_RED[:10] in t
    assert SHA_RED[:11] not in t


def test_list_open_red_issues_filters_by_prefix(wd_module, monkeypatch):
    """list_open_red_issues only returns issues whose title starts with
    the expected prefix — unrelated open issues are not touched."""
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/issues"): (200, [
            {"number": 1, "title": f"[main-red] owner/repo: {SHA_RED[:10]}"},
            {"number": 2, "title": "Some unrelated bug"},
            {"number": 3, "title": "[ci-drift] owner/repo: divergence"},
            {"number": 4, "title": f"[main-red] owner/repo: {SHA_GREEN[:10]}"},
        ]),
    })
    monkeypatch.setattr(wd_module, "api", stub)
    out = wd_module.list_open_red_issues()
    assert [i["number"] for i in out] == [1, 4]


# --------------------------------------------------------------------------
# get_head_sha / get_combined_status data-shape guards
# --------------------------------------------------------------------------
def test_get_head_sha_raises_on_malformed_response(wd_module, monkeypatch):
    """If Gitea returns a body without `commit.id`, raise ApiError —
    do NOT proceed to file an issue with a bogus SHA."""
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branches/main"): (
            200, {"name": "main"},  # no commit object
        ),
    })
    monkeypatch.setattr(wd_module, "api", stub)
    with pytest.raises(wd_module.ApiError):
        wd_module.get_head_sha("main")


def test_get_head_sha_accepts_sha_field(wd_module, monkeypatch):
    """Older Gitea versions may return `commit.sha` instead of `commit.id`.
    Accept either — the watchdog must be tolerant to a documented shape
    variance."""
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branches/main"): (
            200, {"name": "main", "commit": {"sha": SHA_RED}},
        ),
    })
    monkeypatch.setattr(wd_module, "api", stub)
    assert wd_module.get_head_sha("main") == SHA_RED


# --------------------------------------------------------------------------
# Loki event emitter (best-effort, must not raise)
# --------------------------------------------------------------------------
def test_emit_loki_event_prints_json_line(wd_module, capsys, monkeypatch):
    """emit_loki_event always prints a JSON line to stdout (for workflow
    log capture) regardless of whether `logger` is installed."""
    marker = "main-red-watchdog event:"
    # Force logger-not-found path to make the test deterministic.
    monkeypatch.setattr(wd_module.shutil, "which", lambda name: None)
    wd_module.emit_loki_event("main_red_detected", SHA_RED, ["ci/test"])
    captured = capsys.readouterr()
    assert marker in captured.out
    # Find the JSON payload after the prefix and verify it parses.
    # The assertion above guarantees at least one matching line, so
    # next() cannot raise StopIteration here.
    event_line = next(
        ln for ln in captured.out.splitlines() if marker in ln
    )
    payload = json.loads(event_line.split(marker, 1)[1].strip())
    assert payload["event_type"] == "main_red_detected"
    assert payload["repo"] == "owner/repo"
    assert payload["sha"] == SHA_RED
    assert payload["failed_contexts"] == ["ci/test"]


def test_emit_loki_event_survives_logger_failure(wd_module, monkeypatch, capsys):
    """If `logger` is present but the subprocess call raises, the event
    emitter must NOT raise — emission is best-effort by contract."""
    monkeypatch.setattr(wd_module.shutil, "which", lambda name: "/usr/bin/logger")

    def boom(*a, **kw):
        raise OSError("logger pipe failed")

    monkeypatch.setattr(wd_module.subprocess, "run", boom)

    # Must not raise:
    wd_module.emit_loki_event("main_red_detected", SHA_RED, ["ci/test"])
    captured = capsys.readouterr()
    assert "logger call failed" in captured.err


# --------------------------------------------------------------------------
# Runtime env guard
# --------------------------------------------------------------------------
def test_require_runtime_env_exits_when_missing(wd_module, monkeypatch):
    """_require_runtime_env() exits with code 2 when any required env
    var is missing. Caught at main() entry, before any side-effecting
    API call."""
    monkeypatch.delenv("GITEA_TOKEN", raising=False)
    with pytest.raises(SystemExit) as excinfo:
        wd_module._require_runtime_env()
    assert excinfo.value.code == 2

# -- 2.45.2  (git format-patch signature trailer carried over from the patch wrapper)