From 6ee9ecdf0d9d61ca5747ac9c94cfe81e52d0617c Mon Sep 17 00:00:00 2001 From: claude-ceo-assistant Date: Mon, 11 May 2026 19:39:43 -0700 Subject: [PATCH] fix(ci)(interim): disable status-reaper + main-red-watchdog crons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC#420 Option-C machinery has been down ~2.5h: - status-reaper rev2 (PR#633, merged 01:48Z): 0 'Compensated by status-reaper' status on the last 14 main commits. Schedule reds stranded on stale commits despite the rev2 sweep-last-10 design. - main-red-watchdog: 'Failing after 10m56s' with timeout-minutes:5 — runner saturation queue-lag pushed it past its own timeout. No [main-red] issues filed during the outage despite 5 reds on HEAD e7965a0f at the high watermark. Both workflows were themselves contributing to the red pileup on main + queuing the ubuntu-latest pool. Cheap-and-safe interim: comment out the schedule: blocks. workflow_dispatch: stays so they can be triggered manually for debugging. Re-enable after: 1. rev3 lands (likely scan_workflows() should LOG-and-skip rather than sys.exit on a malformed workflow; list_recent_commit_shas() should degrade gracefully) 2. Dedicated status-ops runner-label (route status-reaper + watchdog + ci-required-drift to it so they don't queue behind CI-merge-churn) Per hongming-pc2 02:31Z directive: 'pick one: rev3+raise-timeout OR temporarily disable the crons'. Choosing disable for safety while rev3 investigation proceeds. 
Reviewed-by: hongming-pc2 (pre-APPROVE on sight 02:31Z) Author: claude-ceo-assistant (orchestrator emergency; operator-host unreachable 02:01-02:38Z blocked SSH-bridge to core-devops persona) Cross-links: task #90 (rev2), task #75 (main-red sweep), RFC#420 Option-C --- .gitea/workflows/main-red-watchdog.yml | 11 +++++++---- .gitea/workflows/status-reaper.yml | 17 ++++++++++------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/.gitea/workflows/main-red-watchdog.yml b/.gitea/workflows/main-red-watchdog.yml index 2dbec72b..f3f62be7 100644 --- a/.gitea/workflows/main-red-watchdog.yml +++ b/.gitea/workflows/main-red-watchdog.yml @@ -37,10 +37,13 @@ name: main-red-watchdog # "unknown on type" when `workflow_dispatch.inputs.X` is present. Revisit # when Gitea ≥ 1.23 is fleet-wide. on: - schedule: - # Hourly at :05 — task spec calls for "off-zero" (`5 * * * *`), - # offset from :17 (ci-required-drift) and :00 (peak cron load). - - cron: '5 * * * *' + # SCHEDULE DISABLED 2026-05-12 — interim per RFC#420 Option-C machinery-down emergency + # Watchdog timing out behind runner saturation; rev3+dedicated-runner-label in flight + # Re-enable after rev3 lands and the runner-saturation root cause is resolved + # schedule: + # # Hourly at :05 — task spec calls for "off-zero" (`5 * * * *`), + # # offset from :17 (ci-required-drift) and :00 (peak cron load). + # - cron: '5 * * * *' workflow_dispatch: # Read commit status + branch ref + issues; write issues (open/PATCH/close). diff --git a/.gitea/workflows/status-reaper.yml b/.gitea/workflows/status-reaper.yml index 4d992b4b..f6d0289d 100644 --- a/.gitea/workflows/status-reaper.yml +++ b/.gitea/workflows/status-reaper.yml @@ -53,13 +53,16 @@ name: status-reaper # `inputs:` block here. Gitea 1.22.6 rejects the whole workflow as # "unknown on type" when `workflow_dispatch.inputs.X` is present. on: - schedule: - # Every 5 minutes. 
Off-zero alignment with sibling cron workflows: - # ci-required-drift (`:17`), main-red-watchdog (`:05`), - # railway-pin-audit (`:23`). 5-min cadence gives a tight enough - # close on schedule-triggered false-reds that main-red-watchdog - # (hourly :05) almost never files an issue on the false case. - - cron: '*/5 * * * *' + # SCHEDULE DISABLED 2026-05-12 — interim per RFC#420 Option-C machinery-down emergency + # Reaper rev2 not compensating + watchdog timeout-cascade; rev3 in flight + # Re-enable after rev3 lands and the runner-saturation root cause is resolved + # schedule: + # # Every 5 minutes. Off-zero alignment with sibling cron workflows: + # # ci-required-drift (`:17`), main-red-watchdog (`:05`), + # # railway-pin-audit (`:23`). 5-min cadence gives a tight enough + # # close on schedule-triggered false-reds that main-red-watchdog + # # (hourly :05) almost never files an issue on the false case. + # - cron: '*/5 * * * *' workflow_dispatch: # Compensating-status POST needs write on repo statuses; no other