# molecule-core/.github/workflows/auto-promote-stale-alarm.yml
# (Path of this workflow file. Kept as a comment: a bare scalar ahead of
# the top-level mapping makes the document unparseable as YAML.)

# Workflow name.
name: auto-promote-stale-alarm

# Hourly cron + on-demand alarm for the silent-block failure mode that
# motivated issue #2975:
#   - The auto-promote-staging.yml workflow opened a PR + armed
#     auto-merge, but main's branch protection requires a human review
#     (reviewDecision=REVIEW_REQUIRED). The PR sat BLOCKED with no
#     surface-up-the-stack for 12+ hours, holding 25 commits hostage
#     including the Memory v2 redesign and a reno-stars data-loss fix.
#
# This workflow runs `scripts/check-stale-promote-pr.sh` against the
# repo's open auto-promote PRs (base=main head=staging). When a PR has
# been BLOCKED on REVIEW_REQUIRED for >4h, it:
#   1. Emits a workflow-level warning (visible in run summary + the
#      Actions UI feed).
#   2. Posts a comment on the PR (idempotent — one alarm per PR).
#
# The detection logic lives in scripts/check-stale-promote-pr.sh so
# it's unit-testable with stubbed `gh` (see test-check-stale-promote-pr.sh).
# This file is only the schedule + invocation surface; the script is the
# single source of truth (SSOT) for the detection logic.

on:
  # Scheduled scan, once per hour. A scan is cheap (one `gh pr list` +
  # jq), and with a 4h staleness threshold an hourly cadence means the
  # alarm shows up at most 1h after a PR crosses that threshold.
  schedule:
    # Minute 27 rather than :00, to stay out of the top-of-the-hour
    # cron herd.
    - cron: '27 * * * *'

  # Manual trigger. Both inputs are optional overrides.
  workflow_dispatch:
    inputs:
      stale_hours:
        default: '4'
        required: false
        description: 'Hours after which a BLOCKED+REVIEW_REQUIRED PR is stale (default 4)'
      post_comment:
        default: 'true'
        required: false
        description: 'Post a comment on stale PRs (default true)'

permissions:
  # Write access so the detector can comment on stale PRs.
  pull-requests: write
  # Read-only access to repository contents (the job checks out the
  # detector script).
  contents: read

# One scan at a time: scheduled and on-demand runs share a single
# concurrency group so they cannot race each other into commenting on
# the same PR twice.
#
# An in-flight scan is never cancelled (cancel-in-progress: false); a run
# that arrives meanwhile waits its turn and then simply re-lists the same
# PR set. That is harmless because the script is idempotent — the
# existing comment marker on a PR prevents a duplicate alarm.
concurrency:
  cancel-in-progress: false
  group: 'auto-promote-stale-alarm'

jobs:
  # Single job: fetch the detector script and run it against this repo.
  scan:
    runs-on: ubuntu-latest
    # A scan is one `gh pr list` + jq and should finish in seconds. Bound
    # it explicitly so a hung call cannot sit on the serialized
    # concurrency group for the 360-minute default job timeout and stall
    # the following hourly scans behind it.
    timeout-minutes: 10
    steps:
      - name: Checkout (need scripts/ only)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # Only the detector script is needed, so skip the rest of the
          # tree. Non-cone mode lets the pattern name a single file.
          sparse-checkout: |
            scripts/check-stale-promote-pr.sh
          sparse-checkout-cone-mode: false
          # Don't leave the job token configured for git after checkout;
          # the next step passes it to the script explicitly as GH_TOKEN.
          # NOTE(review): assumes the script performs no authenticated
          # git operations of its own — confirm.
          persist-credentials: false
      - name: Run stale-PR detector
        env:
          # Inputs are handed over as environment variables (never
          # interpolated into the shell text). On scheduled runs there
          # are no inputs, so the `||` fallbacks supply the defaults.
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          STALE_HOURS: ${{ inputs.stale_hours || '4' }}
          POST_COMMENT: ${{ inputs.post_comment || 'true' }}
        run: |
          # The script's exit code reflects the count of stale PRs.
          # A stale finding must not fail the workflow run — the
          # warning + comment are the signal, green/red would be noise.
          # Capture the code with `|| rc=$?` so the step's errexit
          # setting does not abort on a non-zero count.
          rc=0
          bash scripts/check-stale-promote-pr.sh || rc=$?

          # Exit codes 126 (not executable), 127 (not found) and 128+N
          # (killed by signal N) are shell conventions meaning the
          # detector never produced a verdict. Reporting those as
          # "found 127 PR(s)" on a green run would be exactly the kind
          # of silent failure this alarm exists to catch, so surface
          # them as a real failure instead.
          if [ "$rc" -ge 126 ]; then
            echo "::error::Stale PR detector did not run to completion (exit code $rc); no stale-PR verdict was produced."
            exit "$rc"
          fi

          if [ "$rc" -ne 0 ]; then
            echo "::notice::Stale PR detector found $rc PR(s) needing attention. See warnings above + comments on the PRs."
          fi
          # A completed scan always succeeds — the operator-facing
          # surface is the warning, not the workflow status.
          exit 0