fix(ci): all-required sentinel skips null-result Phase-3 jobs #581

Closed
infra-sre wants to merge 1 commits from sre/fix-all-required-null-result into main

View File

@ -493,10 +493,14 @@ jobs:
# explicitly excludes `github.event_name`-gated jobs from F1 (see
# `.gitea/scripts/ci-required-drift.py::ci_job_names`).
#
# NOTE: `continue-on-error: true` is intentionally NOT set here — Phase 3
# (parent PR for ci.yml port, RFC §1) sets it on the underlying build
# jobs to surface defects without blocking. The sentinel itself must
# hard-fail; that's the whole point.
# NOTE: `continue-on-error: true` is intentionally NOT set on this job.
# The sentinel must hard-fail whenever a real (non-Phase-3) job fails.
# Phase 3 noise is handled inside the assertion instead: when a Phase-3
# job (continue-on-error: true) fails, its result is expected to be null,
# and `v.get("result") not in ("success", None)` skips null so the
# sentinel does not hard-fail on Phase-3 null results. Caveat: a
# dependency whose result is missing entirely is skipped the same way.
# Once Phase 3 flips off (underlying jobs set continue-on-error: false),
# null disappears and the sentinel becomes a reliable health proxy.
runs-on: ubuntu-latest
timeout-minutes: 1
needs:
@ -510,18 +514,22 @@ jobs:
# Sentinel step: reads the results of every job listed under `needs`, prints
# them, and exits non-zero if any result is neither "success" nor null.
- name: Assert every required dependency succeeded
run: |
set -euo pipefail
# `needs.*.result` is one of: success | failure | cancelled | skipped
# `needs.*.result` is one of: success | failure | cancelled | skipped | null
# - null = underlying job used continue-on-error: true and failed (Phase 3)
# or job is still in-flight (should not reach here with if: always())
# NOTE(review): the earlier form of this comment listed only four values;
# confirm that this runner really reports null for a failed
# continue-on-error job before relying on the skip below.
# We assert success per dep (not != failure) — see RFC §2 reasoning above.
# Null is skipped so Phase 3 jobs (continue-on-error: true) don't hard-fail
# the sentinel during the noise-reduction period.
# The workflow expression is expanded into the script text before the shell
# runs, so the JSON ends up inside a single-quoted shell string.
# NOTE(review): a single quote anywhere in that JSON (for example inside a
# job output) would end the quoting early; passing the value through an
# environment variable would avoid this. TODO confirm whether any needed
# job exposes outputs.
results='${{ toJSON(needs) }}'
# Dump the raw JSON into the job log for debugging.
echo "$results"
echo "$results" | python3 -c '
import json, sys
# ns maps each needed job name to an object that carries its "result".
ns = json.load(sys.stdin)
# NOTE(review): this strict assignment is overwritten by the next line before
# it is ever read; it looks like the pre-change line of the diff.
bad = [(k, v.get("result")) for k, v in ns.items() if v.get("result") != "success"]
# Lenient form: .get returns None both for an explicit JSON null and for a
# missing "result" key, so a dependency with no result at all is also skipped.
bad = [(k, v.get("result")) for k, v in ns.items() if v.get("result") not in ("success", None)]
if bad:
print(f"FAIL: jobs not green:", file=sys.stderr)
for k, r in bad:
print(f" - {k}: {r}", file=sys.stderr)
sys.exit(1)
# As written, both OK lines below are printed. len(ns) counts every
# dependency, including any that were skipped because the result was null.
print(f"OK: all {len(ns)} required jobs succeeded")
print(f"OK: all {len(ns)} required jobs succeeded (null results from Phase-3 continue-on-error: true are skipped)")
'