From 8ad12a5337441cf0863ec892611f9be7e9fa4289 Mon Sep 17 00:00:00 2001 From: core-devops Date: Fri, 15 May 2026 17:55:02 -0700 Subject: [PATCH] fix(ci): add per-ref cancel-in-progress concurrency to storm-contributing workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runner-queue retrigger-storm root fix. On 2026-05-15 a superseded-retrigger storm filled the Gitea Actions queue with 1059 dead-SHA jobs; molecule-core was 95% of a 1290-job queue (PR#1211 alone had 114 runs). A one-time DB triage cleared the live queue but the root cause refills it: standalone PR-validation workflows lacking a concurrency block spawn a fresh run on every PR-fight push / empty-commit rerun (the only rerun mechanism on Gitea 1.22.6) and the superseded older-SHA runs never cancel. ci.yml and sop-checklist.yml — the two workflows feeding the only branch-protection required contexts (CI / all-required, sop-checklist / all-items-acked) — already carry correct per-ref cancel-in-progress concurrency. This adds the same established in-repo grouping key to the six remaining standalone storm contributors: - secret-scan.yml - block-internal-paths.yml - lint-curl-status-capture.yml - lint-workflow-yaml.yml - check-migration-collisions.yml - cascade-list-drift-gate.yml Grouping key: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} with cancel-in-progress: true. Per saved memory feedback_concurrency_group_per_sha this is per-PR-number/per-ref scoped (not global → never cancels a sibling PR or sibling workflow; not per-SHA → still actually cancels superseded runs). None of the six are auto-promote-staging gate inputs (gate-check-v3 / e2e-* are, and correctly keep their per-SHA cancel:false), and none are branch-protection required contexts, so cancel-in-progress cannot leave a required check permanently cancelled — protection evaluates the latest SHA, whose run always completes. 
Purely additive: only concurrency blocks are added; no job or step logic changes. Defense-in-depth janitor cron tracked separately on operator-config. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitea/workflows/block-internal-paths.yml | 10 +++++++ .gitea/workflows/cascade-list-drift-gate.yml | 9 +++++++ .../workflows/check-migration-collisions.yml | 9 +++++++ .gitea/workflows/lint-curl-status-capture.yml | 11 ++++++++ .gitea/workflows/lint-workflow-yaml.yml | 10 +++++++ .gitea/workflows/secret-scan.yml | 27 +++++++++++++++++++ 6 files changed, 76 insertions(+) diff --git a/.gitea/workflows/block-internal-paths.yml b/.gitea/workflows/block-internal-paths.yml index 8fff3bfec..441d504ee 100644 --- a/.gitea/workflows/block-internal-paths.yml +++ b/.gitea/workflows/block-internal-paths.yml @@ -27,6 +27,16 @@ on: push: branches: [main, staging] +# Auto-cancel superseded runs — runner-queue retrigger-storm root fix. +# Per-PR-number / per-ref scoped (NOT global, NOT per-SHA) so superseded +# runs cancel without touching sibling PRs or sibling workflows. Safe to +# cancel-in-progress: not an auto-promote-staging gate input and not a +# branch-protection required context. Full rationale + memory citation +# (feedback_concurrency_group_per_sha) in secret-scan.yml. +concurrency: + group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: GITHUB_SERVER_URL: https://git.moleculesai.app diff --git a/.gitea/workflows/cascade-list-drift-gate.yml b/.gitea/workflows/cascade-list-drift-gate.yml index a7230fa7b..05d3f0c7d 100644 --- a/.gitea/workflows/cascade-list-drift-gate.yml +++ b/.gitea/workflows/cascade-list-drift-gate.yml @@ -36,6 +36,15 @@ on: - .gitea/workflows/publish-runtime.yml - scripts/check-cascade-list-vs-manifest.sh +# Auto-cancel superseded runs — runner-queue retrigger-storm root fix. +# pull_request-only + per-PR-number / per-ref scoped (NOT global, NOT +# per-SHA). 
Safe to cancel-in-progress: not an auto-promote-staging gate +# input and not a branch-protection required context. Full rationale +# + memory citation (feedback_concurrency_group_per_sha) in secret-scan.yml. +concurrency: + group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: GITHUB_SERVER_URL: https://git.moleculesai.app diff --git a/.gitea/workflows/check-migration-collisions.yml b/.gitea/workflows/check-migration-collisions.yml index 991dd11a4..be838e743 100644 --- a/.gitea/workflows/check-migration-collisions.yml +++ b/.gitea/workflows/check-migration-collisions.yml @@ -30,6 +30,15 @@ on: - 'scripts/ops/check_migration_collisions.py' - '.gitea/workflows/check-migration-collisions.yml' +# Auto-cancel superseded runs — runner-queue retrigger-storm root fix. +# pull_request-only + per-PR-number / per-ref scoped (NOT global, NOT +# per-SHA). Safe to cancel-in-progress: not an auto-promote-staging gate +# input and not a branch-protection required context. Full rationale +# + memory citation (feedback_concurrency_group_per_sha) in secret-scan.yml. +concurrency: + group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: GITHUB_SERVER_URL: https://git.moleculesai.app diff --git a/.gitea/workflows/lint-curl-status-capture.yml b/.gitea/workflows/lint-curl-status-capture.yml index e46371eff..ade70db9e 100644 --- a/.gitea/workflows/lint-curl-status-capture.yml +++ b/.gitea/workflows/lint-curl-status-capture.yml @@ -41,6 +41,17 @@ on: - '.gitea/scripts/lint-curl-status-capture.py' - 'tests/test_lint_curl_status_capture.py' +# Auto-cancel superseded runs — runner-queue retrigger-storm root fix. +# Per-PR-number / per-ref scoped (NOT global, NOT per-SHA) so superseded +# runs cancel without touching sibling PRs or sibling workflows. 
Safe to +# cancel-in-progress: path-filtered but NOT an auto-promote-staging gate +# input and NOT a branch-protection required context (per-ref scoping +# keeps staging/main pushes in distinct groups regardless). Full rationale +# + memory citation (feedback_concurrency_group_per_sha) in secret-scan.yml. +concurrency: + group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: GITHUB_SERVER_URL: https://git.moleculesai.app diff --git a/.gitea/workflows/lint-workflow-yaml.yml b/.gitea/workflows/lint-workflow-yaml.yml index 5d2216de0..288464b57 100644 --- a/.gitea/workflows/lint-workflow-yaml.yml +++ b/.gitea/workflows/lint-workflow-yaml.yml @@ -43,6 +43,16 @@ on: - '.gitea/scripts/lint-workflow-yaml.py' - 'tests/test_lint_workflow_yaml.py' +# Auto-cancel superseded runs — runner-queue retrigger-storm root fix. +# Per-PR-number / per-ref scoped (NOT global, NOT per-SHA) so superseded +# runs cancel without touching sibling PRs or sibling workflows. Safe to +# cancel-in-progress: path-filtered but NOT an auto-promote-staging gate +# input and NOT a branch-protection required context. Full rationale +# + memory citation (feedback_concurrency_group_per_sha) in secret-scan.yml. +concurrency: + group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + # Belt-and-suspenders against runner default # (feedback_act_runner_github_server_url). env: diff --git a/.gitea/workflows/secret-scan.yml b/.gitea/workflows/secret-scan.yml index 6f1583f4e..bab48f2ca 100644 --- a/.gitea/workflows/secret-scan.yml +++ b/.gitea/workflows/secret-scan.yml @@ -26,6 +26,33 @@ on: push: branches: [main, staging] +# Auto-cancel superseded runs (runner-queue retrigger-storm root fix). 
+# A PR-fight push / empty-commit rerun (the only rerun mechanism on Gitea 1.22.6) +# otherwise spawns a fresh run that never cancels, piling thousands of +# dead-SHA jobs on the act_runner queue (2026-05-15 storm: molecule-core +# was 95% of a 1290-job queue; this workflow had no concurrency block). +# +# Grouping key rationale (per saved memory feedback_concurrency_group_per_sha): +# - per-PR-number / per-ref scoped, NOT global: PR#A's runs never cancel +# PR#B's runs, and a staging push never collides with a main push +# (github.ref differs). NOT too broad. +# - includes github.workflow: this workflow superseding itself does not +# cancel an unrelated sibling workflow's run. +# - NOT per-SHA: a per-SHA group makes every commit its own group so +# nothing ever cancels — that defeats the storm fix entirely. +# - cancel-in-progress: true is SAFE here: secret-scan is NOT an +# auto-promote-staging gate input (gate-check-v3 / e2e-* are; this is +# not), and it is NOT a branch-protection required context — molecule-core +# protection requires only `CI / all-required` + `sop-checklist / +# all-items-acked`, both of which already carry correct per-ref +# cancel-in-progress concurrency. Branch protection evaluates the +# LATEST commit SHA, whose run always completes (only superseded +# older-SHA runs are cancelled), so the gate still resolves terminally. +# Same group expression already used by sop-checklist.yml / sop-tier-check.yml. +concurrency: + group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: scan: name: Scan diff for credential-shaped strings -- 2.52.0