From 54a8cbcfca0eec81e505542fb04808c75acc2b36 Mon Sep 17 00:00:00 2001
From: devops-engineer
Date: Wed, 10 Jun 2026 15:02:18 +0000
Subject: [PATCH 1/2] ci(lint): guard against actions/setup-go caching on self-hosted fleet
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Static workflow-shape lint forbidding setup-go cache (cache:true OR the
default-true cases) — the actions/cache untar-over-GOCACHE-bind-mount
corruption from the 2026-06-09/10 rollout. Lands advisory
(continue-on-error: true); flips to required after the cli#16 sweep
(ci.yml + release.yml) merges + 3 clean days. A `cache:` value that is
not the literal `false` (for example a ${{ }} expression) is flagged too.

Co-Authored-By: Claude Fable 5
---
 .gitea/scripts/lint_setup_go_cache.py    | 150 +++++++++++++++++++++++
 .gitea/workflows/lint-setup-go-cache.yml |  55 +++++++++
 tests/test_lint_setup_go_cache.py        | 130 ++++++++++++++++++
 3 files changed, 335 insertions(+)
 create mode 100644 .gitea/scripts/lint_setup_go_cache.py
 create mode 100644 .gitea/workflows/lint-setup-go-cache.yml
 create mode 100644 tests/test_lint_setup_go_cache.py

diff --git a/.gitea/scripts/lint_setup_go_cache.py b/.gitea/scripts/lint_setup_go_cache.py
new file mode 100644
index 0000000..bf6852e
--- /dev/null
+++ b/.gitea/scripts/lint_setup_go_cache.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+"""lint_setup_go_cache — forbid actions/setup-go cache on self-hosted runners.
+
+Forbidden shape
+---------------
+Any `uses: actions/setup-go@...` step that enables actions/cache —
+either `cache: true` explicitly OR the default-true case (a `cache-key`
+/ `cache-dependency-path` set with NO `cache: false`). setup-go's
+`cache` input DEFAULTS to true, so omitting it is also forbidden once
+any cache-* input is present, and a bare setup-go with neither is
+treated as default-true and flagged too (belt-and-braces: on our
+self-hosted fleet the only safe value is explicit `cache: false`).
+
+Why
+---
+The molecule self-hosted runners bind-mount a persistent, host-shared
+GOCACHE/GOMODCACHE (/var/cache/ci-go-{build,mod}, see
+operator-config ops/runners/config.dedicated.yaml). actions/cache
+(which setup-go drives when cache:true) untars its restored archive
+OVER that bind mount -> "File exists" -> "Failed to restore" ->
+partial cache -> downstream linker/typecheck failures on heavy jobs
+(test -race link "too many errors", go-arch-lint "without types").
+The runner-level GOCACHE is the SSOT for caching; setup-go must not
+also cache. Fix: add `cache: false` under the setup-go `with:`.
+
+Empirical: 2026-06-09/10 cross-repo rollout; sweep PRs
+fix/setup-go-cache-vs-bind-mount (core#2524, cli#16). This guard
+PREVENTS regression after those land.
+
+Detection is line-based (not full YAML) so it can attribute a precise
+file:line and survives Gitea's ${{ }} expressions that confuse some
+YAML loaders. We locate each setup-go step, then read the contiguous
+`with:` block that follows it (same or deeper indent, up to the next
+step `- ` at the step indent).
+"""
+import os
+import re
+import sys
+
+WORKFLOWS_DIR = os.environ.get("WORKFLOWS_DIR", ".gitea/workflows")
+
+SETUP_GO = re.compile(r'^(\s*)(?:-\s+)?uses:\s*actions/setup-go@', re.I)
+# step boundary: a list item `- ` at an indent <= the step's own indent
+STEP_ITEM = re.compile(r'^(\s*)-\s+\S')
+CACHE_LINE = re.compile(r'^\s*cache:\s*(\S+)')
+CACHE_DEP = re.compile(r'^\s*cache-(dependency-path|key):')
+WITH_LINE = re.compile(r'^\s*with:\s*$')
+
+
+def step_indent(line):
+    """Return the number of leading whitespace characters in `line`."""
+    m = re.match(r'^(\s*)', line)
+    return len(m.group(1))
+
+
+def scan_file(path):
+    """Return (lineno, reason) for each setup-go step not set to `cache: false`."""
+    with open(path, encoding="utf-8") as f:
+        lines = f.readlines()
+    viols = []
+    i = 0
+    n = len(lines)
+    while i < n:
+        m = SETUP_GO.match(lines[i])
+        if not m:
+            i += 1
+            continue
+        go_line = i + 1
+        # Indent of the `uses:` key. The step's `with:` block lives at
+        # the same key indent (siblings under the same `- ` list item).
+        uses_indent = step_indent(lines[i])
+        # Collect the block belonging to this step: subsequent lines that
+        # are more-indented than the step list marker, stopping at the
+        # next `- ` item whose indent <= the list-marker indent.
+        # The list marker indent is uses_indent if `- uses:` inline,
+        # else uses_indent-2 (key under a `- `). Normalize to the marker.
+        # Simpler: gather until a `- ` item at indent < uses_indent, or
+        # indent == uses_indent for the `- uses:` inline form.
+        inline_dash = bool(re.match(r'^\s*-\s+uses:', lines[i]))
+        marker_indent = uses_indent if inline_dash else uses_indent - 2
+        cache_val = None
+        has_cache_dep = False
+        j = i + 1
+        while j < n:
+            ln = lines[j]
+            if ln.strip() == "" or ln.lstrip().startswith("#"):
+                j += 1
+                continue
+            sm = STEP_ITEM.match(ln)
+            if sm and step_indent(ln) <= marker_indent:
+                break  # next step
+            # also stop if we dedented out of this step entirely
+            if step_indent(ln) <= marker_indent and not WITH_LINE.match(ln):
+                break
+            cm = CACHE_LINE.match(ln)
+            if cm:
+                cache_val = cm.group(1).strip().strip('"\'').lower()
+            if CACHE_DEP.match(ln):
+                has_cache_dep = True
+            j += 1
+        # Decide
+        if cache_val == "true":
+            viols.append((go_line, "cache: true (must be `cache: false`)"))
+        elif cache_val is None:
+            # default-true. Flag — explicit cache:false is required on
+            # the self-hosted fleet. Strongest with cache-dep present,
+            # but bare setup-go is also default-true so flag both.
+            if has_cache_dep:
+                viols.append((go_line, "cache-dependency-path/key set with no `cache:` (defaults to true)"))
+            else:
+                viols.append((go_line, "no `cache:` set (defaults to true; require explicit `cache: false`)"))
+        elif cache_val != "false":
+            # Any value other than the literal `false` (e.g. a `${{ }}`
+            # expression) can still enable caching at runtime, so flag it.
+            viols.append((go_line, "`cache:` is not the literal `false` (must be `cache: false`)"))
+        # cache_val == "false" -> OK
+        i = j
+    return viols
+
+
+def main():
+    """Lint every workflow in WORKFLOWS_DIR; return the process exit code."""
+    if not os.path.isdir(WORKFLOWS_DIR):
+        print(f"OK: no {WORKFLOWS_DIR} directory")
+        return 0
+    all_viols = []
+    for fn in sorted(os.listdir(WORKFLOWS_DIR)):
+        if not (fn.endswith(".yml") or fn.endswith(".yaml")):
+            continue
+        path = os.path.join(WORKFLOWS_DIR, fn)
+        for lineno, reason in scan_file(path):
+            all_viols.append(f"{path}:{lineno}: actions/setup-go with caching enabled — {reason}")
+    if all_viols:
+        print("FAIL: actions/setup-go must set `cache: false` on the self-hosted fleet:")
+        for v in all_viols:
+            print(f"  - {v}")
+        print()
+        print("Why: runners bind-mount a host-shared GOCACHE/GOMODCACHE")
+        print("     (/var/cache/ci-go-{build,mod}, operator-config")
+        print("     ops/runners/config.dedicated.yaml). actions/cache untars OVER")
+        print("     the bind mount -> 'File exists' -> partial cache -> race-link")
+        print("     / arch-lint failures. The runner-level GOCACHE is the cache SSOT.")
+        print("     Fix: add `cache: false` under the setup-go `with:` block.")
+        return 1
+    print("OK: every actions/setup-go step sets cache: false.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.gitea/workflows/lint-setup-go-cache.yml b/.gitea/workflows/lint-setup-go-cache.yml
new file mode 100644
index 0000000..2584097
--- /dev/null
+++ b/.gitea/workflows/lint-setup-go-cache.yml
@@ -0,0 +1,55 @@
+name: lint-setup-go-cache
+
+# Static workflow-shape lint: forbid `actions/setup-go` caching on the
+# self-hosted fleet. Every setup-go step must set `cache: false`.
+#
+# Forbidden shape: `cache: true` (explicit), OR cache-dependency-path /
+# cache-key set with no `cache:` (defaults to true), OR a bare setup-go
+# with no `cache:` at all (still default-true).
+#
+# Why (2026-06-09/10 cross-repo rollout): the self-hosted runners
+# bind-mount a host-shared GOCACHE/GOMODCACHE (/var/cache/ci-go-{build,
+# mod} — operator-config ops/runners/config.dedicated.yaml). setup-go's
+# actions/cache untars OVER that bind mount -> "File exists" -> partial
+# cache -> linker/typecheck failures on heavy jobs. The runner-level
+# GOCACHE is the cache SSOT. Fix: add `cache: false` under setup-go.
+#
+# Coordination: the sweep fix/setup-go-cache-vs-bind-mount (cli#16)
+# removes this repo's `cache: true` hits (ci.yml + release.yml). Until
+# it merges, this lint lists them — so it lands advisory
+# (continue-on-error: true). FOLLOW-UP: after cli#16 merges + 3 clean
+# days, flip continue-on-error -> false.
+
+on:
+  pull_request:
+    paths:
+      - '.gitea/workflows/**'
+      - '.gitea/scripts/lint_setup_go_cache.py'
+      - 'tests/test_lint_setup_go_cache.py'
+  push:
+    branches: [main]
+    paths:
+      - '.gitea/workflows/**'
+      - '.gitea/scripts/lint_setup_go_cache.py'
+
+permissions:
+  contents: read
+
+jobs:
+  lint:
+    name: lint-setup-go-cache
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    # Advisory until cli#16 sweep merges + 3 clean days, then flip false.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - name: Run lint-setup-go-cache
+        run: python3 .gitea/scripts/lint_setup_go_cache.py
+      - name: Run lint-setup-go-cache unit tests
+        run: |
+          python -m pip install --quiet pytest
+          python3 -m pytest tests/test_lint_setup_go_cache.py -q
diff --git a/tests/test_lint_setup_go_cache.py b/tests/test_lint_setup_go_cache.py
new file mode 100644
index 0000000..33441c8
--- /dev/null
+++ b/tests/test_lint_setup_go_cache.py
@@ -0,0 +1,130 @@
+"""Unit tests for lint_setup_go_cache — fixture catch + clean proofs."""
+import importlib.util
+import os
+import textwrap
+
+import pytest
+
+HERE = os.path.dirname(__file__)
+SCRIPT = os.path.join(HERE, "..", ".gitea", "scripts", "lint_setup_go_cache.py")
+spec = importlib.util.spec_from_file_location("lint_setup_go_cache", SCRIPT)
+mod = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mod)
+
+
+def _write(tmp_path, body):
+    p = tmp_path / "wf.yml"
+    p.write_text(textwrap.dedent(body))
+    return str(p)
+
+
+def test_cache_true_explicit_flagged(tmp_path):
+    p = _write(tmp_path, """\
+        jobs:
+          build:
+            runs-on: docker-host
+            steps:
+              - uses: actions/setup-go@v5
+                with:
+                  go-version: 'stable'
+                  cache: true
+                  cache-dependency-path: go.sum
+    """)
+    viols = mod.scan_file(p)
+    assert len(viols) == 1
+    assert "cache: true" in viols[0][1]
+
+
+def test_default_true_with_cachedep_flagged(tmp_path):
+    p = _write(tmp_path, """\
+        jobs:
+          build:
+            runs-on: docker-host
+            steps:
+              - uses: actions/setup-go@v5
+                with:
+                  go-version: 'stable'
+                  cache-dependency-path: go.sum
+    """)
+    viols = mod.scan_file(p)
+    assert len(viols) == 1
+    assert "defaults to true" in viols[0][1]
+
+
+def test_bare_setup_go_default_true_flagged(tmp_path):
+    p = _write(tmp_path, """\
+        jobs:
+          build:
+            runs-on: docker-host
+            steps:
+              - uses: actions/setup-go@v5
+                with:
+                  go-version: 'stable'
+              - run: go build ./...
+    """)
+    viols = mod.scan_file(p)
+    assert len(viols) == 1
+    assert "defaults to true" in viols[0][1]
+
+
+def test_cache_false_clean(tmp_path):
+    p = _write(tmp_path, """\
+        jobs:
+          build:
+            runs-on: docker-host
+            steps:
+              - uses: actions/setup-go@v5
+                with:
+                  go-version: 'stable'
+                  cache: false
+                  cache-dependency-path: go.sum
+    """)
+    assert mod.scan_file(p) == []
+
+
+def test_no_setup_go_clean(tmp_path):
+    p = _write(tmp_path, """\
+        jobs:
+          build:
+            runs-on: docker-host
+            steps:
+              - uses: actions/checkout@v4
+              - run: echo hi
+    """)
+    assert mod.scan_file(p) == []
+
+
+def test_multiple_steps_only_bad_flagged(tmp_path):
+    p = _write(tmp_path, """\
+        jobs:
+          build:
+            runs-on: docker-host
+            steps:
+              - uses: actions/setup-go@v5
+                with:
+                  go-version: 'stable'
+                  cache: false
+              - uses: actions/setup-go@v6
+                with:
+                  go-version: '1.25'
+                  cache: true
+    """)
+    viols = mod.scan_file(p)
+    assert len(viols) == 1
+    assert "cache: true" in viols[0][1]
+
+
+def test_cache_expression_flagged(tmp_path):
+    p = _write(tmp_path, """\
+        jobs:
+          build:
+            runs-on: docker-host
+            steps:
+              - uses: actions/setup-go@v5
+                with:
+                  go-version: 'stable'
+                  cache: ${{ inputs.go-cache }}
+    """)
+    viols = mod.scan_file(p)
+    assert len(viols) == 1
+    assert "not the literal" in viols[0][1]
-- 
2.52.0

From 4af1028ffd96f384dbdf7d10d2ae8fa4fe560b1c Mon Sep 17 00:00:00 2001
From: devops-engineer
Date: Thu, 11 Jun 2026 00:58:58 +0000
Subject: [PATCH 2/2] ci(lint): re-trigger lint-setup-go-cache after dangling-pending wedge

Prior run 482133 at head 54a8cbcf was cancelled before it ever started
(queued, never scheduled), so Gitea never posted a terminal
commit-status. The lint-setup-go-cache (pull_request) status stuck
PENDING since 15:07 and 405-blocked merge of this PR. No docker-host
present in any cli workflow (all runs-on: ubuntu-latest, the cli fleet);
comment-only touch re-fires the path-triggered (.gitea/workflows/**)
workflow on a fresh schedulable head.
---
 .gitea/workflows/lint-setup-go-cache.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.gitea/workflows/lint-setup-go-cache.yml b/.gitea/workflows/lint-setup-go-cache.yml
index 2584097..94db38e 100644
--- a/.gitea/workflows/lint-setup-go-cache.yml
+++ b/.gitea/workflows/lint-setup-go-cache.yml
@@ -53,3 +53,7 @@ jobs:
         run: |
           python -m pip install --quiet pytest
           python3 -m pytest tests/test_lint_setup_go_cache.py -q
+
+# CI re-trigger 2026-06-10: prior run (482133) was cancelled before start
+# (queued, never scheduled) leaving its commit-status dangling PENDING and
+# 405-blocking merge. runs-on is ubuntu-latest (cli fleet); no logic change.
-- 
2.52.0