ci(lint): guard actions/setup-go cache on self-hosted fleet #2539
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""lint_setup_go_cache — forbid actions/setup-go cache on self-hosted runners.
|
||||
|
||||
Forbidden shape
|
||||
---------------
|
||||
Any `uses: actions/setup-go@...` step that enables actions/cache —
|
||||
either `cache: true` explicitly OR the default-true case (a `cache-key`
|
||||
/ `cache-dependency-path` set with NO `cache: false`). setup-go's
|
||||
`cache` input DEFAULTS to true, so omitting it is also forbidden once
|
||||
any cache-* input is present, and a bare setup-go with neither is
|
||||
treated as default-true and flagged too (belt-and-braces: on our
|
||||
self-hosted fleet the only safe value is explicit `cache: false`).
|
||||
|
||||
Why
|
||||
---
|
||||
The molecule self-hosted runners bind-mount a persistent, host-shared
|
||||
GOCACHE/GOMODCACHE (/var/cache/ci-go-{build,mod}, see
|
||||
operator-config ops/runners/config.dedicated.yaml). actions/cache
|
||||
(which setup-go drives when cache:true) untars its restored archive
|
||||
OVER that bind mount -> "File exists" -> "Failed to restore" ->
|
||||
partial cache -> downstream linker/typecheck failures on heavy jobs
|
||||
(test -race link "too many errors", go-arch-lint "without types").
|
||||
The runner-level GOCACHE is the SSOT for caching; setup-go must not
|
||||
also cache. Fix: add `cache: false` under the setup-go `with:`.
|
||||
|
||||
Empirical: 2026-06-09/10 cross-repo rollout; sweep PRs
|
||||
fix/setup-go-cache-vs-bind-mount (core#2524, cli#16). This guard
|
||||
PREVENTS regression after those land.
|
||||
|
||||
Detection is line-based (not full YAML) so it can attribute a precise
|
||||
file:line and survives Gitea's ${{ }} expressions that confuse some
|
||||
YAML loaders. We locate each setup-go step, then read the contiguous
|
||||
`with:` block that follows it (same or deeper indent, up to the next
|
||||
step `- ` at the step indent).
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
# Directory whose *.yml / *.yaml files are linted. Overridable through the
# WORKFLOWS_DIR environment variable.
WORKFLOWS_DIR = os.environ.get("WORKFLOWS_DIR", ".gitea/workflows")

# A `uses: actions/setup-go@...` line, either inline with the list marker
# (`- uses: ...`) or as a plain key under it. Group 1 is the leading
# whitespace. The action reference may be wrapped in single or double quotes
# (valid YAML), so an optional opening quote is accepted; otherwise a quoted
# reference would slip past the guard.
SETUP_GO = re.compile(r'''^(\s*)(?:-\s+)?uses:\s*["']?actions/setup-go@''', re.I)
# step boundary: a list item `- ` at an indent <= the step's own indent
STEP_ITEM = re.compile(r'^(\s*)-\s+\S')
# `cache: <value>`; group 1 is the first whitespace-delimited token of the value.
CACHE_LINE = re.compile(r'^\s*cache:\s*(\S+)')
# Any cache-* companion input (`cache-dependency-path:` / `cache-key:`).
CACHE_DEP = re.compile(r'^\s*cache-(dependency-path|key):')
# A `with:` key with nothing else on the line.
WITH_LINE = re.compile(r'^\s*with:\s*$')
|
||||
|
||||
|
||||
def step_indent(line: str) -> int:
    """Return the number of leading whitespace characters on *line*.

    Every whitespace character counts as one column, so a tab counts as 1.
    A line with no leading whitespace (including the empty string) yields 0.
    """
    # `\s*` always matches (possibly the empty string), so `m` is never None.
    m = re.match(r'^(\s*)', line)
    return len(m.group(1))
|
||||
|
||||
|
||||
def scan_file(path):
    """Return a list of ``(lineno, reason)`` violations for one workflow file.

    The file is scanned line by line. For every line matching ``SETUP_GO`` the
    lines that follow it are read until the step ends (next list item at or
    left of the step's list marker, or any dedent out of the step). Within
    that span the last ``cache:`` value and the presence of any ``cache-*``
    input are recorded. Blank lines and ``#`` comment lines are ignored.

    A step is reported unless its ``cache:`` value is the literal ``false``
    (case-insensitive, optionally quoted):

    * ``cache: true``                        -> flagged
    * no ``cache:`` key at all               -> flagged (input defaults to true)
    * any other value (e.g. an expression)   -> flagged, because it cannot be
      proven false statically

    ``lineno`` is the 1-based line of the ``uses:`` line. Only lines *after*
    the ``uses:`` line are inspected.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # runner's locale; workflow comments contain non-ASCII characters.
    with open(path, encoding="utf-8") as f:
        lines = f.readlines()
    viols = []
    i = 0
    n = len(lines)
    while i < n:
        m = SETUP_GO.match(lines[i])
        if not m:
            i += 1
            continue
        go_line = i + 1
        # Indent of the `uses:` line as written (whitespace before `- ` or
        # before the key itself).
        uses_indent = len(m.group(1))
        # The list-marker indent is uses_indent for the inline `- uses:` form,
        # else uses_indent - 2 (key nested under a `- ` item). SETUP_GO already
        # matched, so a leading dash can only be the inline form.
        inline_dash = lines[i].lstrip().startswith("-")
        marker_indent = uses_indent if inline_dash else uses_indent - 2
        cache_val = None
        has_cache_dep = False
        j = i + 1
        while j < n:
            ln = lines[j]
            stripped = ln.strip()
            if not stripped or stripped.startswith("#"):
                j += 1
                continue
            if step_indent(ln) <= marker_indent:
                if STEP_ITEM.match(ln):
                    break  # next step
                # also stop if we dedented out of this step entirely
                if not WITH_LINE.match(ln):
                    break
            cm = CACHE_LINE.match(ln)
            if cm:
                cache_val = cm.group(1).strip().strip('"\'').lower()
            if CACHE_DEP.match(ln):
                has_cache_dep = True
            j += 1
        # Decide
        if cache_val == "true":
            viols.append((go_line, "cache: true (must be `cache: false`)"))
        elif cache_val is None:
            # default-true. Flag — explicit cache:false is required on
            # the self-hosted fleet. Strongest with cache-dep present,
            # but bare setup-go is also default-true so flag both.
            if has_cache_dep:
                viols.append((go_line, "cache-dependency-path/key set with no `cache:` (defaults to true)"))
            else:
                viols.append((go_line, "no `cache:` set (defaults to true; require explicit `cache: false`)"))
        elif cache_val != "false":
            # Anything that is not a literal false (an expression, `yes`,
            # `on`, ...) cannot be shown to disable caching, so it must not
            # pass the guard silently.
            viols.append((go_line, f"cache: {cache_val} is not a literal false (must be `cache: false`)"))
        # cache_val == "false" -> OK
        # Resume at the line that ended this step so a following setup-go
        # step is examined on the next iteration.
        i = j
    return viols
|
||||
|
||||
|
||||
def main():
    """Lint every workflow file in ``WORKFLOWS_DIR`` and report violations.

    Returns the process exit status: 0 when the directory is missing or no
    violation is found, 1 when at least one setup-go step is reported. Only
    files directly inside the directory whose names end in ``.yml`` or
    ``.yaml`` are scanned, in sorted order. All output goes to stdout.
    """
    if not os.path.isdir(WORKFLOWS_DIR):
        print(f"OK: no {WORKFLOWS_DIR} directory")
        return 0
    all_viols = []
    for fn in sorted(os.listdir(WORKFLOWS_DIR)):
        if not fn.endswith((".yml", ".yaml")):
            continue
        path = os.path.join(WORKFLOWS_DIR, fn)
        all_viols.extend(
            f"{path}:{lineno}: actions/setup-go with caching enabled — {reason}"
            for lineno, reason in scan_file(path)
        )
    if not all_viols:
        print("OK: every actions/setup-go step sets cache: false.")
        return 0
    print("FAIL: actions/setup-go must set `cache: false` on the self-hosted fleet:")
    for v in all_viols:
        print(f" - {v}")
    # Blank line, then the rationale so the CI log is self-explanatory.
    print()
    print("Why: runners bind-mount a host-shared GOCACHE/GOMODCACHE")
    print(" (/var/cache/ci-go-{build,mod}, operator-config")
    print(" ops/runners/config.dedicated.yaml). actions/cache untars OVER")
    print(" the bind mount -> 'File exists' -> partial cache -> race-link")
    print(" / arch-lint failures. The runner-level GOCACHE is the cache SSOT.")
    print(" Fix: add `cache: false` under the setup-go `with:` block.")
    return 1
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -129,6 +129,13 @@ jobs:
|
||||
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
# cache:false — the self-hosted runner bind-mounts a persistent
|
||||
# GOCACHE/GOMODCACHE (/var/cache/ci-go-{build,mod}); actions/cache is
|
||||
# redundant and corrupts it by untarring over the bind mount ("File
|
||||
# exists" -> "Failed to restore" -> partial cache -> linker/typecheck
|
||||
# errors on heavy jobs, e.g. test -race link "too many errors" and
|
||||
# go-arch-lint "without types"). Fleet sweep after the cp ci.yml find.
|
||||
cache: false
|
||||
- name: Go build + vet (workspace-server)
|
||||
working-directory: workspace-server
|
||||
run: |
|
||||
|
||||
@@ -133,6 +133,13 @@ jobs:
|
||||
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
# cache:false — the self-hosted runner bind-mounts a persistent
|
||||
# GOCACHE/GOMODCACHE (/var/cache/ci-go-{build,mod}); actions/cache is
|
||||
# redundant and corrupts it by untarring over the bind mount ("File
|
||||
# exists" -> "Failed to restore" -> partial cache -> linker/typecheck
|
||||
# errors on heavy jobs, e.g. test -race link "too many errors" and
|
||||
# go-arch-lint "without types"). Fleet sweep after the cp ci.yml find.
|
||||
cache: false
|
||||
- if: ${{ needs.changes.outputs.platform == 'true' }}
|
||||
run: go mod download
|
||||
- if: ${{ needs.changes.outputs.platform == 'true' }}
|
||||
|
||||
@@ -150,7 +150,9 @@ jobs:
|
||||
# GOCACHE/GOMODCACHE (/var/cache/ci-go-{build,mod}); actions/cache is
|
||||
# redundant and corrupts it by untarring over the bind mount ("File
|
||||
# exists" -> "Failed to restore" -> partial cache -> linker/typecheck
|
||||
# errors on heavy jobs). Fleet sweep cp#698 missed this workflow.
|
||||
# errors on heavy jobs, e.g. test -race link "too many errors" and
|
||||
# go-arch-lint "without types"). Fleet sweep cp#698 missed this
|
||||
# workflow (found during the cp ci.yml sweep).
|
||||
cache: false
|
||||
|
||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
name: lint-setup-go-cache
|
||||
|
||||
# Static workflow-shape lint: forbid `actions/setup-go` caching on the
|
||||
# self-hosted fleet. Every setup-go step must set `cache: false`.
|
||||
#
|
||||
# Forbidden shape
|
||||
# ---------------
|
||||
# - `cache: true` (explicit), OR
|
||||
# - `cache-dependency-path` / `cache-key` set with no `cache:` (the
|
||||
# input DEFAULTS to true), OR
|
||||
# - a bare setup-go with no `cache:` at all (still default-true).
|
||||
#
|
||||
# Why this rule exists (2026-06-09/10 cross-repo rollout)
|
||||
# ------------------------------------------------------
|
||||
# The molecule self-hosted runners bind-mount a persistent, host-shared
|
||||
# GOCACHE/GOMODCACHE (/var/cache/ci-go-{build,mod} — operator-config
|
||||
# ops/runners/config.dedicated.yaml + fleet template). When setup-go
|
||||
# turns on actions/cache, it untars its restored archive OVER that bind
|
||||
# mount -> "File exists" -> "Failed to restore" -> partial cache ->
|
||||
# downstream linker/typecheck failures on heavy jobs (test -race link
|
||||
# "too many errors", go-arch-lint "without types"). The runner-level
|
||||
# GOCACHE is the SSOT for caching; setup-go must not also cache.
|
||||
# Fix: add `cache: false` under the setup-go `with:` block.
|
||||
#
|
||||
# Coordination (sweep PRs)
|
||||
# ------------------------
|
||||
# The fleet sweep fix/setup-go-cache-vs-bind-mount (core#2524) removes
|
||||
# the remaining `cache: true` hits. Until it merges, this lint will
|
||||
# loudly list those hits — so it lands at `continue-on-error: true`
|
||||
# (advisory). FOLLOW-UP: after core#2524 merges and main is clean for
|
||||
# 3 days, flip continue-on-error -> false to make this a hard gate.
|
||||
# This PR already removes the default-true hits the sweep PR does not
|
||||
# touch (ci.yml, ci-arm64-advisory.yml, handlers-postgres-integration.yml,
|
||||
# weekly-platform-go.yml).
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- '.gitea/workflows/**'
|
||||
- '.gitea/scripts/lint_setup_go_cache.py'
|
||||
- 'tests/test_lint_setup_go_cache.py'
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- '.gitea/workflows/**'
|
||||
- '.gitea/scripts/lint_setup_go_cache.py'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: lint-setup-go-cache-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# bp-exempt: advisory lint enforcing the cache:false convention on the
|
||||
# self-hosted GOCACHE bind-mount fleet; flips to required after the
|
||||
# core#2524 sweep merges (see header). Not a merge gate yet.
|
||||
lint:
|
||||
name: lint-setup-go-cache
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
# Advisory until core#2524 sweep merges + 3 clean days, then flip false.
|
||||
# internal#881 Phase 3 mask — 14d forced-renewal cadence (flip after 3 clean days)
|
||||
continue-on-error: true # internal#881
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
with:
|
||||
python-version: '3.12'
|
||||
- name: Run lint-setup-go-cache
|
||||
run: python3 .gitea/scripts/lint_setup_go_cache.py
|
||||
- name: Run lint-setup-go-cache unit tests
|
||||
run: |
|
||||
python -m pip install --quiet pytest
|
||||
python3 -m pytest tests/test_lint_setup_go_cache.py -q
|
||||
@@ -47,6 +47,13 @@ jobs:
|
||||
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: stable
|
||||
# cache:false — the self-hosted runner bind-mounts a persistent
|
||||
# GOCACHE/GOMODCACHE (/var/cache/ci-go-{build,mod}); actions/cache is
|
||||
# redundant and corrupts it by untarring over the bind mount ("File
|
||||
# exists" -> "Failed to restore" -> partial cache -> linker/typecheck
|
||||
# errors on heavy jobs, e.g. test -race link "too many errors" and
|
||||
# go-arch-lint "without types"). Fleet sweep after the cp ci.yml find.
|
||||
cache: false
|
||||
|
||||
- name: Go mod download
|
||||
run: go mod download
|
||||
|
||||
@@ -0,0 +1,114 @@
|
||||
"""Unit tests for lint_setup_go_cache — fixture catch + clean proofs."""
|
||||
import importlib.util
|
||||
import os
|
||||
import textwrap
|
||||
|
||||
import pytest
|
||||
|
||||
# Load the lint script by file path: it lives under .gitea/scripts/, which is
# not an importable package, so a normal `import` cannot reach it.
# abspath keeps the location stable even if the working directory changes
# after collection.
HERE = os.path.dirname(os.path.abspath(__file__))
SCRIPT = os.path.join(HERE, "..", ".gitea", "scripts", "lint_setup_go_cache.py")
spec = importlib.util.spec_from_file_location("lint_setup_go_cache", SCRIPT)
mod = importlib.util.module_from_spec(spec)
# Executes the script's top level; its __main__ guard does not fire here.
spec.loader.exec_module(mod)
|
||||
|
||||
|
||||
def _write(tmp_path, body):
|
||||
p = tmp_path / "wf.yml"
|
||||
p.write_text(textwrap.dedent(body))
|
||||
return str(p)
|
||||
|
||||
|
||||
def test_cache_true_explicit_flagged(tmp_path):
    """An explicit `cache: true` is reported once, at the setup-go line."""
    p = _write(tmp_path, """\
        jobs:
          build:
            runs-on: docker-host
            steps:
              - uses: actions/setup-go@v5
                with:
                  go-version: 'stable'
                  cache: true
                  cache-dependency-path: go.sum
    """)
    viols = mod.scan_file(p)
    assert len(viols) == 1
    # The violation is attributed to the `uses:` line (5th line of the fixture).
    assert viols[0][0] == 5
    assert "cache: true" in viols[0][1]
|
||||
|
||||
|
||||
def test_default_true_with_cachedep_flagged(tmp_path):
    """A cache-* input with no `cache:` key is reported as default-true."""
    p = _write(tmp_path, """\
        jobs:
          build:
            runs-on: docker-host
            steps:
              - uses: actions/setup-go@v5
                with:
                  go-version: 'stable'
                  cache-dependency-path: go.sum
    """)
    viols = mod.scan_file(p)
    assert len(viols) == 1
    # The violation is attributed to the `uses:` line (5th line of the fixture).
    assert viols[0][0] == 5
    assert "defaults to true" in viols[0][1]
|
||||
|
||||
|
||||
def test_bare_setup_go_default_true_flagged(tmp_path):
    """A setup-go step with no cache-related input at all is still reported."""
    p = _write(tmp_path, """\
        jobs:
          build:
            runs-on: docker-host
            steps:
              - uses: actions/setup-go@v5
                with:
                  go-version: 'stable'
              - run: go build ./...
    """)
    viols = mod.scan_file(p)
    assert len(viols) == 1
    # The violation is attributed to the `uses:` line (5th line of the fixture).
    assert viols[0][0] == 5
    assert "defaults to true" in viols[0][1]
|
||||
|
||||
|
||||
def test_cache_false_clean(tmp_path):
    """`cache: false` passes even when a cache-* input is also present."""
    p = _write(tmp_path, """\
        jobs:
          build:
            runs-on: docker-host
            steps:
              - uses: actions/setup-go@v5
                with:
                  go-version: 'stable'
                  cache: false
                  cache-dependency-path: go.sum
    """)
    assert mod.scan_file(p) == []
|
||||
|
||||
|
||||
def test_no_setup_go_clean(tmp_path):
    """A workflow without any setup-go step yields no violations."""
    p = _write(tmp_path, """\
        jobs:
          build:
            runs-on: docker-host
            steps:
              - uses: actions/checkout@v4
              - run: echo hi
    """)
    assert mod.scan_file(p) == []
|
||||
|
||||
|
||||
def test_multiple_steps_only_bad_flagged(tmp_path):
    """With two setup-go steps, only the one enabling the cache is reported."""
    p = _write(tmp_path, """\
        jobs:
          build:
            runs-on: docker-host
            steps:
              - uses: actions/setup-go@v5
                with:
                  go-version: 'stable'
                  cache: false
              - uses: actions/setup-go@v6
                with:
                  go-version: '1.25'
                  cache: true
    """)
    viols = mod.scan_file(p)
    assert len(viols) == 1
    # The second step (9th line of the fixture) is the offender, proving the
    # first step's `cache: false` did not leak into it and vice versa.
    assert viols[0][0] == 9
    assert "cache: true" in viols[0][1]
|
||||
Reference in New Issue
Block a user