test curl status capture workflow lint #764
113
.gitea/scripts/lint-curl-status-capture.py
Normal file
113
.gitea/scripts/lint-curl-status-capture.py
Normal file
@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Lint workflow bash for curl status-code capture pollution.
|
||||
|
||||
The bad shape is:
|
||||
|
||||
HTTP_CODE=$(curl ... -w '%{http_code}' ... || echo "000")
|
||||
|
||||
`curl -w` writes the HTTP code to stdout before returning non-zero, so
|
||||
fallback output inside the same command substitution appends another code.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import NamedTuple
|
||||
|
||||
|
||||
SELF = ".gitea/workflows/lint-curl-status-capture.yml"
|
||||
|
||||
|
||||
class Finding(NamedTuple):
|
||||
path: str
|
||||
snippet: str
|
||||
|
||||
|
||||
BAD_STATUS_CAPTURE = re.compile(
|
||||
r"""
|
||||
\$\(\s*
|
||||
curl\b
|
||||
[^)]*
|
||||
-w\s*['"]%\{http_code\}['"]
|
||||
[^)]*
|
||||
\|\|\s*
|
||||
(?:
|
||||
echo\s+['"]?000['"]?
|
||||
|
|
||||
printf\s+['"]000['"]
|
||||
)
|
||||
\s*\)
|
||||
""",
|
||||
re.DOTALL | re.VERBOSE,
|
||||
)
|
||||
|
||||
|
||||
def _logical_shell(content: str) -> str:
|
||||
"""Collapse bash line continuations so one curl command is one string."""
|
||||
return re.sub(r"\\\s*\n\s*", " ", content)
|
||||
|
||||
|
||||
def scan_content(path: str, content: str) -> list[Finding]:
|
||||
flat = _logical_shell(content)
|
||||
return [
|
||||
Finding(path=path, snippet=re.sub(r"\s+", " ", match.group(0)).strip()[:160])
|
||||
for match in BAD_STATUS_CAPTURE.finditer(flat)
|
||||
]
|
||||
|
||||
|
||||
def scan_paths(paths: list[str]) -> list[Finding]:
|
||||
findings: list[Finding] = []
|
||||
for path in paths:
|
||||
if path == SELF:
|
||||
continue
|
||||
content = Path(path).read_text(encoding="utf-8")
|
||||
findings.extend(scan_content(path, content))
|
||||
return findings
|
||||
|
||||
|
||||
def default_paths() -> list[str]:
|
||||
return sorted(glob.glob(".gitea/workflows/*.yml"))
|
||||
|
||||
|
||||
def print_report(findings: list[Finding]) -> None:
|
||||
if not findings:
|
||||
print("OK No curl-status-capture pollution patterns detected")
|
||||
return
|
||||
|
||||
print(f"::error::Found {len(findings)} curl-status-capture pollution site(s):")
|
||||
for finding in findings:
|
||||
print(
|
||||
f"::error file={finding.path}::Curl status-capture pollution: "
|
||||
"'|| echo/printf 000' inside a $(curl ... -w '%{http_code}' ...) "
|
||||
"subshell. On non-2xx or connection failure, curl's -w writes a "
|
||||
"status, then exits non-zero, then the fallback appends another "
|
||||
"status. Fix: route -w into a tempfile so the exit code cannot "
|
||||
"pollute stdout."
|
||||
)
|
||||
print(f" matched: {finding.snippet}...")
|
||||
|
||||
print()
|
||||
print("Fix template:")
|
||||
print(" set +e")
|
||||
print(" curl ... -w '%{http_code}' >code.txt 2>/dev/null")
|
||||
print(" set -e")
|
||||
print(' HTTP_CODE=$(cat code.txt 2>/dev/null)')
|
||||
print(' [ -z "$HTTP_CODE" ] && HTTP_CODE="000"')
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("paths", nargs="*", help="workflow files to scan")
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
paths = args.paths or default_paths()
|
||||
findings = scan_paths(paths)
|
||||
print_report(findings)
|
||||
return 1 if findings else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@ -30,10 +30,16 @@ name: Lint curl status-code capture
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths: ['.gitea/workflows/**']
|
||||
paths:
|
||||
- '.gitea/workflows/**'
|
||||
- '.gitea/scripts/lint-curl-status-capture.py'
|
||||
- 'tests/test_lint_curl_status_capture.py'
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths: ['.gitea/workflows/**']
|
||||
paths:
|
||||
- '.gitea/workflows/**'
|
||||
- '.gitea/scripts/lint-curl-status-capture.py'
|
||||
- 'tests/test_lint_curl_status_capture.py'
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
@ -50,55 +56,4 @@ jobs:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Find curl ... -w '%{http_code}' ... || echo "000" subshells
|
||||
run: |
|
||||
set -uo pipefail
|
||||
# Multi-line aware: look for `$(curl ... -w '%{http_code}' ... || echo "000")`
|
||||
# subshell where the entire command-substitution wraps a curl that
|
||||
# ends with `|| echo "000"`. Must distinguish from the SAFE shape
|
||||
# `$(cat tempfile 2>/dev/null || echo "000")` — `cat` with a missing
|
||||
# tempfile produces empty stdout, no pollution.
|
||||
python3 <<'PY'
|
||||
import os, re, sys, glob
|
||||
|
||||
BAD_FILES = []
|
||||
|
||||
# Match the buggy substitution across newlines: $(curl ... -w '%{http_code}' ... || echo "000")
|
||||
# The `\\n` is the bash line-continuation that lets curl flags span lines.
|
||||
# We collapse continuation lines first, then look for the single-line bad pattern.
|
||||
PATTERN = re.compile(
|
||||
r'\$\(\s*curl\b[^)]*-w\s*[\'"]%\{http_code\}[\'"][^)]*\|\|\s*echo\s+"000"\s*\)',
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
# Self-skip: this lint workflow contains the literal anti-pattern in
|
||||
# its own docstring — that's intentional, not a bug.
|
||||
SELF = ".gitea/workflows/lint-curl-status-capture.yml"
|
||||
|
||||
for f in sorted(glob.glob(".gitea/workflows/*.yml")):
|
||||
if f == SELF:
|
||||
continue
|
||||
with open(f) as fh:
|
||||
content = fh.read()
|
||||
# Collapse bash line-continuations (\\\n + leading whitespace)
|
||||
# into a single logical line so the regex can see the full
|
||||
# curl invocation as one chunk.
|
||||
flat = re.sub(r'\\\s*\n\s*', ' ', content)
|
||||
for m in PATTERN.finditer(flat):
|
||||
BAD_FILES.append((f, m.group(0)[:120]))
|
||||
|
||||
if not BAD_FILES:
|
||||
print("OK No curl-status-capture pollution patterns detected")
|
||||
sys.exit(0)
|
||||
|
||||
print(f"::error::Found {len(BAD_FILES)} curl-status-capture pollution site(s):")
|
||||
for f, snippet in BAD_FILES:
|
||||
print(f"::error file={f}::Curl status-capture pollution: '|| echo \"000\"' inside a $(curl ... -w '%{{http_code}}' ...) subshell. On non-2xx or connection failure, curl's -w writes a status, then exits non-zero, then the || echo appends another '000' — producing 'HTTP 000000' or '409000' that fails comparisons silently. Fix: route -w into a tempfile so the exit code can't pollute stdout. See memory feedback_curl_status_capture_pollution.md.")
|
||||
print(f" matched: {snippet}...")
|
||||
print()
|
||||
print("Fix template:")
|
||||
print(' set +e')
|
||||
print(' curl ... -w \'%{http_code}\' >code.txt 2>/dev/null')
|
||||
print(' set -e')
|
||||
print(' HTTP_CODE=$(cat code.txt 2>/dev/null)')
|
||||
print(' [ -z "$HTTP_CODE" ] && HTTP_CODE="000"')
|
||||
sys.exit(1)
|
||||
PY
|
||||
python3 .gitea/scripts/lint-curl-status-capture.py
|
||||
|
||||
88
tests/test_lint_curl_status_capture.py
Normal file
88
tests/test_lint_curl_status_capture.py
Normal file
@ -0,0 +1,88 @@
|
||||
"""Tests for `.gitea/scripts/lint-curl-status-capture.py`.
|
||||
|
||||
Run:
|
||||
python3 -m pytest tests/test_lint_curl_status_capture.py -v
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
SCRIPT_PATH = (
|
||||
Path(__file__).resolve().parent.parent
|
||||
/ ".gitea"
|
||||
/ "scripts"
|
||||
/ "lint-curl-status-capture.py"
|
||||
)
|
||||
|
||||
|
||||
def _load_module():
|
||||
spec = importlib.util.spec_from_file_location("lint_curl_status_capture", SCRIPT_PATH)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
|
||||
def test_finds_quoted_echo_fallback_pollution():
|
||||
lint = _load_module()
|
||||
content = """
|
||||
HTTP_CODE=$(curl -sS -o /tmp/body -w "%{http_code}" https://example.test || echo "000")
|
||||
"""
|
||||
|
||||
findings = lint.scan_content("workflow.yml", content)
|
||||
|
||||
assert len(findings) == 1
|
||||
assert "echo" in findings[0].snippet
|
||||
|
||||
|
||||
def test_finds_unquoted_echo_fallback_pollution():
|
||||
lint = _load_module()
|
||||
content = """
|
||||
HTTP_CODE=$(curl -sS -o /tmp/body -w '%{http_code}' https://example.test || echo 000)
|
||||
"""
|
||||
|
||||
findings = lint.scan_content("workflow.yml", content)
|
||||
|
||||
assert len(findings) == 1
|
||||
assert "echo" in findings[0].snippet
|
||||
|
||||
|
||||
def test_finds_printf_fallback_pollution():
|
||||
lint = _load_module()
|
||||
content = """
|
||||
HTTP_CODE=$(curl -sS -o /tmp/body -w '%{http_code}' https://example.test || printf '000')
|
||||
"""
|
||||
|
||||
findings = lint.scan_content("workflow.yml", content)
|
||||
|
||||
assert len(findings) == 1
|
||||
assert "printf" in findings[0].snippet
|
||||
|
||||
|
||||
def test_ignores_tempfile_fallback_after_curl():
|
||||
lint = _load_module()
|
||||
content = """
|
||||
set +e
|
||||
curl -sS -o /tmp/body -w '%{http_code}' https://example.test >/tmp/code
|
||||
rc=$?
|
||||
set -e
|
||||
HTTP_CODE=$(cat /tmp/code 2>/dev/null || echo "000")
|
||||
[ -z "$HTTP_CODE" ] && HTTP_CODE="000"
|
||||
"""
|
||||
|
||||
assert lint.scan_content("workflow.yml", content) == []
|
||||
|
||||
|
||||
def test_collapses_bash_line_continuations():
|
||||
lint = _load_module()
|
||||
content = """
|
||||
HTTP_CODE=$(curl -sS -o /tmp/body \\
|
||||
-w "%{http_code}" \\
|
||||
https://example.test \\
|
||||
|| echo "000")
|
||||
"""
|
||||
|
||||
findings = lint.scan_content("workflow.yml", content)
|
||||
|
||||
assert len(findings) == 1
|
||||
Loading…
Reference in New Issue
Block a user