ci(cascade): structural hardening — .gitea-aware probe + convergence assertion + PEP 440 enforcement #1603

Open
core-devops wants to merge 1 commit from core-devops/cascade-structural-hardening into main
+106 -3
View File
@@ -274,16 +274,39 @@ jobs:
git config --global user.name "publish-runtime cascade"
git config --global user.email "publish-runtime@moleculesai.app"
# Strict version gate (Fix #3 — RFC internal#613). Symmetric with
# the publisher-side check at publish-runtime.yml:101 — reject
# malformed values at the writer too, so a future caller that
# bypasses the publisher can't leak a non-conforming string (the
# `# fire-publish-image-<epoch>` literal that b40c39ba1 injected
# into openclaw's .runtime-version slipped past head -n1 at
# provision time but is still a real fire-flag).
#
# The pattern accepts only X.Y.Z followed by at most one of
# rcN / aN / bN / .postN / .devN — a deliberate subset of PEP 440.
#
# grep tests each input line separately, so a bare `grep -qE` accepts
# a multi-line value as soon as ANY line matches (e.g. a valid first
# line followed by a `# fire-publish-image-…` line). Require exactly
# one line first; printf is used instead of echo so the value is
# passed through without option/escape interpretation.
PEP440_RE='^[0-9]+\.[0-9]+\.[0-9]+(rc[0-9]+|a[0-9]+|b[0-9]+|\.post[0-9]+|\.dev[0-9]+)?$'
VERSION_LINES="$(printf '%s\n' "$VERSION" | grep -c '')"
if [ "$VERSION_LINES" -ne 1 ] || ! printf '%s\n' "$VERSION" | grep -qE "$PEP440_RE"; then
  echo "::error::cascade refusing to fan out non-PEP-440 value '$VERSION' — publisher contract violation"
  exit 1
fi
WORKDIR="$(mktemp -d)"
for tpl in $TEMPLATES; do
REPO="molecule-ai/molecule-ai-workspace-template-$tpl"
CLONE="$WORKDIR/$tpl"
HTTP=$(curl -sS -o /dev/null -w "%{http_code}" \
# Fix #1 (RFC internal#613) — probe BOTH .github/ and .gitea/.
# The codex template only ports .gitea/workflows/ (no .github/
# mirror). The legacy .github/-only probe returned 404 on codex
# → soft-skip → codex never received .runtime-version → silent
# drift to PyPI floor (incident a66eb848). Soft-skip ONLY if
# NEITHER workflow file exists. Pairs with memory
# `feedback_per_repo_gitea_vs_github_actions_dir`.
HTTP_GH=$(curl -sS -o /dev/null -w "%{http_code}" \
-H "Authorization: token $DISPATCH_TOKEN" \
"$GITEA_URL/api/v1/repos/$REPO/contents/.github/workflows/publish-image.yml")
if [ "$HTTP" = "404" ]; then
echo "↷ $tpl has no publish-image.yml — soft-skip"
HTTP_GT=$(curl -sS -o /dev/null -w "%{http_code}" \
-H "Authorization: token $DISPATCH_TOKEN" \
"$GITEA_URL/api/v1/repos/$REPO/contents/.gitea/workflows/publish-image.yml")
if [ "$HTTP_GH" = "404" ] && [ "$HTTP_GT" = "404" ]; then
echo "↷ $tpl has no publish-image.yml in either .github/ or .gitea/ — soft-skip"
SKIPPED="$SKIPPED $tpl"
continue
fi
@@ -302,6 +325,15 @@ jobs:
fi
cd "$CLONE"
# Fix #3 (RFC internal#613) — re-validate at per-mirror write
# site (defense-in-depth in case future edits mutate $VERSION
# inside the loop, e.g. a per-template suffix).
#
# grep matches line by line, so the value must also be exactly one
# line: otherwise a valid first line followed by extra lines would
# pass the pattern and be written verbatim to .runtime-version.
if [ "$(printf '%s\n' "$VERSION" | grep -c '')" -ne 1 ] \
  || ! printf '%s\n' "$VERSION" | grep -qE "$PEP440_RE"; then
  echo "::error::refusing to write non-PEP-440 value '$VERSION' to $tpl/.runtime-version"
  FAILED="$FAILED $tpl"
  # Leave the clone directory before bailing out of the template loop.
  cd - >/dev/null
  # NOTE(review): break ends the fan-out for every remaining template.
  # That is fine while $VERSION is loop-invariant; switch to `continue`
  # if $VERSION ever becomes per-template.
  break
fi
echo "$VERSION" > .runtime-version
if git diff --quiet -- .runtime-version; then
@@ -343,3 +375,74 @@ jobs:
else
echo "Cascade complete: $VERSION pinned across all manifest workspace_templates."
fi
# Fix #2 (RFC internal#613) — post-flight convergence assertion.
#
# The `cascade` job above writes .runtime-version to each non-skipped
# template, but until this job existed there was no read-back step
# asserting that every mirror ended up at the SAME canonical value.
# The openclaw `0.1.1000\n# fire-publish-image-…` literal (b40c39ba1)
# and the claude-code ↔ openclaw 0.1.129 ↔ 0.1.1000 divergence both
# went undetected for days because the head -n1 consumer in
# publish-image.yml masked the malformed line at provision time.
#
# This job fetches each template's .runtime-version via the Gitea
# contents API, head -n1 normalizes it (matches what publish-image.yml
# consumes), and compares to the canonical RUNTIME_VERSION. Loud failure
# on any divergence — Loki's gitea-actions scraper picks up the
# `::error::` line and the existing main-red-watchdog page fires.
cascade-converged:
  needs: [publish, cascade]
  runs-on: publish
  steps:
    - name: Assert all cascaded mirrors converged to canonical version
      env:
        DISPATCH_TOKEN: ${{ secrets.DISPATCH_TOKEN }}
        RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
      run: |
        # Read back every template's .runtime-version through the Gitea
        # contents API and fail the step unless the first line of each
        # one equals RUNTIME_VERSION.
        #
        # Errors are collected rather than aborting on the first one, so
        # every template is checked and reported; the exit status is
        # decided explicitly at the end.
        set +e
        GITEA_URL="${GITEA_URL:-https://git.moleculesai.app}"
        TEMPLATES="claude-code hermes openclaw codex langgraph crewai autogen deepagents gemini-cli"

        # An empty canonical version is never a valid target, and it
        # would compare equal to a whitespace-only .runtime-version.
        if [ -z "$RUNTIME_VERSION" ]; then
          echo "::error::Cascade convergence FAILED — RUNTIME_VERSION is empty"
          exit 1
        fi

        # Prefer python3; fall back to python where that is the only
        # interpreter name available on the runner.
        PYTHON_BIN="$(command -v python3 || command -v python)"
        PYTHON_BIN="${PYTHON_BIN:-python}"

        # Print the HTTP status code of a contents-API lookup.
        #   $1 - owner/repo
        #   $2 - path inside the repository
        probe_status() {
          curl -sS -o /dev/null -w "%{http_code}" \
            -H "Authorization: token $DISPATCH_TOKEN" \
            "$GITEA_URL/api/v1/repos/$1/contents/$2"
        }

        DIVERGED=""
        MISSING=""
        SKIPPED=""
        OK=""
        for tpl in $TEMPLATES; do
          REPO="molecule-ai/molecule-ai-workspace-template-$tpl"
          # Skip templates that have no publish-image.yml (matches Fix #1
          # soft-skip semantics — those legitimately don't carry a pin).
          HTTP_GH=$(probe_status "$REPO" ".github/workflows/publish-image.yml")
          HTTP_GT=$(probe_status "$REPO" ".gitea/workflows/publish-image.yml")
          if [ "$HTTP_GH" = "404" ] && [ "$HTTP_GT" = "404" ]; then
            echo "↷ $tpl has no publish-image.yml in either .github/ or .gitea/ — not checked"
            SKIPPED="$SKIPPED $tpl"
            continue
          fi

          # The contents API returns the file base64-encoded in "content";
          # strip the embedded newlines so it decodes as one string. Any
          # failure (HTTP error body, bad JSON, no interpreter) leaves
          # RV_B64 empty and is reported as a missing file.
          RV_B64=$(curl -sS -H "Authorization: token $DISPATCH_TOKEN" \
            "$GITEA_URL/api/v1/repos/$REPO/contents/.runtime-version" \
            | "$PYTHON_BIN" -c "import sys,json; d=json.load(sys.stdin); print(d.get('content','').replace('\n',''))" 2>/dev/null)
          if [ -z "$RV_B64" ]; then
            echo "::error msg=cascade-divergence template=$tpl reason=missing-runtime-version::"
            MISSING="$MISSING $tpl"
            continue
          fi

          RV_TEXT=$(printf '%s' "$RV_B64" | base64 -d 2>/dev/null)
          # Compare only the first line, whitespace removed — the same
          # normalisation the head -n1 consumer in publish-image.yml uses.
          GOT=$(printf '%s\n' "$RV_TEXT" | head -n1 | tr -d '[:space:]')
          # Anything after line 1 is invisible to that comparison; surface
          # it as a warning so a stray trailing line does not go unnoticed.
          EXTRA=$(printf '%s\n' "$RV_TEXT" | tail -n +2 | tr -d '[:space:]')
          if [ -n "$EXTRA" ]; then
            echo "::warning::$tpl .runtime-version has content after line 1 — only line 1 is compared"
          fi

          if [ "$GOT" = "$RUNTIME_VERSION" ]; then
            echo "✓ $tpl converged at $GOT"
            OK="$OK $tpl"
          else
            echo "::error msg=cascade-divergence template=$tpl got=$GOT want=$RUNTIME_VERSION::"
            DIVERGED="$DIVERGED $tpl(got=$GOT)"
          fi
        done

        if [ -n "$DIVERGED" ] || [ -n "$MISSING" ]; then
          echo "::error::Cascade convergence FAILED — diverged:$DIVERGED missing:$MISSING"
          exit 1
        fi
        # A run in which every template was skipped verified nothing and
        # must not report success. NOTE(review): presumably this happens
        # when the token cannot see the repositories and every probe
        # answers 404 — confirm against the Gitea API behaviour.
        if [ -z "$OK" ]; then
          echo "::error::Cascade convergence FAILED — no template was verified (skipped:$SKIPPED)"
          exit 1
        fi
        echo "Cascade convergence OK — all cascade-active mirrors at $RUNTIME_VERSION:$OK"