fix(clone-manifest): don't block tokenless dev bootstrap on private templates #3193
@@ -114,3 +114,13 @@ jobs:
|
||||
# contract (e.g. accepting skipped/failure as success, or re-inlining
|
||||
# the logic into ci.yml) fails CI.
|
||||
run: bash .gitea/scripts/tests/test_all_required_check.sh
|
||||
|
||||
- name: clone-manifest.sh tolerant-bootstrap contract (network-free)
|
||||
# Regression lock for the strict/best-effort split in clone-manifest.sh:
|
||||
# tokenless dev bootstrap must skip ONLY `private: true` repos and still
|
||||
# hard-fail on any public-repo clone failure (so a real outage / bad ref
|
||||
# is never swallowed as a missing-creds skip). git-stubbed, no network.
|
||||
run: |
|
||||
set -euo pipefail
|
||||
command -v jq >/dev/null 2>&1 || { sudo apt-get update -qq && sudo apt-get install -y -qq jq; }
|
||||
sh scripts/test-clone-manifest-tolerant.sh
|
||||
|
||||
+4
-4
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"_comment": "Platform template registry. Repos may be public or platform-private; CI and runtime template-cache refresh clone them with the SSOT-managed template read token, then strip .git metadata before use. Customer/private tenant templates remain outside this platform manifest.",
|
||||
"_comment": "Platform template registry. Repos may be public or platform-private; CI and runtime template-cache refresh clone them with the SSOT-managed template read token, then strip .git metadata before use. Customer/private tenant templates remain outside this platform manifest. An entry with `\"private\": true` requires MOLECULE_GITEA_TOKEN to clone — clone-manifest.sh's best-effort (tokenless) mode SKIPS exactly these and still hard-fails on any UNMARKED (public) repo that fails to clone, so a real outage / bad ref / deleted public repo is never silently swallowed as a missing-creds skip.",
|
||||
"_pinning_contract": "RFC #2927 — every entry's `ref` is pinned to an immutable commit SHA (not a branch like `main` and not a mutable tag). The previous `ref:main` exposure made provisioning non-reproducible — a merge to ANY template's `main` instantly reached every subsequent provision. Pinning restores: (a) reproducible identity (same SHA → same config.yaml + prompts + skills on every boot); (b) auditable provenance (the SHA is the artifact's content-address); (c) explicit upgrades (bumping a pin is a reviewed PR, not silent). CI test TestManifest_RefPinningCompleteness (workspace-server/internal/handlers/manifest_pinning_test.go) asserts the pinning contract: (1) every ref is a 40-char commit SHA, (2) every pinned SHA is reachable in the named repo, (3) workspace_template entries include config.yaml in the pinned ref's tree. To bump a pin: PR with the new SHA, tests run, driver reviews the diff. PLATFORM-AGENT is now pinned (its config.yaml exists at the pinned SHA): clone-manifest.sh stages it at .tenant-bundle-deps/workspace-configs-templates/platform-agent, which Dockerfile.platform-agent COPYs to bake the concierge identity into the molecule-platform-agent image (publish-workspace-server-image.yml). The concierge (kind=platform) provisions on that image (core#2495).",
|
||||
"version": 1,
|
||||
"plugins": [
|
||||
@@ -30,9 +30,9 @@
|
||||
{"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "ca7e1efafb982f6d97a6a188067fd9198b2f18b7"},
|
||||
{"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "143e69b56f2530433141f5a87373e8a76578c52e"},
|
||||
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "070447a0afdf66ae6f2bb166ac3e2b2884456951"},
|
||||
{"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "3f9fd7ef6ea4dd912bb65446607f3c3c991ea76e"},
|
||||
{"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "f6a18eb4716a040fb24c5fb79830c50c5368a2da", "runtime": "claude-code"},
|
||||
{"name": "platform-agent", "repo": "molecule-ai/molecule-ai-workspace-template-platform-agent", "ref": "776916df9ada9c03b761f9485b2ed0933a9c9140", "runtime": "claude-code"}
|
||||
{"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "3f9fd7ef6ea4dd912bb65446607f3c3c991ea76e", "private": true},
|
||||
{"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "f6a18eb4716a040fb24c5fb79830c50c5368a2da", "runtime": "claude-code", "private": true},
|
||||
{"name": "platform-agent", "repo": "molecule-ai/molecule-ai-workspace-template-platform-agent", "ref": "776916df9ada9c03b761f9485b2ed0933a9c9140", "runtime": "claude-code", "private": true}
|
||||
],
|
||||
"org_templates": [
|
||||
{"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "990d7b23f65dadd7afe05958a77eeb74082b4feb"},
|
||||
|
||||
+95
-17
@@ -7,11 +7,20 @@
|
||||
#
|
||||
# Requires: git, jq (lighter than python3 — ~2MB vs ~50MB in Alpine)
|
||||
#
|
||||
# Auth (optional):
|
||||
# Repos in manifest.json may be public or platform-private. CI and
|
||||
# operator refresh jobs should set MOLECULE_GITEA_TOKEN to the
|
||||
# SSOT-managed template read token. Anonymous clone still works for
|
||||
# public entries, but private platform templates depend on the token.
|
||||
# Auth (optional) — two modes, keyed on MOLECULE_GITEA_TOKEN:
|
||||
# STRICT (token set; CI / operator refresh): the token grants access to
|
||||
# the private platform templates, so ANY clone failure is a genuine
|
||||
# error and aborts (exit 1). This is the build-correctness path.
|
||||
# BEST-EFFORT (no token; ecosystem contributor via setup.sh/dev-start.sh):
|
||||
# a contributor shouldn't need creds to spin up a local dev env. Clone
|
||||
# what's public; SKIP (with a warning) ONLY repos the manifest marks
|
||||
# `"private": true` — those need a token. A failure of any UNMARKED
|
||||
# (public) repo still ABORTS (exit 1), so a bad ref / deleted repo /
|
||||
# network outage is never swallowed as a missing-creds skip. Exit 0 when
|
||||
# the only failures were private skips. The palette is then sparse but
|
||||
# the platform runs.
|
||||
# Set MOLECULE_GITEA_TOKEN to the SSOT-managed template read token to
|
||||
# populate the full set.
|
||||
#
|
||||
# The token (when set) never enters the Docker image: this script runs
|
||||
# in the trusted CI context BEFORE `docker buildx build`, populates
|
||||
@@ -38,6 +47,24 @@ MANIFEST_JSON="$(_strip_comments)"
|
||||
|
||||
EXPECTED=0
|
||||
CLONED=0
|
||||
SKIPPED=0
|
||||
|
||||
# Strict vs best-effort mode.
|
||||
#
|
||||
# STRICT=1 when MOLECULE_GITEA_TOKEN is set (CI / operator refresh): the
|
||||
# token grants access to the private platform templates, so ANY clone
|
||||
# failure is a genuine error and must fail the build.
|
||||
#
|
||||
# STRICT=0 when no token is set (ecosystem contributor running
|
||||
# infra/scripts/setup.sh → dev-start.sh): the private platform templates
|
||||
# (seo-agent, platform-agent, google-adk — internal IP) are simply not
|
||||
# fetchable. Hard-failing here blocked local bootstrap on creds a
|
||||
# contributor doesn't have (and shouldn't need). In this mode we clone what
|
||||
# is public, SKIP (with a warning) only repos marked `private: true`, and exit 0 — the
|
||||
# Canvas template palette is then sparse but the platform runs. We still
|
||||
# fail loudly if even the PUBLIC repos can't be cloned (real network /
|
||||
# manifest breakage, not just missing creds).
|
||||
if [ -n "${MOLECULE_GITEA_TOKEN:-}" ]; then STRICT=1; else STRICT=0; fi
|
||||
|
||||
# clone_one_with_retry — clone a single repo, retrying on transient failure.
|
||||
#
|
||||
@@ -56,10 +83,13 @@ CLONED=0
|
||||
# The durable fix is more runner RAM/swap (tracked with Infra-SRE); this
|
||||
# just stops a single flake from being release-blocking.
|
||||
#
|
||||
# Args: <target_dir> <name> <clone_url> <display_url> <ref>
|
||||
# Args: <target_dir> <name> <clone_url> <display_url> <ref> [max_attempts]
|
||||
# max_attempts defaults to 3 (CI: retry transient SIGKILL/network flakes).
|
||||
# Best-effort callers pass 1 — a tokenless private-repo clone fails on auth,
|
||||
# not a transient flake, so retrying just wastes the backoff window.
|
||||
clone_one_with_retry() {
|
||||
local tdir="$1" name="$2" url="$3" display="$4" ref="$5"
|
||||
local attempt=1 max_attempts=3 backoff
|
||||
local attempt=1 max_attempts="${6:-3}" backoff
|
||||
|
||||
while : ; do
|
||||
# A killed attempt can leave a partial directory behind; git clone
|
||||
@@ -85,7 +115,11 @@ clone_one_with_retry() {
|
||||
fi
|
||||
|
||||
if [ "$attempt" -ge "$max_attempts" ]; then
|
||||
echo "::error::clone failed after ${max_attempts} attempts: ${display}" >&2
|
||||
# Single-attempt best-effort callers handle their own (friendlier)
|
||||
# messaging; only the retrying CI path emits the ::error:: annotation.
|
||||
if [ "$max_attempts" -gt 1 ]; then
|
||||
echo "::error::clone failed after ${max_attempts} attempts: ${display}" >&2
|
||||
fi
|
||||
return 1
|
||||
fi
|
||||
backoff=$((attempt * 3)) # 3s, then 6s
|
||||
@@ -107,10 +141,15 @@ clone_category() {
|
||||
|
||||
local i=0
|
||||
while [ "$i" -lt "$count" ]; do
|
||||
local name repo ref
|
||||
local name repo ref private
|
||||
name=$(echo "$MANIFEST_JSON" | jq -r ".${category}[$i].name")
|
||||
repo=$(echo "$MANIFEST_JSON" | jq -r ".${category}[$i].repo")
|
||||
ref=$(echo "$MANIFEST_JSON" | jq -r ".${category}[$i].ref // \"main\"")
|
||||
# `private: true` marks repos that REQUIRE a token to clone. Only
|
||||
# these may be skipped in best-effort (tokenless) mode; an unmarked
|
||||
# (public) repo that fails is a genuine error and must fail the run
|
||||
# even without a token. (manifest.json _comment.)
|
||||
private=$(echo "$MANIFEST_JSON" | jq -r ".${category}[$i].private // false")
|
||||
|
||||
# Idempotent: skip if the target already looks populated. Lets the
|
||||
# README quickstart rerun setup.sh safely without having to delete
|
||||
@@ -140,8 +179,33 @@ clone_category() {
|
||||
fi
|
||||
|
||||
echo " cloning $display_url -> $target_dir/$name (ref=$ref)"
|
||||
clone_one_with_retry "$target_dir" "$name" "$clone_url" "$display_url" "$ref"
|
||||
CLONED=$((CLONED + 1))
|
||||
if [ "$STRICT" -eq 1 ]; then
|
||||
# Token present → genuine clone. Retry transient flakes; a final
|
||||
# failure is a real error and must abort the build.
|
||||
if clone_one_with_retry "$target_dir" "$name" "$clone_url" "$display_url" "$ref" 3; then
|
||||
CLONED=$((CLONED + 1))
|
||||
else
|
||||
echo "::error::clone failed for '$name' ($display_url) with MOLECULE_GITEA_TOKEN set — genuine failure, not a missing-creds skip" >&2
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
# No token → best effort. A failure is only TOLERATED for a repo
|
||||
# explicitly marked `private: true` (needs creds we don't have).
|
||||
# A failure of any UNMARKED (public) repo — bad ref, deleted repo,
|
||||
# DNS/network outage, git regression, Gitea non-auth error — is a
|
||||
# GENUINE error and must still abort, so a real outage can never be
|
||||
# silently swallowed as a missing-creds skip.
|
||||
if clone_one_with_retry "$target_dir" "$name" "$clone_url" "$display_url" "$ref" 1; then
|
||||
CLONED=$((CLONED + 1))
|
||||
elif [ "$private" = "true" ]; then
|
||||
echo " ⚠ skipping '$name' — marked private and MOLECULE_GITEA_TOKEN is unset (set the token to include it). Bootstrap continues with a reduced template palette." >&2
|
||||
SKIPPED=$((SKIPPED + 1))
|
||||
rm -rf "$target_dir/$name" # drop any partial dir so a later token-backed run re-clones cleanly
|
||||
else
|
||||
echo "::error::clone failed for PUBLIC repo '$name' ($display_url) — genuine failure (not a missing-creds skip). Check the manifest ref / network / repo existence. (If this repo is actually private, mark it \"private\": true in manifest.json.)" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
i=$((i + 1))
|
||||
done
|
||||
|
||||
@@ -158,10 +222,24 @@ clone_category "org_templates" "$ORG_DIR"
|
||||
echo "==> Cloning plugins..."
|
||||
clone_category "plugins" "$PLUGINS_DIR"
|
||||
|
||||
# Verify all repos were cloned
|
||||
if [ "$CLONED" -ne "$EXPECTED" ]; then
|
||||
echo "::error::Expected $EXPECTED repos but only cloned $CLONED — some clones failed"
|
||||
exit 1
|
||||
# Verify the outcome.
|
||||
if [ "$STRICT" -eq 1 ]; then
|
||||
# Token present: every expected repo must have cloned.
|
||||
if [ "$CLONED" -ne "$EXPECTED" ]; then
|
||||
echo "::error::Expected $EXPECTED repos but only cloned $CLONED — some clones failed"
|
||||
exit 1
|
||||
fi
|
||||
echo "==> Done. $CLONED/$EXPECTED repos cloned successfully."
|
||||
else
|
||||
# No token, and we got here — so every failure was a tolerated `private`
|
||||
# skip (any PUBLIC-repo failure would already have exited 1 above). A real
|
||||
# outage can't reach this point: it fails the public clones first. setup.sh
|
||||
# tolerates an empty palette (the platform falls through to a bare
|
||||
# default), so we exit 0. CLONED==0 here just means every manifest entry
|
||||
# was marked private — warn loudly, but still don't block bootstrap.
|
||||
if [ "$CLONED" -eq 0 ] && [ "$EXPECTED" -gt 0 ]; then
|
||||
echo " ⚠ WARNING: 0/$EXPECTED template/plugin repos cloned ($SKIPPED private, skipped) — every manifest entry needs MOLECULE_GITEA_TOKEN. The platform will start with an EMPTY template palette. Set the token to populate it." >&2
|
||||
else
|
||||
echo "==> Done (best-effort, no MOLECULE_GITEA_TOKEN). $CLONED/$EXPECTED cloned, $SKIPPED skipped (marked private; set the token to include them)."
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "==> Done. $CLONED/$EXPECTED repos cloned successfully."
|
||||
|
||||
Executable
+127
@@ -0,0 +1,127 @@
|
||||
#!/bin/sh
|
||||
# test-clone-manifest-tolerant.sh — local, network-free test of the
|
||||
# strict/best-effort behavior in clone-manifest.sh.
|
||||
#
|
||||
# Stubs `git` so a clone fails for a repo listed in $PRIVATE_REPOS when the
|
||||
# URL is anonymous (no oauth2: userinfo) and for any repo in $HARD_FAIL_REPOS
|
||||
# (fails even with a token) — mirroring real Gitea. The SKIP decision in
|
||||
# clone-manifest.sh is driven by the manifest's `"private": true` flag, NOT by
|
||||
# which repo the stub fails, so these tests prove the safety boundary:
|
||||
#
|
||||
# A. no token → public clone, MARKED-private skip, exit 0
|
||||
# B. token set → every repo clones, exit 0
|
||||
# C. token set + genuine failure → exit 1 (strict)
|
||||
# E. no token + a PUBLIC (unmarked) repo hard-fails → exit 1
|
||||
# (the key negative case: best-effort must NOT swallow public failures)
|
||||
# D. no token + EVERY entry marked private → exit 0 + empty-palette warning
|
||||
#
|
||||
# Run: sh scripts/test-clone-manifest-tolerant.sh
|
||||
# Exit: 0 all pass, 1 otherwise.
|
||||
|
||||
set -eu
|
||||
|
||||
HERE=$(dirname -- "$0")
|
||||
HERE=$(cd -- "$HERE" && pwd)
|
||||
CLONE_SH="$HERE/clone-manifest.sh"
|
||||
WORK=$(mktemp -d)
|
||||
trap 'rm -rf "$WORK"' EXIT
|
||||
mkdir -p "$WORK/bin"
|
||||
|
||||
# git stub. Fails clone when:
|
||||
# - repo basename is in $HARD_FAIL_REPOS (fails even with a token), OR
|
||||
# - repo basename is in $PRIVATE_REPOS and the URL is anonymous.
|
||||
cat > "$WORK/bin/git" <<'STUB'
|
||||
#!/bin/sh
|
||||
[ "$1" = "clone" ] || exit 0
|
||||
url=""; target=""
|
||||
for a in "$@"; do
|
||||
case "$a" in https://*) url="$a" ;; esac
|
||||
target="$a"
|
||||
done
|
||||
repo=$(basename "$url" .git)
|
||||
case " ${HARD_FAIL_REPOS:-} " in *" $repo "*) exit 1 ;; esac
|
||||
authed=0; case "$url" in *oauth2:*) authed=1 ;; esac
|
||||
case " ${PRIVATE_REPOS:-} " in
|
||||
*" $repo "*) [ "$authed" -eq 1 ] || exit 1 ;;
|
||||
esac
|
||||
mkdir -p "$target" && echo stub > "$target/STUB"
|
||||
exit 0
|
||||
STUB
|
||||
chmod +x "$WORK/bin/git"
|
||||
|
||||
# Default manifest: 2 public + 1 MARKED-private workspace template.
|
||||
write_manifest() { cat > "$WORK/manifest.json"; }
|
||||
write_manifest <<'JSON'
|
||||
{
|
||||
"version": 1, "plugins": [], "org_templates": [],
|
||||
"workspace_templates": [
|
||||
{"name": "pub-a", "repo": "molecule-ai/pub-a", "ref": "main"},
|
||||
{"name": "priv-x", "repo": "molecule-ai/priv-x", "ref": "main", "private": true},
|
||||
{"name": "pub-b", "repo": "molecule-ai/pub-b", "ref": "main"}
|
||||
]
|
||||
}
|
||||
JSON
|
||||
|
||||
# fail <message> — report a failed test case on stdout and abort with exit 1.
# printf (not echo) so a message that embeds captured script output is printed
# literally: under /bin/sh implementations such as dash, echo would interpret
# backslash escapes inside the message and garble the diagnostic.
fail() { printf 'FAIL: %s\n' "$1"; exit 1; }
|
||||
# run <env opts/assignments...> → exit code preserved, output in $OUT.
|
||||
# env opts (-u) must precede NAME=VALUE assignments (BSD env), so "$@" goes first.
|
||||
run() {
|
||||
OUT=$(env "$@" PATH="$WORK/bin:$PATH" \
|
||||
sh "$CLONE_SH" "$WORK/manifest.json" "$WORK/ws" "$WORK/org" "$WORK/plugins" 2>&1)
|
||||
rc=$?
|
||||
printf '%s\n' "$OUT" > "$WORK/last.out"
|
||||
return $rc
|
||||
}
|
||||
reset() { rm -rf "$WORK/ws" "$WORK/org" "$WORK/plugins"; }
|
||||
|
||||
# --- A. no token → marked-private skipped, public cloned, exit 0 ----------
|
||||
reset
|
||||
if run -u MOLECULE_GITEA_TOKEN PRIVATE_REPOS="priv-x"; then :; else fail "A: tokenless run should exit 0 (got $?)"; fi
|
||||
[ -f "$WORK/ws/pub-a/STUB" ] && [ -f "$WORK/ws/pub-b/STUB" ] || fail "A: public repos not cloned"
|
||||
[ -d "$WORK/ws/priv-x" ] && fail "A: private repo should have been skipped"
|
||||
echo "$OUT" | grep -q "skipping 'priv-x'" || fail "A: missing skip warning for priv-x"
|
||||
echo "$OUT" | grep -q "2/3 cloned, 1 skipped" || fail "A: summary wrong: $(echo "$OUT" | tail -1)"
|
||||
echo "ok A: tokenless → 2 public cloned, 1 marked-private skipped, exit 0"
|
||||
|
||||
# --- B. token set → all clone, exit 0 ------------------------------------
|
||||
reset
|
||||
if run MOLECULE_GITEA_TOKEN=tok PRIVATE_REPOS="priv-x"; then :; else fail "B: tokened run should exit 0"; fi
|
||||
[ -f "$WORK/ws/priv-x/STUB" ] || fail "B: private repo should clone with token"
|
||||
echo "$OUT" | grep -q "3/3 repos cloned successfully" || fail "B: summary wrong: $(echo "$OUT" | tail -1)"
|
||||
echo "ok B: with token → all 3 cloned, exit 0"
|
||||
|
||||
# --- C. token set + genuine failure → exit 1 -----------------------------
|
||||
reset
|
||||
if run MOLECULE_GITEA_TOKEN=tok HARD_FAIL_REPOS=pub-b; then
|
||||
fail "C: a genuine clone failure with token set must exit 1"
|
||||
fi
|
||||
echo "$OUT" | grep -q "genuine failure" || fail "C: missing genuine-failure error"
|
||||
echo "ok C: with token + real failure → exit 1 (strict preserved)"
|
||||
|
||||
# --- E. no token + PUBLIC repo hard-fails → exit 1 (the key boundary) -----
|
||||
# pub-a clones, priv-x is skipped (marked private), pub-b fails and is NOT
|
||||
# marked private → must abort. Proves best-effort does NOT swallow a genuine
|
||||
# public failure (bad ref / deleted repo / outage), even after a success.
|
||||
reset
|
||||
if run -u MOLECULE_GITEA_TOKEN PRIVATE_REPOS="priv-x" HARD_FAIL_REPOS=pub-b; then
|
||||
fail "E: tokenless run must exit 1 when a PUBLIC repo fails to clone"
|
||||
fi
|
||||
echo "$OUT" | grep -q "PUBLIC repo 'pub-b'" || fail "E: missing public-failure error for pub-b"
|
||||
echo "ok E: tokenless + public hard-fail → exit 1 (no fail-open on public repos)"
|
||||
|
||||
# --- D. no token + EVERY entry marked private → exit 0 + warning ----------
|
||||
write_manifest <<'JSON'
|
||||
{
|
||||
"version": 1, "plugins": [], "org_templates": [],
|
||||
"workspace_templates": [
|
||||
{"name": "priv-a", "repo": "molecule-ai/priv-a", "ref": "main", "private": true},
|
||||
{"name": "priv-b", "repo": "molecule-ai/priv-b", "ref": "main", "private": true}
|
||||
]
|
||||
}
|
||||
JSON
|
||||
reset
|
||||
if run -u MOLECULE_GITEA_TOKEN PRIVATE_REPOS="priv-a priv-b"; then :; else fail "D: all-private tokenless run must exit 0 (got $?)"; fi
|
||||
echo "$OUT" | grep -q "EMPTY template palette" || fail "D: missing empty-palette warning"
|
||||
echo "ok D: tokenless + all-private → exit 0 with empty-palette warning"
|
||||
|
||||
echo "PASS: clone-manifest.sh tolerant tokenless bootstrap"
|
||||
Reference in New Issue
Block a user