test(e2e): harden template-delivery asset assertions to settle (#37 / mc#2996 Phase 2a) #3023

Merged
core-devops merged 1 commit from fix/rfc2843-37-harden-delivery-e2e into main 2026-06-17 21:34:54 +00:00
2 changed files with 46 additions and 15 deletions
+20 -8
View File
@@ -14,12 +14,21 @@ name: template-delivery-e2e
# PLUGIN channel and NOT the asset channel (negative control).
#
# STAGED ROLLOUT (do NOT make required until green):
# Phase 1 (now): advisory — runs on the relevant paths + main + dispatch.
# Phase 1 (done): advisory — runs on the relevant paths + main + dispatch.
# Asserts the new two-channel contract.
# Phase 2 (after this goes green twice on the new contract): remove
# continue-on-error and add this check to branch protection
# required_status_checks so a future delivery regression is
# merge-blocking.
# Phase 2a (THIS change, mc#2996): HARDEN the asset-channel assertions so the
# gate is reliable enough to make required. The C (config.yaml)
# + D (prompts) checks now poll within E2E_ASSET_SETTLE_SECS: a
# freshly-online tenant's /configs inspection endpoint can be
# transiently slow or time out on the first read (the 9c2161d red was
# `curl: (28) ... 0 bytes` → config read as size 0, a FALSE stub,
# not a real delivery failure). A genuine stub still fails after
# the budget. continue-on-error STAYS in this PR — the flip is
# gated by lint-pre-flip-continue-on-error on recent green main
# runs, which this hardening produces.
# Phase 2b (follow-up, after 2a is green on main): remove continue-on-error and
# add the emitted context to branch protection
# required_status_checks → a delivery regression is merge-blocking.
#
# Cost: provisions ONE throwaway tenant + ONE seo-agent (real EC2), teardown
# trap deletes the org even on failure. Path-filtered so it only runs when the
@@ -73,14 +82,17 @@ jobs:
# Job renamed for the RFC#2843 #32 two-channel contract (config+prompts via
# the asset channel; seo-all installs via the post-online plugin reconcile,
# not at boot). Renaming the job changes the emitted status context.
# bp-exempt: advisory Phase-1 gate (continue-on-error, mc#2996) — informational, not a required BP context.
# bp-exempt: Phase 2a — still advisory (continue-on-error) while the hardened
# asset assertions bank green main runs; lint-pre-flip-continue-on-error then
# permits the Phase-2b flip + branch-protection add (mc#2996).
delivery:
# No colon in the name — lint-required-context's PyYAML AST parse rejects an
# unquoted scalar containing a colon.
name: Template-asset delivery (fresh seo-agent — config+prompts via asset channel, seo-all via plugin reconcile)
runs-on: ubuntu-latest
# Phase 1: advisory. Remove this line in Phase 2 to make it merge-blocking.
# mc#2996 — Phase 2 promotion tracker (remove continue-on-error; forced 14d renewal cadence).
# Phase 2a: STILL advisory. The flip to required (remove this line + add to
# branch protection) is Phase 2b, gated by lint-pre-flip-continue-on-error on
# the green main runs this hardening produces. mc#2996.
continue-on-error: true # mc#2996
timeout-minutes: 30
env:
+26 -7
View File
@@ -56,6 +56,15 @@ EXPECTED_MODEL="${E2E_EXPECTED_MODEL:-moonshot/kimi-k2.6}"
EXPECTED_PLUGIN="${E2E_EXPECTED_PLUGIN:-seo-all}"
PROVISION_TIMEOUT_SECS="${E2E_PROVISION_TIMEOUT_SECS:-900}"
PLUGIN_INSTALL_TIMEOUT_SECS="${E2E_PLUGIN_INSTALL_TIMEOUT_SECS:-600}"
# Settle budget for the ASSET-channel assertions (C config.yaml, D prompts).
# A freshly-online tenant's /configs inspection endpoint (execs into the
# container) can be transiently slow or time out on the first read — observed:
# `curl: (28) ... 0 bytes` on a just-online box → config.yaml read as size 0 →
# false "stub" failure. The assertions poll within this budget and only FAIL
# after it expires, so a genuine stub still fails loudly (the gate's real
# signal) while a transient/early read no longer false-negatives. Required for
# this gate to be merge-blocking without flaking (mc#2996 Phase 2).
ASSET_SETTLE_SECS="${E2E_ASSET_SETTLE_SECS:-180}"
# Collision-proof slug (random suffix), same convention as the sibling harness.
RAND=$(head -c4 /dev/urandom | od -An -tx1 | tr -d ' \n')
@@ -160,21 +169,31 @@ MODEL=$(tenant_call GET "/workspaces/$WID/model" | python3 -c "import json,sys;p
ok "B: model=$MODEL"
# C. config.yaml delivered + REAL (not the 218 B default stub)
CFG_SIZE=$(tenant_call GET "/workspaces/$WID/files" | python3 -c "
# D. prompts/ delivered (identity prompt)
#
# Both read the tenant's /configs inspection endpoint, which can be transiently
# slow / time out on a just-online box. Poll within ASSET_SETTLE_SECS so a
# transient read or a not-yet-settled volume retries; only fail AFTER the budget
# (a real stub stays a stub). The LAST observed values feed the failure message.
ASSET_DEADLINE=$(( $(date +%s) + ASSET_SETTLE_SECS )); CFG_SIZE=0; PROMPTS=0
while true; do
CFG_SIZE=$(tenant_call GET "/workspaces/$WID/files" | python3 -c "
import json,sys
for f in json.load(sys.stdin):
if f.get('path')=='config.yaml': print(f.get('size',0)); break
else: print(0)
" 2>/dev/null || echo 0)
[ "${CFG_SIZE:-0}" -gt 1024 ] || fail "C: config.yaml size=$CFG_SIZE B (≤1KiB ⇒ default stub, template config NOT delivered)"
ok "C: config.yaml delivered ($CFG_SIZE B)"
# D. prompts/ delivered (identity prompt)
PROMPTS=$(tenant_call GET "/workspaces/$WID/files?path=prompts" | python3 -c "
PROMPTS=$(tenant_call GET "/workspaces/$WID/files?path=prompts" | python3 -c "
import json,sys
d=json.load(sys.stdin); print(len(d) if isinstance(d,list) else 0)
" 2>/dev/null || echo 0)
[ "${PROMPTS:-0}" -gt 0 ] || fail "D: prompts/ empty — identity prompt NOT delivered"
{ [ "${CFG_SIZE:-0}" -gt 1024 ] && [ "${PROMPTS:-0}" -gt 0 ]; } && break
[ "$(date +%s)" -gt "$ASSET_DEADLINE" ] && break
sleep 10
done
[ "${CFG_SIZE:-0}" -gt 1024 ] || fail "C: config.yaml size=$CFG_SIZE B after ${ASSET_SETTLE_SECS}s (≤1KiB ⇒ default stub, template config NOT delivered)"
ok "C: config.yaml delivered ($CFG_SIZE B)"
[ "${PROMPTS:-0}" -gt 0 ] || fail "D: prompts/ empty after ${ASSET_SETTLE_SECS}s — identity prompt NOT delivered"
ok "D: prompts/ delivered ($PROMPTS file(s))"
# E. plugins/seo-all/SKILL.md installed via the post-online PLUGIN reconcile —