fix(ci): hard-code MOLECULE_ENV in local-provision E2E + retry tenant image build #2470

Merged
devops-engineer merged 1 commits from fix/main-red-e2e-ssrf-publish-retry into main 2026-06-09 02:56:26 +00:00
3 changed files with 44 additions and 10 deletions
+8
View File
@@ -74,6 +74,10 @@ jobs:
env:
PG_CONTAINER: pg-lpe2e-${{ github.run_id }}-${{ github.run_attempt }}
REDIS_CONTAINER: redis-lpe2e-${{ github.run_id }}-${{ github.run_attempt }}
# Hard-code dev mode at the job level so the platform server ALWAYS sees it,
# even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
MOLECULE_ENV: development
SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
@@ -254,6 +258,10 @@ jobs:
env:
PG_CONTAINER: pg-lpe2e-real-${{ github.run_id }}-${{ github.run_attempt }}
REDIS_CONTAINER: redis-lpe2e-real-${{ github.run_id }}-${{ github.run_attempt }}
# Hard-code dev mode at the job level so the platform server ALWAYS sees it,
# even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
MOLECULE_ENV: development
SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
@@ -248,16 +248,36 @@ jobs:
--tag "${STAGING_TENANT_IMAGE_NAME}:${TAG_LATEST}"
)
docker buildx build \
--file ./workspace-server/Dockerfile.tenant \
--build-arg NEXT_PUBLIC_PLATFORM_URL= \
--build-arg GIT_SHA="${GIT_SHA}" \
--label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
--label "org.opencontainers.image.revision=${GIT_SHA}" \
--label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
--label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
"${build_tags[@]}" \
--push .
# Retry loop: buildkit EOF (internal#2468) is often transient on the
# publish runner under memory pressure. Up to 3 attempts with a fresh
# builder each time so a crashed buildkit doesn't poison the next try.
for attempt in 1 2 3; do
echo "::notice::Tenant image build attempt ${attempt}/3 ..."
builder="tenant-builder-${GITHUB_RUN_ID}-${attempt}"
docker buildx create --name "${builder}" --use >/dev/null 2>&1 || true
if docker buildx build \
--builder "${builder}" \
--file ./workspace-server/Dockerfile.tenant \
--build-arg NEXT_PUBLIC_PLATFORM_URL= \
--build-arg GIT_SHA="${GIT_SHA}" \
--label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
--label "org.opencontainers.image.revision=${GIT_SHA}" \
--label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
--label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
"${build_tags[@]}" \
--push .; then
docker buildx rm "${builder}" >/dev/null 2>&1 || true
echo "::notice::Tenant image build succeeded on attempt ${attempt}"
break
fi
echo "::warning::Tenant image build attempt ${attempt} failed — cleaning builder and retrying"
docker buildx rm "${builder}" >/dev/null 2>&1 || true
sleep 10
if [ "$attempt" -eq 3 ]; then
echo "::error::Tenant image build failed after 3 attempts"
exit 1
fi
done
# bp-exempt: production deploy side-effect; merge is gated by CI / all-required and this job waits for push CI before acting.
deploy-production:
@@ -488,6 +488,12 @@ echo ""
# Step 5 — proxy reach (ws-<id>:8000 Docker-DNS rewrite, end to end).
# ----------------------------------------------------------------------------
echo "--- Step 5: proxy reach (POST /workspaces/$WSID/a2a) ---"
# Debug: print the workspace URL the platform stored so SSRF failures are
# actionable (#2468 RCA).
WS_DEBUG=$(admin_curl "$BASE/workspaces/$WSID")
WS_URL_DEBUG=$(ws_field "$WS_DEBUG" "url")
WS_STATUS_DEBUG=$(ws_field "$WS_DEBUG" "status")
echo " workspace url=$WS_URL_DEBUG status=$WS_STATUS_DEBUG"
# In minimax mode we send a DETERMINISTIC known-answer prompt and assert the
# model echoes the answer back — proving a real LLM round-trip, not just
# reachability. Otherwise a plain "ping".