fix(ci): hard-code MOLECULE_ENV in local-provision E2E + retry tenant image build #2470
@@ -74,6 +74,10 @@ jobs:
|
||||
env:
|
||||
PG_CONTAINER: pg-lpe2e-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
REDIS_CONTAINER: redis-lpe2e-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
# Hard-code dev mode at the job level so the platform server ALWAYS sees it,
|
||||
# even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
|
||||
MOLECULE_ENV: development
|
||||
SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
@@ -254,6 +258,10 @@ jobs:
|
||||
env:
|
||||
PG_CONTAINER: pg-lpe2e-real-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
REDIS_CONTAINER: redis-lpe2e-real-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
# Hard-code dev mode at the job level so the platform server ALWAYS sees it,
|
||||
# even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
|
||||
MOLECULE_ENV: development
|
||||
SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
|
||||
@@ -248,16 +248,36 @@ jobs:
|
||||
--tag "${STAGING_TENANT_IMAGE_NAME}:${TAG_LATEST}"
|
||||
)
|
||||
|
||||
docker buildx build \
|
||||
--file ./workspace-server/Dockerfile.tenant \
|
||||
--build-arg NEXT_PUBLIC_PLATFORM_URL= \
|
||||
--build-arg GIT_SHA="${GIT_SHA}" \
|
||||
--label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
|
||||
--label "org.opencontainers.image.revision=${GIT_SHA}" \
|
||||
--label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
||||
--label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
|
||||
"${build_tags[@]}" \
|
||||
--push .
|
||||
# Retry loop: buildkit EOF (internal#2468) is often transient on the
# publish runner under memory pressure. Up to 3 attempts with a fresh
# builder each time so a crashed buildkit doesn't poison the next try.
#
# Reads: GITHUB_RUN_ID, GIT_SHA, REPO and the build_tags array (all set
# before this point). Exits 1 once the third attempt has failed.
for attempt in 1 2 3; do
  echo "::notice::Tenant image build attempt ${attempt}/3 ..."
  # One uniquely named builder per attempt; the build selects it explicitly
  # via --builder. Creation is best-effort (errors are suppressed).
  builder="tenant-builder-${GITHUB_RUN_ID}-${attempt}"
  docker buildx create --name "${builder}" --use >/dev/null 2>&1 || true
  if docker buildx build \
    --builder "${builder}" \
    --file ./workspace-server/Dockerfile.tenant \
    --build-arg NEXT_PUBLIC_PLATFORM_URL= \
    --build-arg GIT_SHA="${GIT_SHA}" \
    --label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
    --label "org.opencontainers.image.revision=${GIT_SHA}" \
    --label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
    "${build_tags[@]}" \
    --push .; then
    docker buildx rm "${builder}" >/dev/null 2>&1 || true
    echo "::notice::Tenant image build succeeded on attempt ${attempt}"
    break
  fi
  # Always drop the failed attempt's builder, including on the last attempt.
  docker buildx rm "${builder}" >/dev/null 2>&1 || true
  # Final attempt: fail immediately instead of announcing a retry that will
  # never happen and sleeping before the exit.
  if [ "$attempt" -eq 3 ]; then
    echo "::error::Tenant image build failed after 3 attempts"
    exit 1
  fi
  echo "::warning::Tenant image build attempt ${attempt} failed — cleaning builder and retrying"
  # Short pause before the next attempt.
  sleep 10
done
|
||||
|
||||
# bp-exempt: production deploy side-effect; merge is gated by CI / all-required and this job waits for push CI before acting.
|
||||
deploy-production:
|
||||
|
||||
@@ -488,6 +488,12 @@ echo ""
|
||||
# Step 5 — proxy reach (ws-<id>:8000 Docker-DNS rewrite, end to end).
|
||||
# ----------------------------------------------------------------------------
|
||||
echo "--- Step 5: proxy reach (POST /workspaces/$WSID/a2a) ---"
|
||||
# Debug: print the workspace URL the platform stored so SSRF failures are
|
||||
# actionable (#2468 RCA).
|
||||
WS_DEBUG=$(admin_curl "$BASE/workspaces/$WSID")
|
||||
WS_URL_DEBUG=$(ws_field "$WS_DEBUG" "url")
|
||||
WS_STATUS_DEBUG=$(ws_field "$WS_DEBUG" "status")
|
||||
echo " workspace url=$WS_URL_DEBUG status=$WS_STATUS_DEBUG"
|
||||
# In minimax mode we send a DETERMINISTIC known-answer prompt and assert the
|
||||
# model echoes the answer back — proving a real LLM round-trip, not just
|
||||
# reachability. Otherwise a plain "ping".
|
||||
|
||||
Reference in New Issue
Block a user