ci(diagnostic): add runner-state probes to publish-workspace-server-image (internal#327 follow-up) #585

Closed
infra-lead wants to merge 2 commits from infra/diagnostic-publish-workspace-server-image into main

View File

@ -114,6 +114,41 @@ jobs:
run: |
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
# Diagnostic — surface the runner's docker/buildx/AWS state BEFORE the
# build steps so that a failure in those steps doesn't leave us guessing.
# The workflow has been red on main since #572: that fix landed at the
# manifest-clone step, and the failure then moved from 9s to 50s into the
# run, i.e. to a later step (suspect buildx setup or ECR auth).
# `if: always()` makes this step run even if a prior step exits 1.
# TODO: remove once main has been green for ≥10 consecutive runs and the
# root cause of the failure is fully understood (tracked in the
# internal#327 follow-up).
- name: Diagnostic — docker/buildx/AWS state (pre-build)
  if: always()
  env:
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    AWS_DEFAULT_REGION: us-east-2
  run: |
    # Read-only probes of the runner's docker / buildx / AWS state.
    # Every probe is best-effort: a failing command is reported in the log
    # but must never fail this step.
    set +e # never fail the diagnostic itself
    echo "::group::docker version"
    docker version
    echo "::endgroup::"
    echo "::group::docker info (subset)"
    docker info --format '{{.ServerVersion}} {{.Driver}} {{.CgroupDriver}} runtimes={{.Runtimes}}'
    echo "::endgroup::"
    echo "::group::docker buildx version (pre-setup)"
    docker buildx version || echo ' (buildx not installed — setup-buildx-action will add it)'
    echo "::endgroup::"
    echo "::group::aws CLI presence + caller identity"
    # Only call STS when the CLI exists; otherwise a missing binary would be
    # reported as a credentials problem and send the investigation the wrong way.
    if command -v aws; then
      aws --version
      aws sts get-caller-identity 2>&1 || echo ' (aws sts failed — credentials may be missing/invalid)'
    else
      echo ' (aws CLI not found on PATH — skipping caller-identity check)'
    fi
    echo "::endgroup::"
    echo "::group::PATH + relevant env"
    echo "PATH=$PATH"
    # Redact every AWS credential value explicitly instead of relying on the
    # runner's secret masking: the access key ID and any session token are
    # sensitive too, not just the secret access key.
    env | grep -E '^(AWS_|DOCKER_|HOME|RUNNER_)' \
      | sed -E 's/^(AWS_(ACCESS_KEY_ID|SECRET_ACCESS_KEY|SESSION_TOKEN))=.*/\1=***/'
    echo "::endgroup::"
    echo "Diagnostic completed at $(date -u +%Y-%m-%dT%H:%M:%SZ)"
# Build + push platform image (inline ECR auth — mirrors the operator-host
# approach; credentials come from GITHUB_SECRET_AWS_ACCESS_KEY_ID /
# GITHUB_SECRET_AWS_SECRET_ACCESS_KEY in Gitea Actions).
@ -122,6 +157,20 @@ jobs:
# Set up Docker Buildx via the upstream action, pinned to a full commit SHA
# (the trailing comment records the release tag that SHA corresponds to).
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
- name: Diagnostic — buildx state (post-setup)
  if: always()
  run: |
    # Best-effort probes of buildx after setup; a failing probe must not
    # fail the step.
    set +e

    # Run the given command inside a collapsible log group titled "$1".
    probe() {
      echo "::group::$1"
      shift
      "$@"
      echo "::endgroup::"
    }

    probe "docker buildx ls" docker buildx ls
    probe "docker buildx version (post-setup)" docker buildx version

    # Written out by hand rather than via probe(): only the inspect output
    # (not the group markers) is merged with stderr and capped at 40 lines.
    echo "::group::active builder inspect"
    docker buildx inspect --bootstrap 2>&1 | head -40
    echo "::endgroup::"
- name: Build & push platform image to ECR (staging-<sha> + staging-latest)
env:
IMAGE_NAME: ${{ env.IMAGE_NAME }}