From 0c9fda559ab95bb14907193a6a975abdc222dd09 Mon Sep 17 00:00:00 2001
From: Hongming Wang
Date: Thu, 16 Apr 2026 09:25:20 -0700
Subject: [PATCH] fix(ci): bypass docker login + macOS Keychain for image publish
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Six prior PRs (#273, #319, #322, #341, #484, #486) all kept calling
`docker login` and tried to coerce credsStore via increasingly elaborate
config tricks. None worked. The latest publish-canvas-image and
publish-platform-image runs on main are still failing with:

    error storing credentials - err: exit status 1, out:
    `User interaction is not allowed. (-25308)`

Verified locally on the runner host (2026-04-16): `docker login` on macOS
unconditionally writes credentials to osxkeychain after a successful
login, regardless of the config presented to it.

    # I wrote this:
    { "auths": {}, "credsStore": "", "credHelpers": {} }

    # After `docker login --config <dir> ghcr.io ...` succeeded:
    {
      "auths": { "ghcr.io": {} },    # empty — auth is in Keychain
      "credsStore": "osxkeychain"    # Docker rewrote it back
    }

So the `--config` flag, DOCKER_CONFIG env var, credsStore="" etc. all
share the same fate: Docker re-enables osxkeychain after every successful
login. The Mac mini runner is a launchd user agent with a locked
Keychain, so storage fails with -25308.

This PR replaces the `docker login` invocation entirely. We write
`base64(user:pat)` directly into the disposable DOCKER_CONFIG's `auths`
map. `docker/build-push-action@v5` and the daemon honor the auths map for
push without ever calling `docker login`, so the Keychain is never
involved.

Same shape in both workflows:
- publish-canvas-image.yml — single registry (ghcr.io)
- publish-platform-image.yml — two registries (ghcr.io + registry.fly.io)

Fly username remains literal "x".

Security:
- Token env vars never echoed. The heredoc writes the auth blob under
  `umask 077` (file mode 600). The temp config dir lives under
  RUNNER_TEMP and is reaped at job end.
- Diagnostics preserved (docker version + binary ls + registry keys only,
  no values) so future runner permission regressions remain visible
  without leaking secrets.

Equivalent to closed PR #464 — re-opening because main is still broken
(verified by inspecting the most recent failure). The closing comment on
#464 stated the issue was already addressed by #341, but it isn't.
---
NOTE(review): this patch was recovered from a copy whose newlines were
collapsed and whose `<...>` spans were stripped. The heredoc bodies
(`<<EOF ... EOF`) in both workflows were reconstructed so that they match
the hunk line counts and the diffstat below; their exact text, and the
inner indentation of the example lines in the YAML comments, are
assumptions. Verify against commit 0c9fda55 before applying.

 .github/workflows/publish-canvas-image.yml   | 42 ++++++++----
 .github/workflows/publish-platform-image.yml | 71 +++++++++++---------
 2 files changed, 68 insertions(+), 45 deletions(-)

diff --git a/.github/workflows/publish-canvas-image.yml b/.github/workflows/publish-canvas-image.yml
index c0c0323f..420aab85 100644
--- a/.github/workflows/publish-canvas-image.yml
+++ b/.github/workflows/publish-canvas-image.yml
@@ -44,22 +44,42 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
-      - name: Isolate Docker config (skip macOS Keychain)
-        # Same workaround as publish-platform-image.yml — launchd service
-        # runners can't reach the Keychain, so we write a disposable
-        # config.json with an empty credsStore to force auths-map storage.
-        # IMPORTANT: no indentation inside the heredoc — JSON must be valid.
+      - name: Configure GHCR auth (write auths map; do NOT call docker login)
+        # `docker login` on macOS unconditionally writes credentials to the
+        # osxkeychain credential helper, even when DOCKER_CONFIG/config.json
+        # declares `credsStore: ""` and even when invoked with `--config`.
+        # Verified locally 2026-04-16 — after a successful login, Docker
+        # rewrites the same config file to:
+        #   { "auths": { "ghcr.io": {} }, "credsStore": "osxkeychain" }
+        # i.e. the auth lives in the Keychain, not the config file. The
+        # Mac mini runner is a launchd user agent with a locked Keychain,
+        # so storage fails with `User interaction is not allowed (-25308)`.
+        #
+        # Six prior PRs (#273, #319, #322, #341, #484, #486) all kept calling
+        # `docker login` and tried to coerce credsStore — none worked.
+        # The only reliable fix is to skip `docker login` entirely and write
+        # the auth string directly. `docker/build-push-action@v5` and the
+        # daemon honor the `auths` map for push without needing login.
         shell: bash
+        env:
+          GHCR_USER: ${{ github.actor }}
+          GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
-          set -euo pipefail
+          set -eu
           mkdir -p "${RUNNER_TEMP}/docker-config"
-          printf '{"auths":{},"credsStore":"","credHelpers":{}}\n' > "${RUNNER_TEMP}/docker-config/config.json"
+          AUTH=$(printf '%s:%s' "${GHCR_USER}" "${GHCR_TOKEN}" | base64)
+          umask 077
+          cat > "${RUNNER_TEMP}/docker-config/config.json" <<EOF
+          {"auths":{"ghcr.io":{"auth":"${AUTH}"}}}
+          EOF
           echo "DOCKER_CONFIG=${RUNNER_TEMP}/docker-config" >> "${GITHUB_ENV}"
-          echo "=== config.json ==="
-          cat "${RUNNER_TEMP}/docker-config/config.json"
+          # Diagnostics that don't leak the token.
           echo "=== docker ==="
           command -v docker || echo "(docker not in PATH)"
           docker --version 2>&1 || true
+          ls -la /usr/local/bin/docker /opt/homebrew/bin/docker 2>&1 || true
+          echo "=== auths registries (no values) ==="
+          grep -o '"[a-zA-Z0-9.-]*\.io"' "${RUNNER_TEMP}/docker-config/config.json" || true
 
       - name: Set up QEMU
         # Apple-silicon runner building linux/amd64 images for x86 hosts.
@@ -70,10 +90,6 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
-      - name: Log in to GHCR
-        shell: bash
-        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
-
       - name: Compute tags
         id: tags
         shell: bash
diff --git a/.github/workflows/publish-platform-image.yml b/.github/workflows/publish-platform-image.yml
index 860d24e6..11ee1aa7 100644
--- a/.github/workflows/publish-platform-image.yml
+++ b/.github/workflows/publish-platform-image.yml
@@ -40,39 +40,54 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
-      - name: Isolate Docker config (skip keychain)
-        # The Mac mini self-hosted runner runs as a non-interactive
-        # launchd service; docker/login-action's default credential store
-        # is the macOS Keychain, which raises
-        #   error storing credentials - err: exit status 1, out:
-        #   `User interaction is not allowed. (-25308)`
-        # without an unlocked desktop session.
+      - name: Configure registry auth (write auths map; do NOT call docker login)
+        # `docker login` on macOS unconditionally writes credentials to the
+        # osxkeychain credential helper, even when DOCKER_CONFIG/config.json
+        # declares `credsStore: ""` and even when invoked with `--config`.
+        # Verified locally 2026-04-16 — after a successful login, Docker
+        # rewrites the same config file to:
+        #   { "auths": { "ghcr.io": {} }, "credsStore": "osxkeychain" }
+        # i.e. the auth lives in the Keychain, not the config file. The
+        # Mac mini runner is a launchd user agent with a locked Keychain,
+        # so storage fails with `User interaction is not allowed (-25308)`.
         #
-        # Point DOCKER_CONFIG at a per-run temp dir. IMPORTANT: writing
-        # `{"auths": {}}` alone is NOT enough — Docker on macOS picks up
-        # `osxkeychain` as the default credential store even when
-        # config.json doesn't declare one, inheriting from Docker
-        # Desktop's bundled credsStore binding. We must explicitly set
-        # `credsStore` to an empty string AND clear `credHelpers` so the
-        # login step writes credentials into the auths map of this
-        # disposable config.json rather than reaching for the keychain.
-        # (First tried in #273 without the empty-credsStore line; #319
-        # + #322 merges showed it still regressed.)
+        # Six prior PRs (#273, #319, #322, #341, #484, #486) all kept calling
+        # `docker login` and tried to coerce credsStore — none worked.
+        # The only reliable fix is to skip `docker login` entirely and write
+        # the auth strings directly. `docker/build-push-action@v5` and the
+        # daemon honor the `auths` map for push without needing login.
         #
-        # Plus diagnostics: print the docker path so a future EACCES on
-        # /usr/local/bin/docker surfaces in the log instead of via a
-        # cryptic docker-login failure mid-step.
+        # Fly registry username MUST be literal "x" (verified 2026-04-15) —
+        # any other value returns 401. FLY_API_TOKEN lives in GitHub Actions
+        # secrets AND in `fly secrets` on molecule-cp; see
+        # docs/runbooks/saas-secrets.md before rotating.
         shell: bash
+        env:
+          GHCR_USER: ${{ github.actor }}
+          GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          FLY_TOKEN: ${{ secrets.FLY_API_TOKEN }}
         run: |
-          set -euo pipefail
+          set -eu
           mkdir -p "${RUNNER_TEMP}/docker-config"
-          printf '{"auths":{},"credsStore":"","credHelpers":{}}\n' > "${RUNNER_TEMP}/docker-config/config.json"
+          GHCR_AUTH=$(printf '%s:%s' "${GHCR_USER}" "${GHCR_TOKEN}" | base64)
+          FLY_AUTH=$(printf '%s:%s' 'x' "${FLY_TOKEN}" | base64)
+          umask 077
+          cat > "${RUNNER_TEMP}/docker-config/config.json" <<EOF
+          {
+            "auths": {
+              "ghcr.io": { "auth": "${GHCR_AUTH}" },
+              "registry.fly.io": { "auth": "${FLY_AUTH}" }
+            }
+          }
+          EOF
           echo "DOCKER_CONFIG=${RUNNER_TEMP}/docker-config" >> "${GITHUB_ENV}"
-          echo "=== config.json ==="
-          cat "${RUNNER_TEMP}/docker-config/config.json"
+          # Diagnostics that don't leak the tokens.
           echo "=== docker ==="
           command -v docker || echo "(docker not in PATH)"
           docker --version 2>&1 || true
+          ls -la /usr/local/bin/docker /opt/homebrew/bin/docker 2>&1 || true
+          echo "=== auths registries (no values) ==="
+          grep -o '"[a-zA-Z0-9.-]*\.io"' "${RUNNER_TEMP}/docker-config/config.json" || true
 
       - name: Set up QEMU
         # Required on the Apple-silicon self-hosted runner — Fly tenant machines
@@ -87,14 +102,6 @@ jobs:
         # builds without local docker daemon wrangling.
         uses: docker/setup-buildx-action@v3
 
-      - name: Log in to GHCR
-        shell: bash
-        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
-
-      - name: Log in to Fly registry
-        shell: bash
-        run: echo "${{ secrets.FLY_API_TOKEN }}" | docker login registry.fly.io -u x --password-stdin
-
       - name: Compute tags
         id: tags
         # Emit two tags per build: `latest` (floating, always the main tip)