diff --git a/.github/workflows/publish-runtime.yml b/.github/workflows/publish-runtime.yml deleted file mode 100644 index 6118c113..00000000 --- a/.github/workflows/publish-runtime.yml +++ /dev/null @@ -1,446 +0,0 @@ -name: publish-runtime - -# DEPRECATED on Gitea Actions — this file is kept for reference only. -# Gitea Actions reads .gitea/workflows/, not .github/workflows/. -# The canonical version is now: .gitea/workflows/publish-runtime.yml -# That port: -# - Drops OIDC trusted publisher (Gitea has no environments/OIDC) -# - Uses PYPI_TOKEN secret instead of gh-action-pypi-publish -# - Uses ${GITHUB_REF#refs/tags/} instead of github.ref_name -# - Drops staging branch trigger (staging branch does not exist) -# - Drops merge_group trigger (Gitea has no merge queue) -# -# Publishes molecule-ai-workspace-runtime to PyPI from monorepo workspace/. -# Monorepo workspace/ is the only source-of-truth for runtime code; this -# workflow is the bridge from monorepo edits to the PyPI artifact that -# the 8 workspace-template-* repos depend on. -# -# Triggered by: -# - Pushing a tag matching `runtime-vX.Y.Z` (the version is derived from -# the tag — `runtime-v0.1.6` publishes `0.1.6`). -# - Manual workflow_dispatch with an explicit `version` input (useful for -# dev/test releases without tagging the repo). -# - Auto: any push to `staging` that touches `workspace/**`. The version -# is derived by querying PyPI for the current latest and bumping the -# patch component. This closes the human-in-loop gap that caused the -# 2026-04-27 RuntimeCapabilities ImportError outage — adapter symbol -# additions in workspace/adapters/base.py used to require an operator -# to remember to publish; now the merge itself triggers the publish. -# -# The workflow: -# 1. Runs scripts/build_runtime_package.py to copy workspace/ → -# build/molecule_runtime/ with imports rewritten (`a2a_client` → -# `molecule_runtime.a2a_client`). -# 2. Builds wheel + sdist with `python -m build`. -# 3. Publishes to PyPI via the PyPA Trusted Publisher action (OIDC). -# No static API token is stored — PyPI verifies the workflow's -# OIDC claim against the trusted-publisher config registered for -# molecule-ai-workspace-runtime (molecule-ai/molecule-core, -# publish-runtime.yml, environment pypi-publish). -# -# After publish: the 8 template repos pick up the new version on their -# next image rebuild (their requirements.txt pin -# `molecule-ai-workspace-runtime>=0.1.0`, so any new release is eligible). -# To force-pull immediately, bump the pin in each template repo's -# requirements.txt and merge — that triggers their own publish-image.yml. - -on: - push: - tags: - - "runtime-v*" - branches: - - staging - paths: - # Auto-publish when staging gets changes that affect what gets - # published. Path filter ONLY applies to branch pushes — tag pushes - # still fire regardless. - # - # workspace/** is the source-of-truth for runtime code. - # scripts/build_runtime_package.py is the build script — changes to - # it (e.g. a fix to the import rewriter or a manifest emit) directly - # affect what ships in the wheel even if no workspace/ file changes. - # The 2026-04-27 lib/ subpackage incident missed an auto-publish for - # exactly this reason — PR #2174 only changed scripts/ and the - # operator had to remember a manual dispatch. - - "workspace/**" - - "scripts/build_runtime_package.py" - workflow_dispatch: - inputs: - version: - description: "Version to publish (e.g. 0.1.6). Required for manual dispatch." - required: true - type: string - -permissions: - contents: read - -# Serialize publishes so two staging merges landing seconds apart don't -# both compute "latest+1" and race on PyPI upload. The second one waits. -concurrency: - group: publish-runtime - cancel-in-progress: false - -jobs: - publish: - runs-on: ubuntu-latest - environment: pypi-publish - permissions: - contents: read - id-token: write # PyPI Trusted Publisher (OIDC) — no PYPI_TOKEN needed - outputs: - version: ${{ steps.version.outputs.version }} - wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }} - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: "3.11" - cache: pip - - - name: Derive version (tag, manual input, or PyPI auto-bump) - id: version - run: | - if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then - VERSION="${{ inputs.version }}" - elif echo "$GITHUB_REF_NAME" | grep -q "^runtime-v"; then - # Tag is `runtime-vX.Y.Z` — strip the prefix. - VERSION="${GITHUB_REF_NAME#runtime-v}" - else - # Auto-publish from staging push. Query PyPI for the current - # latest and bump the patch component. concurrency: group above - # serializes parallel staging merges so we don't race on the - # bump. If PyPI is unreachable, fail loud — better to skip a - # publish than to overwrite an existing version. - LATEST=$(curl -fsS --retry 3 https://pypi.org/pypi/molecule-ai-workspace-runtime/json \ - | python -c "import sys,json; print(json.load(sys.stdin)['info']['version'])") - MAJOR=$(echo "$LATEST" | cut -d. -f1) - MINOR=$(echo "$LATEST" | cut -d. -f2) - PATCH=$(echo "$LATEST" | cut -d. -f3) - VERSION="${MAJOR}.${MINOR}.$((PATCH+1))" - echo "Auto-bumped from PyPI latest $LATEST -> $VERSION" - fi - if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(\.dev[0-9]+|rc[0-9]+|a[0-9]+|b[0-9]+|\.post[0-9]+)?$'; then - echo "::error::version $VERSION does not match PEP 440" - exit 1 - fi - echo "version=$VERSION" >> "$GITHUB_OUTPUT" - echo "Publishing molecule-ai-workspace-runtime $VERSION" - - - name: Install build tooling - run: pip install build twine - - - name: Build package from workspace/ - run: | - python scripts/build_runtime_package.py \ - --version "${{ steps.version.outputs.version }}" \ - --out "${{ runner.temp }}/runtime-build" - - - name: Build wheel + sdist - working-directory: ${{ runner.temp }}/runtime-build - run: python -m build - - - name: Capture wheel SHA256 for cascade content-verification - # Recorded BEFORE upload so the cascade probe can verify the - # bytes Fastly serves under the new version's URL match what - # we built. Closes a hole left by #2197: that probe verified - # pip can resolve the version (catches propagation lag) but - # not that the wheel content matches (would silently pass a - # Fastly stale-content scenario where the new version's URL - # serves an old wheel binary). - id: wheel_hash - working-directory: ${{ runner.temp }}/runtime-build - run: | - set -eu - WHEEL=$(ls dist/*.whl 2>/dev/null | head -1) - if [ -z "$WHEEL" ]; then - echo "::error::No .whl in dist/ — `python -m build` must have failed silently" - exit 1 - fi - HASH=$(sha256sum "$WHEEL" | awk '{print $1}') - echo "wheel_sha256=${HASH}" >> "$GITHUB_OUTPUT" - echo "Local wheel SHA256 (pre-upload): ${HASH}" - echo "Wheel filename: $(basename "$WHEEL")" - - - name: Verify package contents (sanity) - working-directory: ${{ runner.temp }}/runtime-build - # Smoke logic lives in scripts/wheel_smoke.py so the same gate runs - # at both PR-time (runtime-prbuild-compat.yml) and publish-time - # (here). Splitting the smoke across two heredocs let them drift - # apart historically — one script keeps them locked. - run: | - python -m twine check dist/* - python -m venv /tmp/smoke - /tmp/smoke/bin/pip install --quiet dist/*.whl - /tmp/smoke/bin/python "$GITHUB_WORKSPACE/scripts/wheel_smoke.py" - - - name: Publish to PyPI (Trusted Publisher / OIDC) - # PyPI side is configured: project molecule-ai-workspace-runtime → - # publisher molecule-ai/molecule-core, workflow publish-runtime.yml, - # environment pypi-publish. The action mints a short-lived OIDC - # token and exchanges it for a PyPI upload credential — no static - # API token in this repo's secrets. - uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # release/v1 - with: - packages-dir: ${{ runner.temp }}/runtime-build/dist/ - - cascade: - # After PyPI accepts the upload, fan out a repository_dispatch to each - # template repo so they rebuild their image against the new runtime. - # Each template's `runtime-published.yml` receiver picks up the event, - # pulls the new PyPI version (their requirements.txt pin is `>=`), and - # republishes ghcr.io/molecule-ai/workspace-template-:latest. - # - # Soft-fail per repo: if one template's dispatch fails (perms missing, - # repo archived, etc.) we still try the others and surface the failures - # in the workflow summary instead of aborting the whole cascade. - needs: publish - runs-on: ubuntu-latest - steps: - - name: Wait for PyPI to propagate the new version - # PyPI accepts the upload, then takes a few seconds to make the - # new version visible across all THREE surfaces pip touches: - # 1. /pypi///json — metadata endpoint - # 2. /simple// — pip's primary download index - # 3. files.pythonhosted.org — CDN-fronted wheel binary - # Each has its own cache. The previous check polled only (1) - # and would let the cascade fire while (2) or (3) still served - # the previous version, so downstream `pip install` resolved - # to the old wheel. Docker layer cache then locked that stale - # resolution in for subsequent rebuilds (the cache trap that - # bit us five times in one night). - # - # Two-stage probe per poll: - # (a) `pip install --no-cache-dir PACKAGE==VERSION` — succeeds - # only when the version is resolvable. Catches surface (1) - # and (2) propagation lag. - # (b) `pip download` of the same wheel + SHA256 compare against - # the just-built dist's hash. Catches surface (3) lag AND - # Fastly serving stale content under the new version's URL - # (a separate Fastly-corruption mode that pip-install alone - # can't see, since pip install resolves+unpacks against - # whatever bytes Fastly returns and never inspects them). - # Both must pass before the cascade fans out. - # - # The venv is reused across polls; only `pip install`/`pip - # download` run in the loop, with --force-reinstall + - # --no-cache-dir so the previous poll's cached state doesn't - # mask propagation lag. - env: - RUNTIME_VERSION: ${{ needs.publish.outputs.version }} - EXPECTED_SHA256: ${{ needs.publish.outputs.wheel_sha256 }} - run: | - set -eu - if [ -z "$EXPECTED_SHA256" ]; then - echo "::error::publish job did not expose wheel_sha256 — cannot verify wheel content. Refusing to fan out cascade." - exit 1 - fi - python -m venv /tmp/propagation-probe - PROBE=/tmp/propagation-probe/bin - $PROBE/pip install --upgrade --quiet pip - # Poll budget: 30 attempts × (~3-5s pip install + ~3s pip - # download + 4s sleep) ≈ 5-6 min wall on a slow GH runner. - # Generous vs PyPI's typical few-seconds propagation; - # failures past this are signal of a real PyPI / Fastly - # issue, not just lag. - for i in $(seq 1 30); do - # Stage (a): can pip resolve and install the version? - if $PROBE/pip install \ - --quiet \ - --no-cache-dir \ - --force-reinstall \ - --no-deps \ - "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \ - >/dev/null 2>&1; then - INSTALLED=$($PROBE/pip show molecule-ai-workspace-runtime 2>/dev/null \ - | awk -F': ' '/^Version:/{print $2}') - if [ "$INSTALLED" = "$RUNTIME_VERSION" ]; then - # Stage (b): does Fastly serve the bytes we uploaded? - # `pip download` writes the actual .whl file to disk so - # we can sha256sum it (vs `pip install` which unpacks - # and discards). - rm -rf /tmp/probe-dl - mkdir -p /tmp/probe-dl - if $PROBE/pip download \ - --quiet \ - --no-cache-dir \ - --no-deps \ - --dest /tmp/probe-dl \ - "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \ - >/dev/null 2>&1; then - WHEEL=$(ls /tmp/probe-dl/*.whl 2>/dev/null | head -1) - if [ -n "$WHEEL" ]; then - ACTUAL=$(sha256sum "$WHEEL" | awk '{print $1}') - if [ "$ACTUAL" = "$EXPECTED_SHA256" ]; then - echo "::notice::✓ pip resolves AND wheel content matches after ${i} poll(s) (sha256=${EXPECTED_SHA256})" - exit 0 - fi - # Hash mismatch: PyPI accepted our upload but Fastly - # is serving different bytes under the version's URL. - # Most often this is propagation lag of the BINARY - # surface — the version is resolvable but the wheel - # cache hasn't caught up. Retry. - echo "::warning::poll ${i}: wheel content mismatch (got ${ACTUAL:0:12}…, want ${EXPECTED_SHA256:0:12}…) — Fastly likely still serving stale binary, retrying" - fi - fi - fi - fi - sleep 4 - done - echo "::error::pip never resolved molecule-ai-workspace-runtime==${RUNTIME_VERSION} with matching wheel content within ~5 min." - echo "::error::Expected wheel SHA256: ${EXPECTED_SHA256}" - echo "::error::Refusing to fan out cascade against stale or corrupt PyPI surfaces." - exit 1 - - - name: Fan out via push to .runtime-version - env: - # Gitea PAT with write:repository scope on the 8 cascade-active - # template repos. Used here for `git push` (NOT for an API - # dispatch — Gitea 1.22.6 has no repository_dispatch endpoint; - # empirically verified across 6 candidate paths in molecule- - # core#20 issuecomment-913). The push trips each template's - # existing `on: push: branches: [main]` trigger on - # publish-image.yml, which then reads the updated - # .runtime-version via its resolve-version job. - DISPATCH_TOKEN: ${{ secrets.DISPATCH_TOKEN }} - RUNTIME_VERSION: ${{ needs.publish.outputs.version }} - run: | - set +e # don't abort on a single repo failure — collect them all - - # Soft-skip on workflow_dispatch when the token is missing - # (operator ad-hoc test); hard-fail on push so unattended - # publishes can't silently skip the cascade. Same shape as - # the original v1, intentional split per the schedule-vs- - # dispatch hardening 2026-04-28. - if [ -z "$DISPATCH_TOKEN" ]; then - if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then - echo "::warning::DISPATCH_TOKEN secret not set — skipping cascade." - echo "::warning::set it at Settings → Secrets and Variables → Actions, then rerun. Templates will stay on the prior runtime version until either this token is set or each template is rebuilt manually." - exit 0 - fi - echo "::error::DISPATCH_TOKEN secret missing — cascade cannot fan out." - echo "::error::PyPI was published, but the 8 template repos will NOT pick up the new version until this token is restored and a republish dispatches the cascade." - echo "::error::set it at Settings → Secrets and Variables → Actions; then re-trigger publish-runtime via workflow_dispatch." - exit 1 - fi - VERSION="$RUNTIME_VERSION" - if [ -z "$VERSION" ]; then - echo "::error::publish job did not expose a version output — cascade cannot fan out" - exit 1 - fi - - # All 9 workspace templates declared in manifest.json. The list - # MUST stay aligned with manifest.json's workspace_templates — - # cascade-list-drift-gate.yml enforces this in CI per the - # codex-stuck-on-stale-runtime invariant from PR #2556. - # Long-term goal: derive this list from manifest.json so it - # can't drift even on a manifest edit (RFC #388 Phase-1). - # - # Per-template publish-image.yml presence is checked at - # cascade-time below: codex doesn't ship one today, so the - # cascade soft-skips it with an informational message rather - # than dropping it from this list (which would re-introduce - # the drift the gate exists to catch). - GITEA_URL="${GITEA_URL:-https://git.moleculesai.app}" - TEMPLATES="claude-code hermes openclaw codex langgraph crewai autogen deepagents gemini-cli" - FAILED="" - SKIPPED="" - - # Configure git identity once. The persona owning DISPATCH_TOKEN - # is the same identity that authored this commit on each - # template; using a generic "publish-runtime cascade" co-author - # trailer in the message keeps the audit trail honest about the - # workflow-driven origin. - git config --global user.name "publish-runtime cascade" - git config --global user.email "publish-runtime@moleculesai.app" - - WORKDIR="$(mktemp -d)" - for tpl in $TEMPLATES; do - REPO="molecule-ai/molecule-ai-workspace-template-$tpl" - CLONE="$WORKDIR/$tpl" - - # Pre-check: skip templates without a publish-image.yml. - # The cascade's job is to trip the template's on-push - # rebuild — if there's no rebuild workflow, pushing a - # .runtime-version commit is just noise on the target - # repo. Use the Gitea contents API (no clone required for - # the probe). 200 = present; 404 = absent. - HTTP=$(curl -sS -o /dev/null -w "%{http_code}" \ - -H "Authorization: token $DISPATCH_TOKEN" \ - "$GITEA_URL/api/v1/repos/$REPO/contents/.github/workflows/publish-image.yml") - if [ "$HTTP" = "404" ]; then - echo "↷ $tpl has no publish-image.yml — soft-skip (informational; manifest still tracks it)" - SKIPPED="$SKIPPED $tpl" - continue - fi - if [ "$HTTP" != "200" ]; then - echo "::warning::$tpl publish-image.yml probe returned HTTP $HTTP — proceeding anyway, push will surface the real failure if any" - fi - - # Use a per-template attempt loop so a transient race (e.g. - # human pushing to the same template at the same instant) - # doesn't lose the cascade. Bounded retries (3) — beyond - # that we surface the failure and let the operator retry. - attempt=0 - success=false - while [ $attempt -lt 3 ]; do - attempt=$((attempt + 1)) - rm -rf "$CLONE" - if ! git clone --depth=1 \ - "https://x-access-token:${DISPATCH_TOKEN}@${GITEA_URL#https://}/$REPO.git" \ - "$CLONE" >/tmp/clone.log 2>&1; then - echo "::warning::clone $tpl attempt $attempt failed: $(tail -n3 /tmp/clone.log)" - sleep 2 - continue - fi - - cd "$CLONE" - echo "$VERSION" > .runtime-version - - # Idempotency guard: if the file already matches, this - # publish is a re-run for a version already cascaded. - # Don't push a no-op commit (would spuriously re-trip the - # template's on-push and rebuild for nothing). - if git diff --quiet -- .runtime-version; then - echo "✓ $tpl already at $VERSION — no commit needed (idempotent)" - success=true - cd - >/dev/null - break - fi - - git add .runtime-version - git commit -m "chore: pin runtime to $VERSION (publish-runtime cascade)" \ - -m "Co-Authored-By: publish-runtime cascade " \ - >/dev/null - - if git push origin HEAD:main >/tmp/push.log 2>&1; then - echo "✓ $tpl pushed $VERSION on attempt $attempt" - success=true - cd - >/dev/null - break - fi - - # Likely a non-fast-forward — pull-rebase and retry. - # Don't force-push: that would silently overwrite a racing - # human/cascade commit. - echo "::warning::push $tpl attempt $attempt failed, pull-rebasing: $(tail -n3 /tmp/push.log)" - git pull --rebase origin main >/tmp/rebase.log 2>&1 || true - cd - >/dev/null - done - - if [ "$success" != "true" ]; then - FAILED="$FAILED $tpl" - fi - done - rm -rf "$WORKDIR" - - if [ -n "$FAILED" ]; then - echo "::error::Cascade incomplete after 3 retries each. Failed templates:$FAILED" - echo "::error::PyPI publish succeeded; failed templates lag the new version. Re-run this workflow_dispatch with the same version to retry only the laggers (idempotent — already-cascaded templates skip)." - exit 1 - fi - if [ -n "$SKIPPED" ]; then - echo "Cascade complete: pinned $VERSION on cascade-active templates. Soft-skipped (no publish-image.yml):$SKIPPED" - else - echo "Cascade complete: $VERSION pinned across all manifest workspace_templates." - fi diff --git a/.github/workflows/secret-scan.yml b/.github/workflows/secret-scan.yml deleted file mode 100644 index edea6bf9..00000000 --- a/.github/workflows/secret-scan.yml +++ /dev/null @@ -1,214 +0,0 @@ -name: Secret scan - -# Hard CI gate. Refuses any PR / push whose diff additions contain a -# recognisable credential. Defense-in-depth for the #2090-class incident -# (2026-04-24): GitHub's hosted Copilot Coding Agent leaked a ghs_* -# installation token into tenant-proxy/package.json via `npm init` -# slurping the URL from a token-embedded origin remote. We can't fix -# upstream's clone hygiene, so we gate here. -# -# Also the canonical reusable workflow for the rest of the org. Other -# Molecule-AI repos enroll with a single 3-line workflow: -# -# jobs: -# secret-scan: -# uses: molecule-ai/molecule-core/.github/workflows/secret-scan.yml@staging -# -# Pin to @staging not @main — staging is the active default branch, -# main lags via the staging-promotion workflow. Updates ride along -# automatically on the next consumer workflow run. -# -# Same regex set as the runtime's bundled pre-commit hook -# (molecule-ai-workspace-runtime: molecule_runtime/scripts/pre-commit-checks.sh). -# Keep the two sides aligned when adding patterns. - -on: - pull_request: - types: [opened, synchronize, reopened] - push: - branches: [main, staging] - # Required for GitHub merge queue: the queue's pre-merge CI run on - # `gh-readonly-queue/...` refs needs this check to fire so the queue - # gets a real result instead of stalling forever AWAITING_CHECKS. - merge_group: - types: [checks_requested] - # Reusable workflow entry point for other Molecule-AI repos. - workflow_call: - -jobs: - scan: - name: Scan diff for credential-shaped strings - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - fetch-depth: 2 # need previous commit to diff against on push events - - # For pull_request events the diff base may be many commits behind - # HEAD and absent from the shallow clone. Fetch it explicitly. - - name: Fetch PR base SHA (pull_request events only) - if: github.event_name == 'pull_request' - run: git fetch --depth=1 origin ${{ github.event.pull_request.base.sha }} - - # For merge_group events the queue's pre-merge ref is a commit on - # `gh-readonly-queue/...` whose parent is the queue's base_sha. - # That parent isn't part of the queue branch's shallow clone, so - # we fetch it explicitly. Without this the diff falls through to - # "no BASE → scan entire tree" mode and false-positives on legit - # test fixtures (e.g. canvas/src/lib/validation/__tests__/secret-formats.test.ts). - - name: Fetch merge_group base SHA (merge_group events only) - if: github.event_name == 'merge_group' - run: git fetch --depth=1 origin ${{ github.event.merge_group.base_sha }} - - - name: Refuse if credential-shaped strings appear in diff additions - env: - # Plumb event-specific SHAs through env so the script doesn't - # need conditional `${{ ... }}` interpolation per event type. - # github.event.before/after only exist on push events; - # merge_group has its own base_sha/head_sha; pull_request has - # pull_request.base.sha / pull_request.head.sha. - PR_BASE_SHA: ${{ github.event.pull_request.base.sha }} - PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} - MG_BASE_SHA: ${{ github.event.merge_group.base_sha }} - MG_HEAD_SHA: ${{ github.event.merge_group.head_sha }} - PUSH_BEFORE: ${{ github.event.before }} - PUSH_AFTER: ${{ github.event.after }} - run: | - # Pattern set covers GitHub family (the actual #2090 vector), - # Anthropic / OpenAI / Slack / AWS. Anchored on prefixes with low - # false-positive rates against agent-generated content. Mirror of - # molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh - # — keep aligned. - SECRET_PATTERNS=( - 'ghp_[A-Za-z0-9]{36,}' # GitHub PAT (classic) - 'ghs_[A-Za-z0-9]{36,}' # GitHub App installation token - 'gho_[A-Za-z0-9]{36,}' # GitHub OAuth user-to-server - 'ghu_[A-Za-z0-9]{36,}' # GitHub OAuth user - 'ghr_[A-Za-z0-9]{36,}' # GitHub OAuth refresh - 'github_pat_[A-Za-z0-9_]{82,}' # GitHub fine-grained PAT - 'sk-ant-[A-Za-z0-9_-]{40,}' # Anthropic API key - 'sk-proj-[A-Za-z0-9_-]{40,}' # OpenAI project key - 'sk-svcacct-[A-Za-z0-9_-]{40,}' # OpenAI service-account key - 'sk-cp-[A-Za-z0-9_-]{60,}' # MiniMax API key (F1088 vector — caught only after the fact) - 'xox[baprs]-[A-Za-z0-9-]{20,}' # Slack tokens - 'AKIA[0-9A-Z]{16}' # AWS access key ID - 'ASIA[0-9A-Z]{16}' # AWS STS temp access key ID - ) - - # Determine the diff base. Each event type stores its SHAs in - # a different place — see the env block above. - case "${{ github.event_name }}" in - pull_request) - BASE="$PR_BASE_SHA" - HEAD="$PR_HEAD_SHA" - ;; - merge_group) - BASE="$MG_BASE_SHA" - HEAD="$MG_HEAD_SHA" - ;; - *) - BASE="$PUSH_BEFORE" - HEAD="$PUSH_AFTER" - ;; - esac - - # On push events with shallow clones, BASE may be present in - # the event payload but absent from the local object DB - # (fetch-depth=2 doesn't always reach the previous commit - # across true merges). Try fetching it on demand. If the - # fetch fails — e.g. the SHA was force-overwritten — we fall - # through to the empty-BASE branch below, which scans the - # entire tree as if every file were new. Correct, just slow. - if [ -n "$BASE" ] && ! echo "$BASE" | grep -qE '^0+$'; then - if ! git cat-file -e "$BASE" 2>/dev/null; then - git fetch --depth=1 origin "$BASE" 2>/dev/null || true - fi - fi - - # Files added or modified in this change. - if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$' || ! git cat-file -e "$BASE" 2>/dev/null; then - # New branch / no previous SHA / BASE unreachable — check the - # entire tree as added content. Slower, but correct on first - # push. - CHANGED=$(git ls-tree -r --name-only HEAD) - DIFF_RANGE="" - else - CHANGED=$(git diff --name-only --diff-filter=AM "$BASE" "$HEAD") - DIFF_RANGE="$BASE $HEAD" - fi - - if [ -z "$CHANGED" ]; then - echo "No changed files to inspect." - exit 0 - fi - - # Self-exclude: this workflow file legitimately contains the - # pattern strings as regex literals. Without an exclude it would - # block its own merge. - SELF=".github/workflows/secret-scan.yml" - - OFFENDING="" - # `while IFS= read -r` (not `for f in $CHANGED`) so filenames - # containing whitespace don't word-split silently — a path - # with a space would otherwise produce two iterations on - # tokens that aren't real filenames, breaking the - # self-exclude + diff lookup. - while IFS= read -r f; do - [ -z "$f" ] && continue - [ "$f" = "$SELF" ] && continue - if [ -n "$DIFF_RANGE" ]; then - ADDED=$(git diff --no-color --unified=0 "$BASE" "$HEAD" -- "$f" 2>/dev/null | grep -E '^\+[^+]' || true) - else - # No diff range (new branch first push) — scan the full file - # contents as if every line were new. - ADDED=$(cat "$f" 2>/dev/null || true) - fi - [ -z "$ADDED" ] && continue - for pattern in "${SECRET_PATTERNS[@]}"; do - if echo "$ADDED" | grep -qE "$pattern"; then - OFFENDING="${OFFENDING}${f} (matched: ${pattern})\n" - break - fi - done - done <<< "$CHANGED" - - if [ -n "$OFFENDING" ]; then - echo "::error::Credential-shaped strings detected in diff additions:" - # `printf '%b' "$OFFENDING"` interprets backslash escapes - # (the literal `\n` we appended above becomes a newline) - # WITHOUT treating OFFENDING as a format string. Plain - # `printf "$OFFENDING"` is a format-string sink: a filename - # containing `%` would be interpreted as a conversion - # specifier, corrupting the error message (or printing - # `%(missing)` artifacts). - printf '%b' "$OFFENDING" - echo "" - echo "The actual matched values are NOT echoed here, deliberately —" - echo "round-tripping a leaked credential into CI logs widens the blast" - echo "radius (logs are searchable + retained)." - echo "" - echo "Recovery:" - echo " 1. Remove the secret from the file. Replace with an env var" - echo " reference (e.g. \${{ secrets.GITHUB_TOKEN }} in workflows," - echo " process.env.X in code)." - echo " 2. If the credential was already pushed (this PR's commit" - echo " history reaches a public ref), treat it as compromised —" - echo " ROTATE it immediately, do not just remove it. The token" - echo " remains valid in git history forever and may be in any" - echo " log/cache that consumed this branch." - echo " 3. Force-push the cleaned commit (or stack a revert) and" - echo " re-run CI." - echo "" - echo "If the match is a false positive (test fixture, docs example," - echo "or this workflow's own regex literals): use a clearly-fake" - echo "placeholder like ghs_EXAMPLE_DO_NOT_USE that doesn't satisfy" - echo "the length suffix, OR add the file path to the SELF exclude" - echo "list in this workflow with a short reason." - echo "" - echo "Mirror of the regex set lives in the runtime's bundled" - echo "pre-commit hook (molecule-ai-workspace-runtime:" - echo "molecule_runtime/scripts/pre-commit-checks.sh) — keep aligned." - exit 1 - fi - - echo "✓ No credential-shaped strings in this change."