diff --git a/.gitea/workflows/continuous-synth-e2e.yml b/.gitea/workflows/continuous-synth-e2e.yml index 299d42e0..23160862 100644 --- a/.gitea/workflows/continuous-synth-e2e.yml +++ b/.gitea/workflows/continuous-synth-e2e.yml @@ -131,7 +131,7 @@ jobs: # Forced false for cron; respected for manual dispatch E2E_KEEP_ORG: ${{ github.event.inputs.keep_org == 'true' && '1' || '' }} MOLECULE_CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }} - MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }} + MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }} # MiniMax key is the canary's PRIMARY auth path. claude-code # template's `minimax` provider routes ANTHROPIC_BASE_URL to # api.minimax.io/anthropic and reads MINIMAX_API_KEY at boot. @@ -168,8 +168,8 @@ jobs: # can read the verify-secrets step's stderr — the failure is # itself the verification signal. if [ -z "${MOLECULE_ADMIN_TOKEN:-}" ]; then - echo "::error::CP_STAGING_ADMIN_API_TOKEN secret missing — synth E2E cannot run" - echo "::error::Set it at Settings → Secrets and Variables → Actions; pull from staging-CP's CP_ADMIN_API_TOKEN env in Railway." + echo "::error::MOLECULE_STAGING_ADMIN_TOKEN secret missing — synth E2E cannot run" + echo "::error::Set it at Settings → Secrets and Variables → Actions (same secret used by canary-staging.yml and all e2e-staging workflows)." exit 1 fi diff --git a/.gitea/workflows/redeploy-tenants-on-staging.yml b/.gitea/workflows/redeploy-tenants-on-staging.yml index 6243d3f9..634e8f8b 100644 --- a/.gitea/workflows/redeploy-tenants-on-staging.yml +++ b/.gitea/workflows/redeploy-tenants-on-staging.yml @@ -90,14 +90,16 @@ jobs: run: sleep 30 - name: Call staging-CP redeploy-fleet - # CP_STAGING_ADMIN_API_TOKEN must be set as a repo/org secret - # on molecule-ai/molecule-core, matching staging-CP's - # CP_ADMIN_API_TOKEN env var (visible in Railway controlplane - # / staging environment). 
Stored separately from the prod - # CP_ADMIN_API_TOKEN so a leak of one doesn't auth the other. + # MOLECULE_STAGING_ADMIN_TOKEN must be set as a repo/org secret + # on molecule-ai/molecule-core. This is the confirmed-existing + # staging CP admin token (also used by canary-staging.yml and + # all e2e-staging-*.yml workflows). The alternative name + # CP_STAGING_ADMIN_API_TOKEN (per the original port comment) was + # never populated in Gitea per the issue #425 audit, so this step + # uses the confirmed-working MOLECULE_STAGING_ADMIN_TOKEN instead. env: CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }} - CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }} + MOLECULE_STAGING_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }} TARGET_TAG: ${{ inputs.target_tag || 'staging-latest' }} CANARY_SLUG: ${{ inputs.canary_slug || '' }} SOAK_SECONDS: ${{ inputs.soak_seconds || '60' }} @@ -110,15 +112,15 @@ jobs: # and sweep-cf-tunnels): hard-fail on auto-trigger when the # secret is missing so a misconfigured-repo doesn't silently # serve stale staging tenants. Soft-skip on operator dispatch. - if [ -z "${CP_STAGING_ADMIN_API_TOKEN:-}" ]; then + if [ -z "${MOLECULE_STAGING_ADMIN_TOKEN:-}" ]; then if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then - echo "::warning::CP_STAGING_ADMIN_API_TOKEN secret not set — skipping redeploy" - echo "::warning::Set CP_STAGING_ADMIN_API_TOKEN in repo secrets to enable auto-redeploy." - echo "::notice::Pull the value from staging-CP's CP_ADMIN_API_TOKEN env in Railway." + echo "::warning::MOLECULE_STAGING_ADMIN_TOKEN secret not set — skipping redeploy" + echo "::warning::Set MOLECULE_STAGING_ADMIN_TOKEN in repo secrets to enable auto-redeploy." + echo "::notice::This secret is shared with canary-staging.yml and all e2e-staging workflows." 
exit 0 fi - echo "::error::staging redeploy cannot run — CP_STAGING_ADMIN_API_TOKEN secret missing" - echo "::error::set it at Settings → Secrets and Variables → Actions; pull from staging-CP's CP_ADMIN_API_TOKEN env in Railway." + echo "::error::staging redeploy cannot run — MOLECULE_STAGING_ADMIN_TOKEN secret missing" + echo "::error::set it at Settings → Secrets and Variables → Actions (same secret used by canary-staging.yml)." exit 1 fi @@ -151,7 +153,7 @@ jobs: set +e curl -sS -o "$HTTP_RESPONSE" -w '%{http_code}' \ -m 1200 \ - -H "Authorization: Bearer $CP_STAGING_ADMIN_API_TOKEN" \ + -H "Authorization: Bearer $MOLECULE_STAGING_ADMIN_TOKEN" \ -H "Content-Type: application/json" \ -X POST "$CP_URL/cp/admin/tenants/redeploy-fleet" \ -d "$BODY" >"$HTTP_CODE_FILE" diff --git a/.gitea/workflows/sweep-aws-secrets.yml b/.gitea/workflows/sweep-aws-secrets.yml index a6572e8e..5544a7db 100644 --- a/.gitea/workflows/sweep-aws-secrets.yml +++ b/.gitea/workflows/sweep-aws-secrets.yml @@ -29,13 +29,15 @@ name: Sweep stale AWS Secrets Manager secrets # reconciler enumerator) is filed as a separate controlplane # issue. This sweeper is the immediate cost-relief stopgap. # -# IAM principal: AWS_JANITOR_ACCESS_KEY_ID / AWS_JANITOR_SECRET_ACCESS_KEY. -# This is a DEDICATED principal — the production `molecule-cp` IAM -# user lacks `secretsmanager:ListSecrets` (it only has -# Get/Create/Update/Delete on specific resources, scoped to its -# operational needs). The janitor needs ListSecrets across the -# `molecule/tenant/*` prefix, which warrants a separate principal so -# we don't broaden the prod-CP policy. +# AWS credentials: the confirmed Gitea secrets are AWS_ACCESS_KEY_ID / +# AWS_SECRET_ACCESS_KEY (the molecule-cp IAM user). These are the same +# credentials used by the rest of the platform. 
The dedicated +# AWS_JANITOR_* naming (which the original GitHub workflow used) was +# never populated in Gitea — the existing secrets are AWS_ACCESS_KEY_ID / +# AWS_SECRET_ACCESS_KEY (per the issue #425 audit). The molecule-cp principal +# is stated to have secretsmanager:ListSecrets (verify: the text this replaces said it did not); +# if ListSecrets is missing or later revoked, a dedicated janitor principal +# would need to be created and the Gitea secret names updated here. # # Safety: the script's MAX_DELETE_PCT gate (default 50%, mirroring # sweep-cf-orphans.yml — tenant secrets are durable by design, unlike @@ -71,8 +73,8 @@ jobs: timeout-minutes: 30 env: AWS_REGION: ${{ secrets.AWS_REGION || 'us-east-1' }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_JANITOR_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_JANITOR_SECRET_ACCESS_KEY }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }} CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }} MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '50' }} @@ -99,13 +101,11 @@ jobs: if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then echo "::warning::skipping sweep — secrets not configured: ${missing[*]}" echo "::warning::set them at Settings → Secrets and Variables → Actions, then rerun." - echo "::warning::AWS_JANITOR_* must belong to a principal with secretsmanager:ListSecrets and secretsmanager:DeleteSecret on molecule/tenant/* (the prod molecule-cp principal lacks ListSecrets)." echo "skip=true" >> "$GITHUB_OUTPUT" exit 0 fi echo "::error::sweep cannot run — required secrets missing: ${missing[*]}" echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow." - echo "::error::AWS_JANITOR_* must belong to a principal with secretsmanager:ListSecrets and secretsmanager:DeleteSecret on molecule/tenant/*." 
exit 1 fi echo "All required secrets present ✓" diff --git a/.gitea/workflows/sweep-cf-orphans.yml b/.gitea/workflows/sweep-cf-orphans.yml index b18630b7..28af2537 100644 --- a/.gitea/workflows/sweep-cf-orphans.yml +++ b/.gitea/workflows/sweep-cf-orphans.yml @@ -33,6 +33,11 @@ name: Sweep stale Cloudflare DNS records # gate halts before damage. Decision-function unit tests in # scripts/ops/test_sweep_cf_decide.py (#2027) cover the rule # classifier. +# +# Secrets: CF_API_TOKEN, CF_ZONE_ID, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY +# are confirmed to exist per the issue #425 audit. CP_ADMIN_API_TOKEN and +# CP_STAGING_ADMIN_API_TOKEN are unconfirmed — if missing, the verify step +# (schedule → hard-fail, dispatch → soft-skip) surfaces it clearly. on: schedule: diff --git a/.gitea/workflows/sweep-cf-tunnels.yml b/.gitea/workflows/sweep-cf-tunnels.yml index 1fa12cfd..d1828ab2 100644 --- a/.gitea/workflows/sweep-cf-tunnels.yml +++ b/.gitea/workflows/sweep-cf-tunnels.yml @@ -28,6 +28,11 @@ name: Sweep stale Cloudflare Tunnels # Safety: the script's MAX_DELETE_PCT gate (default 90% — higher than # the DNS sweep's 50% because tenant-shaped tunnels are mostly # orphans by design) refuses to nuke past the threshold. +# +# Secrets: CF_API_TOKEN, CF_ACCOUNT_ID are confirmed to exist per the +# issue #425 audit. CP_ADMIN_API_TOKEN and CP_STAGING_ADMIN_API_TOKEN +# are unconfirmed — if missing, the verify step (schedule → hard-fail, +# dispatch → soft-skip) surfaces it clearly. on: schedule: