diff --git a/.gitea/workflows/block-internal-paths.yml b/.gitea/workflows/block-internal-paths.yml
new file mode 100644
index 00000000..ed60e7e4
--- /dev/null
+++ b/.gitea/workflows/block-internal-paths.yml
@@ -0,0 +1,156 @@
+name: Block internal-flavored paths
+
+# Ported from .github/workflows/block-internal-paths.yml on 2026-05-11 per
+# RFC internal#219 §1 sweep.
+#
+# Differences from the GitHub version:
+#   - Dropped `merge_group: { types: [checks_requested] }` (Gitea has no
+#     merge queue; no `gh-readonly-queue/...` refs).
+#   - Workflow-level env.GITHUB_SERVER_URL set per
+#     feedback_act_runner_github_server_url.
+#   - `continue-on-error: true` on the job (RFC §1 contract — surface
+#     defects without blocking; follow-up PR flips after triage).
+#
+# Hard CI gate. Internal content (positioning, competitive briefs, sales
+# playbooks, PMM/press drip, draft campaigns) lives in molecule-ai/internal —
+# this public monorepo must never re-acquire those paths. CEO directive
+# 2026-04-23 after a fleet-wide audit found 79 internal files leaked here.
+#
+# Failure mode without this gate: agents (PMM, Research, DevRel, Sales) drop
+# briefs into the easiest path their cwd resolves to (root /research,
+# /marketing, /docs/marketing) and gitignore alone won't catch a `git add -f`
+# or a stale gitignore line. This workflow is the mechanical backstop.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  push:
+    branches: [main, staging]
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+jobs:
+  check:
+    name: Block forbidden paths
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
+    # the PR. Follow-up PR flips this off after surfaced defects are
+    # triaged.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          fetch-depth: 2  # need previous commit to diff against on push events
+
+      # For pull_request events the diff base is github.event.pull_request.base.sha,
+      # which may be many commits behind HEAD and therefore absent from the
+      # shallow clone above. Fetch it explicitly (depth=1 keeps it fast).
+      - name: Fetch PR base SHA (pull_request events only)
+        if: github.event_name == 'pull_request'
+        run: git fetch --depth=1 origin ${{ github.event.pull_request.base.sha }}
+
+      - name: Refuse if forbidden paths appear
+        env:
+          # Plumb event-specific SHAs through env so the script doesn't
+          # need conditional `${{ ... }}` interpolation per event type.
+          # github.event.before/after only exist on push events;
+          # pull_request has pull_request.base.sha / pull_request.head.sha.
+          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
+          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+          PUSH_BEFORE: ${{ github.event.before }}
+          PUSH_AFTER: ${{ github.event.after }}
+        run: |
+          # Paths that must NEVER live in the public monorepo. Add to this
+          # list narrowly — broader patterns belong in .gitignore so day-to-day
+          # docs work isn't accidentally blocked.
+          FORBIDDEN_PATTERNS=(
+            "^research/"
+            "^marketing/"
+            "^docs/marketing/"
+            "^comment-[0-9]+\.json$"
+            "^test-pmm.*\.(txt|md)$"
+            "^tick-reflections.*\.(txt|md)$"
+            ".*-temp\.(md|txt)$"
+          )
+
+          # Determine the diff base. Each event type stores its SHAs in
+          # a different place — see the env block above.
+          case "${{ github.event_name }}" in
+            pull_request)
+              BASE="$PR_BASE_SHA"
+              HEAD="$PR_HEAD_SHA"
+              ;;
+            *)
+              BASE="$PUSH_BEFORE"
+              HEAD="$PUSH_AFTER"
+              ;;
+          esac
+
+          # On push events with shallow clones, BASE may be present in
+          # the event payload but absent from the local object DB
+          # (fetch-depth=2 doesn't always reach the previous commit
+          # across true merges). Try fetching it on demand. If the
+          # fetch fails — e.g. the SHA was force-overwritten — we fall
+          # through to the empty-BASE branch below, which scans the
+          # entire tree as if every file were new. Correct, just slow.
+          if [ -n "$BASE" ] && ! echo "$BASE" | grep -qE '^0+$'; then
+            if ! git cat-file -e "$BASE" 2>/dev/null; then
+              git fetch --depth=1 origin "$BASE" 2>/dev/null || true
+            fi
+          fi
+
+          # Files added or modified in this change.
+          #   --no-renames: a rename is reported as an add of its NEW path;
+          #     with rename detection on, `git mv docs/x.md research/x.md`
+          #     is status R and slips past --diff-filter=AM.
+          #   core.quotePath=false: print non-ASCII names verbatim instead of
+          #     octal-escaped and quoted, so the ^-anchored patterns still match.
+          if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$' || ! git cat-file -e "$BASE" 2>/dev/null; then
+            # New branch / no previous SHA / BASE unreachable — check
+            # the entire tree as if every file were new. Slower but
+            # correct on first push or post-fetch-failure recovery.
+            CHANGED=$(git -c core.quotePath=false ls-tree -r --name-only HEAD)
+          else
+            CHANGED=$(git -c core.quotePath=false diff --no-renames --name-only --diff-filter=AM "$BASE" "$HEAD")
+          fi
+
+          if [ -z "$CHANGED" ]; then
+            echo "No changed files to inspect."
+            exit 0
+          fi
+
+          OFFENDING=""
+          # Read one path per line: an unquoted `for path in $CHANGED` would
+          # split names on spaces and glob-expand them.
+          while IFS= read -r path; do
+            for pattern in "${FORBIDDEN_PATTERNS[@]}"; do
+              if echo "$path" | grep -qE "$pattern"; then
+                OFFENDING+="${path} (matched: ${pattern})"$'\n'
+                break
+              fi
+            done
+          done <<< "$CHANGED"
+
+          if [ -n "$OFFENDING" ]; then
+            echo "::error::Forbidden internal-flavored paths detected:"
+            # '%s' keeps a '%' in a file name from being parsed as a format spec.
+            printf '%s' "$OFFENDING"
+            echo ""
+            echo "These paths belong in molecule-ai/internal, not this public repo."
+            echo "See docs/internal-content-policy.md for canonical locations."
+            echo ""
+            echo "If your file is genuinely public-facing (e.g. a blog post"
+            echo "ready to ship), use one of these alternatives instead:"
+            echo " - Public-bound blog posts: docs/blog/<slug>.md"
+            echo " - Public-bound tutorials: docs/tutorials/<slug>.md"
+            echo " - Public devrel content: docs/devrel/<slug>.md"
+            echo ""
+            echo "If you legitimately need to add a new top-level path that"
+            echo "happens to match a forbidden pattern, edit"
+            echo ".gitea/workflows/block-internal-paths.yml and update the"
+            echo "FORBIDDEN_PATTERNS list with reviewer signoff."
+            exit 1
+          fi
+
+          echo "OK No forbidden paths in this change."
diff --git a/.gitea/workflows/cascade-list-drift-gate.yml b/.gitea/workflows/cascade-list-drift-gate.yml
new file mode 100644
index 00000000..99b8e8bb
--- /dev/null
+++ b/.gitea/workflows/cascade-list-drift-gate.yml
@@ -0,0 +1,58 @@
+name: cascade-list-drift-gate
+
+# Ported from .github/workflows/cascade-list-drift-gate.yml on 2026-05-11
+# per RFC internal#219 §1 sweep.
+#
+# Differences from the GitHub version:
+#   - on.paths reference .gitea/workflows/publish-runtime.yml (the active
+#     Gitea workflow file) instead of .github/workflows/publish-runtime.yml
+#     (which Category A of this sweep deletes).
+#   - Explicit workflow-path argument passed to the drift script so it
+#     audits the .gitea/ workflow (the script's default is still
+#     .github/... which will not exist post-Cat-A).
+#   - Workflow-level env.GITHUB_SERVER_URL set per
+#     feedback_act_runner_github_server_url.
+#   - `continue-on-error: true` on the job (RFC §1 contract — surface
+#     defects without blocking; follow-up PR flips after triage).
+#
+# Structural gate: TEMPLATES list in publish-runtime.yml must match
+# manifest.json's workspace_templates exactly. Closes the recurrence
+# path of PR #2556 (the data fix) and is the first concrete deliverable
+# of RFC #388 PR-3.
+#
+# Triggers narrowly to keep CI quiet: only on PRs that actually change
+# one of the watched files. The path-filtered split + always-emit-result
+# pattern (memory: "Required check names need a job that always runs")
+# is unnecessary here because the workflow IS the check name and PR
+# branch protection should require it directly. Future-proof: if this
+# becomes a required check, add a no-op aggregator with always() so the
+# name still emits when paths don't match.
+
+on:
+  pull_request:
+    branches: [staging, main]
+    paths:
+      - manifest.json
+      - .gitea/workflows/publish-runtime.yml
+      - scripts/check-cascade-list-vs-manifest.sh
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+permissions:
+  contents: read
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
+    # the PR. Follow-up PR flips this off after surfaced defects are
+    # triaged.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+      - name: Check cascade list matches manifest
+        # Pass the .gitea/ workflow path explicitly — the script's
+        # default still points at .github/... which Category A of this
+        # sweep removes.
+        run: bash scripts/check-cascade-list-vs-manifest.sh manifest.json .gitea/workflows/publish-runtime.yml
diff --git a/.gitea/workflows/check-migration-collisions.yml b/.gitea/workflows/check-migration-collisions.yml
new file mode 100644
index 00000000..e2aed7f5
--- /dev/null
+++ b/.gitea/workflows/check-migration-collisions.yml
@@ -0,0 +1,74 @@
+name: Check migration collisions
+
+# Ported from .github/workflows/check-migration-collisions.yml on 2026-05-11
+# per RFC internal#219 §1 sweep.
+#
+# Differences from the GitHub version:
+#   - on.paths includes .gitea/workflows/check-migration-collisions.yml
+#     (this file) instead of the .github/ one.
+#   - Workflow-level env.GITHUB_SERVER_URL pinned to https://git.moleculesai.app
+#     so scripts/ops/check_migration_collisions.py can derive the Gitea API
+#     base (the script already supports this; see _gitea_api_url()).
+#   - `continue-on-error: true` on the job (RFC §1 contract).
+#
+# Hard gate (#2341): fails a PR that adds a migration prefix already
+# claimed by the base branch or another open PR. Caught manually 2026-04-30
+# during PR #2276 rebase: 044_runtime_image_pins collided with
+# 044_platform_inbound_secret from RFC #2312. This workflow makes that
+# check automatic.
+#
+# Trigger model: pull_request only — there's no value running this on
+# pushes to staging or main (those are post-merge; the gate must fire
+# pre-merge to be useful). Path filter scopes to PRs that actually touch
+# migrations.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths:
+      - 'workspace-server/migrations/**'
+      - 'scripts/ops/check_migration_collisions.py'
+      - '.gitea/workflows/check-migration-collisions.yml'
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+permissions:
+  contents: read
+  # API needs read access to other PRs to detect cross-PR collisions
+  pull-requests: read
+
+jobs:
+  check:
+    name: Migration version collision check
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
+    # the PR. Follow-up PR flips this off after surfaced defects are
+    # triaged.
+    continue-on-error: true
+    timeout-minutes: 5
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          # Need history to diff against base ref
+          fetch-depth: 0
+
+      - name: Detect collisions
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          BASE_REF: origin/${{ github.event.pull_request.base.ref }}
+          HEAD_REF: ${{ github.event.pull_request.head.sha }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          # Auto-injected; Gitea aliases this for in-repo API access.
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Ensure the named base ref exists locally. actions/checkout with
+          # fetch-depth: 0 pulls full history, but the explicit fetch is
+          # cheap insurance against form-of-ref differences across runs.
+          # The branch name is read from $BASE_REF (env), never spliced into
+          # the script text, so an odd ref name cannot inject shell.
+          # IMPORTANT: do NOT pass --depth=1 here: the script below uses
+          # `git diff origin/<base>...<head>` (three-dot, merge-base form),
+          # which fails with "fatal: no merge base" on a shallow base ref.
+          git fetch origin "${BASE_REF#origin/}" || true
+          python3 scripts/ops/check_migration_collisions.py
diff --git a/.gitea/workflows/lint-curl-status-capture.yml b/.gitea/workflows/lint-curl-status-capture.yml
new file mode 100644
index 00000000..99f3f4c0
--- /dev/null
+++ b/.gitea/workflows/lint-curl-status-capture.yml
@@ -0,0 +1,104 @@
+name: Lint curl status-code capture
+
+# Ported from .github/workflows/lint-curl-status-capture.yml on 2026-05-11
+# per RFC internal#219 §1 sweep.
+#
+# Differences from the GitHub version:
+#   - on.paths and the lint scanner target .gitea/workflows/**.yml (the
+#     active Gitea workflow directory) instead of .github/workflows/**.yml
+#     (which the rest of this sweep is emptying out).
+#   - Self-skip path updated to the .gitea/ version of this file.
+#   - Dropped `merge_group:` trigger.
+#   - Workflow-level env.GITHUB_SERVER_URL set per
+#     feedback_act_runner_github_server_url.
+#   - `continue-on-error: true` on the job (RFC §1 contract).
+#
+# Pins the workflow-bash anti-pattern that produced "HTTP 000000" on the
+# 2026-05-04 redeploy-tenants-on-main run for sha 2b862f6:
+#
+#   HTTP_CODE=$(curl ... -w '%{http_code}' ... || echo "000")
+#
+# When curl exits non-zero (connection reset -> 56, --fail-with-body 4xx/5xx
+# -> 22), the `-w '%{http_code}'` already wrote a status to stdout — usually
+# "000" for connection failures or the actual code for HTTP errors. The
+# `|| echo "000"` then fires AND appends ANOTHER "000" to the captured
+# stdout, producing values like "000000" or "409000" that fail string
+# comparisons against "200" while looking superficially right.
+#
+# Same class of bug the synth-E2E §7c gate hit twice (PRs #2779/#2783 +
+# #2797). Memory: feedback_curl_status_capture_pollution.md.
+
+on:
+  pull_request:
+    paths: ['.gitea/workflows/**']
+  push:
+    branches: [main, staging]
+    paths: ['.gitea/workflows/**']
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+jobs:
+  scan:
+    name: Scan workflows for curl status-capture pollution
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
+    # the PR. Follow-up PR flips this off after surfaced defects are
+    # triaged.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+      - name: Find curl ... -w '%{http_code}' ... || echo "000" subshells
+        run: |
+          set -uo pipefail
+          # Multi-line aware: look for `$(curl ... -w '%{http_code}' ... || echo "000")`
+          # subshell where the entire command-substitution wraps a curl that
+          # ends with `|| echo "000"`. Must distinguish from the SAFE shape
+          # `$(cat tempfile 2>/dev/null || echo "000")` — `cat` with a missing
+          # tempfile produces empty stdout, no pollution.
+          python3 <<'PY'
+          import os, re, sys, glob
+
+          BAD_FILES = []
+
+          # Match the buggy substitution across newlines: $(curl ... -w '%{http_code}' ... || echo "000")
+          # The `\\n` is the bash line-continuation that lets curl flags span lines.
+          # We collapse continuation lines first, then look for the single-line bad pattern (000 quoted either way, or bare).
+          PATTERN = re.compile(
+              r'\$\(\s*curl\b[^)]*-w\s*[\'"]%\{http_code\}[\'"][^)]*\|\|\s*echo\s+[\'"]?000[\'"]?\s*\)',
+              re.DOTALL,
+          )
+
+          # Self-skip: this lint workflow contains the literal anti-pattern in
+          # its own docstring — that's intentional, not a bug.
+          SELF = ".gitea/workflows/lint-curl-status-capture.yml"
+
+          for f in sorted(glob.glob(".gitea/workflows/*.yml") + glob.glob(".gitea/workflows/*.yaml")):  # scan both extensions
+              if f == SELF:
+                  continue
+              with open(f) as fh:
+                  content = fh.read()
+              # Collapse bash line-continuations (\\\n + leading whitespace)
+              # into a single logical line so the regex can see the full
+              # curl invocation as one chunk.
+              flat = re.sub(r'\\\s*\n\s*', ' ', content)
+              for m in PATTERN.finditer(flat):
+                  BAD_FILES.append((f, m.group(0)[:120]))
+
+          if not BAD_FILES:
+              print("OK No curl-status-capture pollution patterns detected")
+              sys.exit(0)
+
+          print(f"::error::Found {len(BAD_FILES)} curl-status-capture pollution site(s):")
+          for f, snippet in BAD_FILES:
+              print(f"::error file={f}::Curl status-capture pollution: '|| echo \"000\"' inside a $(curl ... -w '%{{http_code}}' ...) subshell. On non-2xx or connection failure, curl's -w writes a status, then exits non-zero, then the || echo appends another '000' — producing 'HTTP 000000' or '409000' that fails comparisons silently. Fix: route -w into a tempfile so the exit code can't pollute stdout. See memory feedback_curl_status_capture_pollution.md.")
+              print(f" matched: {snippet}...")
+          print()
+          print("Fix template:")
+          print(' set +e')
+          print(' curl ... -w \'%{http_code}\' >code.txt 2>/dev/null')
+          print(' set -e')
+          print(' HTTP_CODE=$(cat code.txt 2>/dev/null)')
+          print(' [ -z "$HTTP_CODE" ] && HTTP_CODE="000"')
+          sys.exit(1)
+          PY
diff --git a/.gitea/workflows/railway-pin-audit.yml b/.gitea/workflows/railway-pin-audit.yml
new file mode 100644
index 00000000..58f4809e
--- /dev/null
+++ b/.gitea/workflows/railway-pin-audit.yml
@@ -0,0 +1,181 @@
+name: Railway pin audit (drift detection)
+
+# Ported from .github/workflows/railway-pin-audit.yml on 2026-05-11 per
+# RFC internal#219 §1 sweep.
+#
+# Differences from the GitHub version:
+#   - Dropped `workflow_dispatch:` (Gitea 1.22.6 trigger handling).
+#     Manual runs go via cron-trigger bump or push the workflow file
+#     itself.
+#   - `actions/github-script@v9` blocks (which call github.rest.* — a
+#     GitHub-specific JS API) replaced with curl calls against the
+#     Gitea REST API (/api/v1/repos/.../issues, .../labels,
+#     .../comments). Same behaviour: open issue on drift, comment on
+#     repeat-drift, close on clean run.
+#   - Workflow-level env.GITHUB_SERVER_URL set so the curl calls can
+#     derive `git.moleculesai.app` from the runner env (with
+#     hard-coded fallback inside the steps).
+#   - `continue-on-error: true` on the job (RFC §1 contract).
+#
+# Daily audit of Railway env vars for drift-prone image-tag pins —
+# automation-cadence layer over the detection script + regression test
+# shipped in PR #2168 (#2001 closure).
+#
+# Background: on 2026-04-24 a stale `:staging-a14cf86` SHA pin in CP's
+# TENANT_IMAGE caused 3+ hours of E2E failure with the appearance that
+# "every fix didn't propagate" — really the tenant image was so old it
+# didn't read the env vars those fixes produced.
+#
+# Cadence: once a day, 13:00 UTC (06:00 PT).
+#
+# Secret hardening: per feedback_schedule_vs_dispatch_secrets_hardening,
+# the schedule trigger HARD-FAILS on missing RAILWAY_AUDIT_TOKEN.
+
+on:
+  schedule:
+    - cron: "0 13 * * *"
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+concurrency:
+  group: railway-pin-audit
+  cancel-in-progress: false
+
+permissions:
+  issues: write
+  contents: read
+
+jobs:
+  audit:
+    name: Audit Railway env vars for drift-prone pins
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
+    continue-on-error: true
+    timeout-minutes: 10
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+
+      - name: Verify RAILWAY_AUDIT_TOKEN present
+        id: secret_check
+        env:
+          RAILWAY_AUDIT_TOKEN: ${{ secrets.RAILWAY_AUDIT_TOKEN }}
+        run: |
+          set -euo pipefail
+          if [ -n "${RAILWAY_AUDIT_TOKEN:-}" ]; then
+            echo "have_secret=true" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          echo "have_secret=false" >> "$GITHUB_OUTPUT"
+          echo "::error::RAILWAY_AUDIT_TOKEN secret missing — schedule trigger requires it. Provision the token (read-only \`variables\` scope on the molecule-platform Railway project) and store as repo secret RAILWAY_AUDIT_TOKEN."
+          exit 1
+
+      - name: Install Railway CLI
+        if: steps.secret_check.outputs.have_secret == 'true'
+        run: |
+          set -euo pipefail
+          curl -fsSL https://railway.com/install.sh | sh
+          echo "$HOME/.railway/bin" >> "$GITHUB_PATH"
+
+      - name: Verify Railway CLI authenticated
+        if: steps.secret_check.outputs.have_secret == 'true'
+        env:
+          RAILWAY_TOKEN: ${{ secrets.RAILWAY_AUDIT_TOKEN }}
+        run: |
+          set -euo pipefail
+          if ! railway whoami >/dev/null 2>&1; then
+            echo "::error::Railway CLI failed to authenticate with RAILWAY_AUDIT_TOKEN — token may be revoked or scoped incorrectly"
+            exit 2
+          fi
+
+      - name: Link molecule-platform project
+        if: steps.secret_check.outputs.have_secret == 'true'
+        env:
+          RAILWAY_TOKEN: ${{ secrets.RAILWAY_AUDIT_TOKEN }}
+        run: |
+          set -euo pipefail
+          railway link --project 7ccc8c68-61f4-42ab-9be5-586eeee11768
+
+      - name: Run drift audit
+        id: audit
+        if: steps.secret_check.outputs.have_secret == 'true'
+        env:
+          RAILWAY_TOKEN: ${{ secrets.RAILWAY_AUDIT_TOKEN }}
+        run: |
+          set +e
+          bash scripts/ops/audit-railway-sha-pins.sh 2>&1 | tee /tmp/audit.log
+          rc=${PIPESTATUS[0]}
+          echo "rc=$rc" >> "$GITHUB_OUTPUT"
+          # Capture the audit log for the issue body.
+          {
+            echo 'log<<__RAILWAY_AUDIT_LOG_EOF__'  # multiline output; delimiter chosen not to occur in the log
+            cat /tmp/audit.log
+            echo '__RAILWAY_AUDIT_LOG_EOF__'
+          } >> "$GITHUB_OUTPUT"
+          case "$rc" in
+            0) exit 0 ;;
+            1) echo "::warning::Drift-prone pin(s) detected — issue will be filed"; exit 1 ;;
+            2) echo "::error::Railway CLI auth/link failed mid-script — token or project ID drift"; exit 2 ;;
+            *) echo "::error::Unexpected audit rc=$rc"; exit 1 ;;
+          esac
+
+      - name: Open / update drift issue (Gitea API)
+        if: failure() && steps.audit.outputs.rc == '1'
+        env:
+          GITEA_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          AUDIT_LOG: ${{ steps.audit.outputs.log }}
+          SERVER_URL: ${{ env.GITHUB_SERVER_URL || 'https://git.moleculesai.app' }}  # hard-coded fallback
+          RUN_ID: ${{ github.run_id }}
+        run: |
+          set -euo pipefail
+          API="${SERVER_URL%/}/api/v1"
+          TITLE="Railway env-var drift detected"
+          RUN_URL="${SERVER_URL%/}/${REPO}/actions/runs/${RUN_ID}"
+          BODY=$(jq -nc --arg t "$TITLE" --arg log "${AUDIT_LOG:-(log unavailable)}" --arg run "$RUN_URL" '
+            {body: ("Daily Railway pin audit found drift-prone image-tag pins in the molecule-platform Railway project.\n\n**What this means:** an env var (likely on `controlplane`) is pinned to a SHA-shaped or semver tag instead of a floating tag. Same pattern that caused the 2026-04-24 TENANT_IMAGE incident — fix-PRs land but the running service does not pick them up.\n\n**Recovery:** open the Railway dashboard, replace the flagged value with a floating tag (:staging-latest, :main) unless the pin is intentional and documented in the ops runbook.\n\n**Audit output:**\n\n```\n" + $log + "\n```\n\nRun: " + $run + "\n\nCloses automatically when a subsequent daily run reports clean.")}')
+
+          # Look for existing open drift issue with the title.
+          EXISTING=$(curl -fsS -H "Authorization: token $GITEA_TOKEN" \
+            "${API}/repos/${REPO}/issues?state=open&type=issues&limit=50" \
+            | jq -r --arg t "$TITLE" '.[] | select(.title==$t) | .number' | head -1)
+
+          if [ -n "$EXISTING" ]; then
+            COMMENT_BODY=$(jq -nc --arg log "${AUDIT_LOG:-(log unavailable)}" --arg run "$RUN_URL" \
+              '{body: ("Still drifting. " + $run + "\n\n```\n" + $log + "\n```")}')
+            curl -fsS -X POST -H "Authorization: token $GITEA_TOKEN" -H "Content-Type: application/json" \
+              "${API}/repos/${REPO}/issues/${EXISTING}/comments" -d "$COMMENT_BODY" >/dev/null
+            echo "Commented on existing issue #${EXISTING}"
+          else
+            CREATE_BODY=$(echo "$BODY" | jq --arg t "$TITLE" '. + {title: $t, labels: []}')
+            NUM=$(curl -fsS -X POST -H "Authorization: token $GITEA_TOKEN" -H "Content-Type: application/json" \
+              "${API}/repos/${REPO}/issues" -d "$CREATE_BODY" | jq -r .number)
+            echo "Filed issue #${NUM}"
+          fi
+
+      - name: Close stale drift issue on clean run (Gitea API)
+        if: success() && steps.audit.outputs.rc == '0'
+        env:
+          GITEA_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          SERVER_URL: ${{ env.GITHUB_SERVER_URL || 'https://git.moleculesai.app' }}  # hard-coded fallback
+          RUN_ID: ${{ github.run_id }}
+        run: |
+          set -euo pipefail
+          API="${SERVER_URL%/}/api/v1"
+          TITLE="Railway env-var drift detected"
+          RUN_URL="${SERVER_URL%/}/${REPO}/actions/runs/${RUN_ID}"
+
+          NUMS=$(curl -fsS -H "Authorization: token $GITEA_TOKEN" \
+            "${API}/repos/${REPO}/issues?state=open&type=issues&limit=50" \
+            | jq -r --arg t "$TITLE" '.[] | select(.title==$t) | .number')
+
+          for N in $NUMS; do
+            curl -fsS -X POST -H "Authorization: token $GITEA_TOKEN" -H "Content-Type: application/json" \
+              "${API}/repos/${REPO}/issues/${N}/comments" \
+              -d "$(jq -nc --arg run "$RUN_URL" '{body: ("Daily audit clean — drift resolved. " + $run)}')" >/dev/null
+            curl -fsS -X PATCH -H "Authorization: token $GITEA_TOKEN" -H "Content-Type: application/json" \
+              "${API}/repos/${REPO}/issues/${N}" -d '{"state":"closed"}' >/dev/null
+            echo "Closed #${N}"
+          done
diff --git a/.gitea/workflows/runtime-pin-compat.yml b/.gitea/workflows/runtime-pin-compat.yml
new file mode 100644
index 00000000..6fe493d1
--- /dev/null
+++ b/.gitea/workflows/runtime-pin-compat.yml
@@ -0,0 +1,100 @@
+name: Runtime Pin Compatibility
+
+# Ported from .github/workflows/runtime-pin-compat.yml on 2026-05-11 per
+# RFC internal#219 §1 sweep.
+#
+# Differences from the GitHub version:
+#   - Dropped `merge_group:` (no Gitea merge queue) and
+#     `workflow_dispatch:` (no inputs, but the trigger itself is
+#     parser-rejected when inputs are absent in some Gitea 1.22.x
+#     builds; safest to drop entirely — manual runs go via cron-trigger
+#     bump or push-with-paths-filter).
+#   - on.paths references .gitea/workflows/runtime-pin-compat.yml (this
+#     file) instead of the .github/ one.
+#   - Workflow-level env.GITHUB_SERVER_URL set.
+#   - `continue-on-error: true` on the job (RFC §1 contract).
+#
+# CI gate that prevents the 5-hour staging outage from 2026-04-24 from
+# recurring (controlplane#253). The original failure mode:
+#   1. molecule-ai-workspace-runtime 0.1.13 declared `a2a-sdk<1.0` in its
+#      requires_dist metadata (incorrect — it actually imports
+#      a2a.server.routes which only exists in a2a-sdk 1.0+)
+#   2. `pip install molecule-ai-workspace-runtime` resolved cleanly
+#   3. `from molecule_runtime.main import main_sync` raised ImportError
+#   4. Every tenant workspace crashed; the canary tenant caught it but
+#      only after 5 hours of degraded staging
+#
+# This workflow installs the CURRENTLY PUBLISHED runtime from PyPI on
+# top of `workspace/requirements.txt` and smoke-imports. Catches:
+#   - Upstream PyPI yanks
+#   - Bad re-releases of molecule-ai-workspace-runtime
+#   - Already-shipped wheels that stop importing because a transitive
+#     dep moved underneath
+
+on:
+  push:
+    branches: [main, staging]
+    paths:
+      # Narrow filter: pypi-latest is sensitive only to changes that
+      # affect what we're INSTALLING (requirements.txt) or WHAT THE
+      # CHECK ITSELF DOES (this workflow file). Edits to workspace/
+      # source code don't change what's on PyPI right now, so they
+      # don't change this gate's verdict.
+      - 'workspace/requirements.txt'
+      - '.gitea/workflows/runtime-pin-compat.yml'
+  pull_request:
+    branches: [main, staging]
+    paths:
+      - 'workspace/requirements.txt'
+      - '.gitea/workflows/runtime-pin-compat.yml'
+  # Daily catch for upstream PyPI publishes that break the pin combo
+  # without any change in our repo (e.g. someone re-yanks an a2a-sdk
+  # release or molecule-ai-workspace-runtime publishes a bad bump).
+  schedule:
+    - cron: '0 13 * * *'  # 06:00 PT
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  pypi-latest-install:
+    name: PyPI-latest install + import smoke
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
+    # the PR. Follow-up PR flips this off after surfaced defects are
+    # triaged.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.11'
+          cache: pip
+          cache-dependency-path: workspace/requirements.txt
+      - name: Install runtime + workspace requirements
+        # Install order is load-bearing: install the runtime FIRST so pip
+        # honors whatever a2a-sdk constraint the runtime metadata declares
+        # (this is the surface that broke in 2026-04-24 — runtime declared
+        # `a2a-sdk<1.0` but actually needed >=1.0). The follow-up install
+        # of workspace/requirements.txt then upgrades a2a-sdk to the
+        # constraint our runtime image actually pins. The import smoke
+        # below verifies the upgraded combination is consistent.
+        run: |
+          python -m venv /tmp/venv
+          /tmp/venv/bin/pip install --upgrade pip
+          /tmp/venv/bin/pip install molecule-ai-workspace-runtime
+          /tmp/venv/bin/pip install -r workspace/requirements.txt
+          /tmp/venv/bin/pip show molecule-ai-workspace-runtime a2a-sdk \
+            | grep -E '^(Name|Version):'
+      - name: Smoke import — fail if metadata declares deps that don't satisfy real imports
+        # WORKSPACE_ID is validated at import time by platform_auth.py — EC2
+        # user-data sets it from the cloud-init template; set a placeholder
+        # here so the import smoke doesn't trip on the env-var guard.
+        env:
+          WORKSPACE_ID: 00000000-0000-0000-0000-000000000001
+        run: |
+          /tmp/venv/bin/python -c "from molecule_runtime.main import main_sync; print('runtime imports OK')"
diff --git a/.gitea/workflows/runtime-prbuild-compat.yml b/.gitea/workflows/runtime-prbuild-compat.yml
new file mode 100644
index 00000000..71145434
--- /dev/null
+++ b/.gitea/workflows/runtime-prbuild-compat.yml
@@ -0,0 +1,139 @@
+name: Runtime PR-Built Compatibility
+
+# Ported from .github/workflows/runtime-prbuild-compat.yml on 2026-05-11
+# per RFC internal#219 §1 sweep.
+#
+# Differences from the GitHub version:
+#   - Dropped `merge_group:` (no Gitea merge queue) and `workflow_dispatch:`
+#     (Gitea 1.22.6 parser-rejects workflow_dispatch with inputs and is
+#     finicky without them).
+#   - `dorny/paths-filter@v4` replaced with inline `git diff` (per PR#372
+#     pattern for ci.yml port).
+#   - Inline path filter references .gitea/workflows/runtime-prbuild-compat.yml.
+#   - Workflow-level env.GITHUB_SERVER_URL set.
+#   - `continue-on-error: true` on every job (RFC §1 contract).
+#
+# Companion to `runtime-pin-compat.yml`. That workflow tests what's
+# CURRENTLY PUBLISHED on PyPI; this workflow tests what WOULD BE
+# PUBLISHED if THIS PR merges.
+#
+# Why two workflows: the chicken-and-egg #128 fix added a "PR-built
+# wheel" job to the original runtime-pin-compat.yml, but both jobs
+# shared a `paths:` filter that was the union of their needs
+# (`workspace/**`). That meant the PyPI-latest job ran on every doc
+# edit even though the upstream PyPI artifact can't change with our
+# workspace/ source. Splitting the two means each gets a narrow
+# `paths:` filter that matches the inputs it actually depends on.
+#
+# Catches the failure mode where a PR adds an import requiring a newer
+# SDK than `workspace/requirements.txt` pins:
+#   1. Pip resolves the existing PyPI wheel + the old SDK pin -> smoke
+#      passes (it imports the OLD main.py from the wheel, not the PR's
+#      new main.py).
+#   2. Merge -> publish-runtime.yml ships a wheel WITH the new import.
+#   3. Tenant images redeploy -> all crash on first boot with ImportError.
+
+on:
+  push:
+    branches: [main, staging]
+  pull_request:
+    branches: [main, staging]
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+concurrency:
+  # event_name + sha keeps PR sync and the subsequent staging push on the
+  # same SHA from cancelling each other (per feedback_concurrency_group_per_sha).
+  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.head.sha || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  detect-changes:
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
+    continue-on-error: true
+    outputs:
+      wheel: ${{ steps.decide.outputs.wheel }}
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          fetch-depth: 0
+      - id: decide
+        run: |
+          # Inline replacement for dorny/paths-filter (same pattern as PR#372's
+          # ci.yml port): diff against the PR base or previous push SHA, then
+          # match the wheel-relevant path set. grep reads a here-string, not a
+          # pipe, so an early -q exit can't SIGPIPE `echo` and trip pipefail.
+          BASE="${GITHUB_BASE_REF:-${{ github.event.before }}}"
+          if [ "${{ github.event_name }}" = "pull_request" ] && [ -n "${{ github.event.pull_request.base.sha }}" ]; then
+            BASE="${{ github.event.pull_request.base.sha }}"
+          fi
+          if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$'; then
+            # New branch or no previous SHA: treat as wheel-relevant.
+            echo "wheel=true" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          if ! git cat-file -e "$BASE" 2>/dev/null; then
+            git fetch --depth=1 origin "$BASE" 2>/dev/null || true
+          fi
+          if ! git cat-file -e "$BASE" 2>/dev/null; then
+            echo "wheel=true" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          CHANGED=$(git diff --name-only "$BASE" HEAD)
+          if grep -qE '^(workspace/|scripts/build_runtime_package\.py$|scripts/wheel_smoke\.py$|\.gitea/workflows/runtime-prbuild-compat\.yml$)' <<< "$CHANGED"; then
+            echo "wheel=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "wheel=false" >> "$GITHUB_OUTPUT"
+          fi
+
+  # ONE job (no job-level `if:`) that always runs and reports under the
+  # required-check name `PR-built wheel + import smoke`. Real work is
+  # gated per-step on `needs.detect-changes.outputs.wheel`.
+  local-build-install:
+    needs: detect-changes
+    name: PR-built wheel + import smoke
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
+    continue-on-error: true
+    steps:
+      - name: No-op pass (paths filter excluded this commit)
+        if: needs.detect-changes.outputs.wheel != 'true'
+        run: |
+          echo "No workspace/ / scripts/{build_runtime_package,wheel_smoke}.py / workflow changes — wheel gate satisfied without rebuilding."
+          echo "::notice::PR-built wheel + import smoke no-op pass (paths filter excluded this commit)."
+ - if: needs.detect-changes.outputs.wheel == 'true' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - if: needs.detect-changes.outputs.wheel == 'true' + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.11' + cache: pip + cache-dependency-path: workspace/requirements.txt + - name: Install build tooling + if: needs.detect-changes.outputs.wheel == 'true' + run: pip install build + - name: Build wheel from PR source (mirrors publish-runtime.yml) + if: needs.detect-changes.outputs.wheel == 'true' + # Use a fixed test version so the wheel filename is predictable. + # Doesn't reach PyPI — this build is local-only for the smoke. + run: | + python scripts/build_runtime_package.py \ + --version "0.0.0.dev0+pin-compat" \ + --out /tmp/runtime-build + cd /tmp/runtime-build && python -m build + - name: Install built wheel + workspace requirements + if: needs.detect-changes.outputs.wheel == 'true' + run: | + python -m venv /tmp/venv-built + /tmp/venv-built/bin/pip install --upgrade pip + /tmp/venv-built/bin/pip install /tmp/runtime-build/dist/*.whl + /tmp/venv-built/bin/pip install -r workspace/requirements.txt + /tmp/venv-built/bin/pip show molecule-ai-workspace-runtime a2a-sdk \ + | grep -E '^(Name|Version):' + - name: Smoke import the PR-built wheel + if: needs.detect-changes.outputs.wheel == 'true' + # Same script publish-runtime.yml runs against the to-be-PyPI wheel. + run: | + /tmp/venv-built/bin/python "$GITHUB_WORKSPACE/scripts/wheel_smoke.py" diff --git a/.gitea/workflows/secret-pattern-drift.yml b/.gitea/workflows/secret-pattern-drift.yml new file mode 100644 index 00000000..a2520b54 --- /dev/null +++ b/.gitea/workflows/secret-pattern-drift.yml @@ -0,0 +1,70 @@ +name: SECRET_PATTERNS drift lint + +# Ported from .github/workflows/secret-pattern-drift.yml on 2026-05-11 +# per RFC internal#219 §1 sweep. 
+#
+# Differences from the GitHub version:
+#   - on.paths references the new canonical .gitea/workflows/secret-scan.yml
+#     (the .github/ copy is removed by Cat A of this sweep).
+#   - CANONICAL_FILE inside .github/scripts/lint_secret_pattern_drift.py was
+#     updated in the same Cat C-1 PR to point at .gitea/workflows/secret-scan.yml.
+#   - Workflow-level env.GITHUB_SERVER_URL set.
+#   - `continue-on-error: true` on the job (RFC §1 contract).
+#
+# Detects when the canonical SECRET_PATTERNS array in
+# .gitea/workflows/secret-scan.yml diverges from known consumer
+# mirrors (workspace-runtime's bundled pre-commit hook today; more
+# can be added as the consumer set grows).
+#
+# Why this exists: every side that scans for credentials has its own
+# copy of the pattern list. They drift — most recently the runtime
+# hook lagged the canonical by one pattern (sk-cp- / MiniMax F1088),
+# so a developer's local pre-commit would let a sk-cp- token through
+# while the org-wide CI scan would refuse it. The cost of that drift
+# is dev confusion + delayed feedback; the fix is automated detection.
+#
+# Triggers:
+#   - schedule: daily 05:00 UTC. Catches drift introduced by edits
+#     to a consumer copy that didn't update canonical here.
+#   - push to main/staging touching a path listed under on.push.paths:
+#     catches the inverse — canonical updated but consumers not yet bumped.
+#     The lint fails that run on purpose (non-blocking while Phase 3 is on).
+
+on:
+  schedule:
+    # 05:00 UTC = 22:00 PDT / 01:00 EDT (an hour earlier under standard
+    # time). Quiet hours, so a failure email is waiting when humans
+    # start their day rather than interrupting it.
+    - cron: "0 5 * * *"
+  push:
+    branches: [main, staging]
+    paths:
+      - ".gitea/workflows/secret-scan.yml"
+      - ".gitea/workflows/secret-pattern-drift.yml"
+      - ".github/scripts/lint_secret_pattern_drift.py"
+      - ".githooks/pre-commit"
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+# Auto-injected GITHUB_TOKEN scoped to read-only. The lint only does git
+# checkout + HTTPS GETs to public consumer files; no writes to anything.
+permissions:
+  contents: read
+
+jobs:
+  lint:
+    name: Detect SECRET_PATTERNS drift
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
+    continue-on-error: true
+    timeout-minutes: 5
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: "3.11"
+
+      - name: Run drift lint
+        run: python3 .github/scripts/lint_secret_pattern_drift.py
diff --git a/.gitea/workflows/test-ops-scripts.yml b/.gitea/workflows/test-ops-scripts.yml
new file mode 100644
index 00000000..1a676deb
--- /dev/null
+++ b/.gitea/workflows/test-ops-scripts.yml
@@ -0,0 +1,65 @@
+name: Ops Scripts Tests
+
+# Ported from .github/workflows/test-ops-scripts.yml on 2026-05-11 per
+# RFC internal#219 §1 sweep.
+#
+# Differences from the GitHub version:
+#   - Dropped `merge_group:` trigger (no Gitea merge queue).
+#   - on.paths references .gitea/workflows/test-ops-scripts.yml (this
+#     file) instead of the .github/ one.
+#   - Workflow-level env.GITHUB_SERVER_URL set.
+#   - `continue-on-error: true` on the job (RFC §1 contract).
+#
+# Runs the unittest suite for scripts/ on every PR + push that touches
+# anything under scripts/. Kept separate from the main CI so a script-only
+# change doesn't trigger the heavier Go/Canvas/Python pipelines.
+#
+# Discovery layout: tests sit alongside the code they test (see
+# scripts/ops/test_sweep_cf_decide.py for the pattern, and
+# scripts/test_build_runtime_package.py for the rewriter coverage).
+# The job below runs `unittest discover` TWICE — once from `scripts/`,
+# once from `scripts/ops/` — because neither dir has an `__init__.py`,
+# so a single discover from `scripts/` doesn't recurse into the ops
+# subdir. Two passes is simpler than retrofitting namespace packages.
+
+on:
+  push:
+    branches: [main, staging]
+    paths:
+      - 'scripts/**'
+      - '.gitea/workflows/test-ops-scripts.yml'
+  pull_request:
+    branches: [main, staging]
+    paths:
+      - 'scripts/**'
+      - '.gitea/workflows/test-ops-scripts.yml'
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: Ops scripts (unittest)
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.11'
+      - name: Run scripts/ unittests (build_runtime_package, ...)
+        # Top-level scripts/ tests live alongside their target file
+        # (e.g. scripts/test_build_runtime_package.py exercises
+        # scripts/build_runtime_package.py). Discovery rooted at scripts/
+        # (`-t .`) finds only the top-level test_*.py: scripts/ops/ has no
+        # __init__.py, so it is not recursed into — hence the second pass.
+        working-directory: scripts
+        run: python -m unittest discover -t . -p 'test_*.py' -v
+      - name: Run scripts/ops/ unittests (sweep_cf_decide, ...)
+        working-directory: scripts/ops
+        run: python -m unittest discover -p 'test_*.py' -v
diff --git a/.github/scripts/lint_secret_pattern_drift.py b/.github/scripts/lint_secret_pattern_drift.py
index c630094f..4835e875 100644
--- a/.github/scripts/lint_secret_pattern_drift.py
+++ b/.github/scripts/lint_secret_pattern_drift.py
@@ -28,7 +28,7 @@ import sys
 import urllib.request
 from pathlib import Path
 
-CANONICAL_FILE = Path(".github/workflows/secret-scan.yml")
+CANONICAL_FILE = Path(".gitea/workflows/secret-scan.yml")
 
 # Public consumer mirrors. Each entry is (label, raw_url) — raw_url
 # points at the file's RAW content on the consumer's default branch