fix(ci): recover current main red blockers #904
@ -107,16 +107,25 @@ jobs:
|
||||
echo "scripts=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
# Both .github/workflows/ci.yml AND .gitea/workflows/ci.yml count
|
||||
# as "this workflow changed" — either edit should force-run every
|
||||
# downstream job. The Gitea port follows the same shape as the
|
||||
# GitHub original so behavior matches when triggered on either
|
||||
# platform.
|
||||
DIFF=$(git diff --name-only "$BASE" HEAD 2>/dev/null || echo ".gitea/workflows/ci.yml")
|
||||
echo "platform=$(echo "$DIFF" | grep -qE '^workspace-server/|^\.gitea/workflows/ci\.yml$|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
echo "canvas=$(echo "$DIFF" | grep -qE '^canvas/|^\.gitea/workflows/ci\.yml$|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
echo "python=$(echo "$DIFF" | grep -qE '^workspace/|^\.gitea/workflows/ci\.yml$|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
echo "scripts=$(echo "$DIFF" | grep -qE '^tests/e2e/|^scripts/|^infra/scripts/|^\.gitea/workflows/ci\.yml$|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
# Workflow-only edits are covered by the workflow lint family
|
||||
# and by this workflow's always-present required jobs. Do not fan
|
||||
# those edits out into Go/Canvas/Python/shellcheck work; the
|
||||
# downstream jobs still emit their required contexts via no-op
|
||||
# steps when their surface flag is false.
|
||||
#
|
||||
# If the diff itself cannot be trusted, fail open by running every
|
||||
# surface instead of silently under-testing the PR.
|
||||
if ! DIFF=$(git diff --name-only "$BASE" HEAD 2>/dev/null); then
|
||||
echo "platform=true" >> "$GITHUB_OUTPUT"
|
||||
echo "canvas=true" >> "$GITHUB_OUTPUT"
|
||||
echo "python=true" >> "$GITHUB_OUTPUT"
|
||||
echo "scripts=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
echo "platform=$(echo "$DIFF" | grep -qE '^workspace-server/' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
echo "canvas=$(echo "$DIFF" | grep -qE '^canvas/' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
echo "python=$(echo "$DIFF" | grep -qE '^workspace/' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
echo "scripts=$(echo "$DIFF" | grep -qE '^tests/e2e/|^scripts/|^infra/scripts/' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# Platform (Go) — Go build/vet/test/lint + coverage gates. The always-run
|
||||
# + per-step gating shape preserves the GitHub-side required-check name
|
||||
@ -380,17 +389,27 @@ jobs:
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
needs: [changes, canvas-build]
|
||||
# Only fires on direct pushes to main (i.e. after staging→main promotion).
|
||||
if: needs.changes.outputs.canvas == 'true' && github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
# Keep the job itself always runnable. Gitea 1.22.6 leaves job-level
|
||||
# event/ref `if:` gates as pending on PRs, which blocks the combined
|
||||
# status even though this reminder is intentionally non-required.
|
||||
steps:
|
||||
- name: Write deploy reminder to step summary
|
||||
env:
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
CANVAS_CHANGED: ${{ needs.changes.outputs.canvas }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
REF_NAME: ${{ github.ref }}
|
||||
# github.server_url resolves via the workflow-level env override
|
||||
# to the Gitea instance, so the RUN_URL points at the Gitea run
|
||||
# page (not github.com). See feedback_act_runner_github_server_url.
|
||||
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ "$CANVAS_CHANGED" != "true" ] || [ "$EVENT_NAME" != "push" ] || [ "$REF_NAME" != "refs/heads/main" ]; then
|
||||
echo "Canvas deploy reminder not applicable for event=$EVENT_NAME ref=$REF_NAME canvas_changed=$CANVAS_CHANGED."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Write body to a temp file — avoids backtick escaping in shell.
|
||||
cat > /tmp/deploy-reminder.md << 'BODY'
|
||||
## Canvas build passed — deploy required
|
||||
@ -535,11 +554,10 @@ jobs:
|
||||
# hourly if this list diverges from status_check_contexts or from
|
||||
# audit-force-merge.yml's REQUIRED_CHECKS env (RFC §4 + §6).
|
||||
#
|
||||
# Excluded from `needs:`: `canvas-deploy-reminder` — gated by
|
||||
# `if: ... github.event_name == 'push' && github.ref == 'refs/heads/main'`,
|
||||
# so on PR events it's legitimately `skipped`. The drift detector
|
||||
# explicitly excludes `github.event_name`-gated jobs from F1 (see
|
||||
# `.gitea/scripts/ci-required-drift.py::ci_job_names`).
|
||||
# Excluded from `needs:`: `canvas-deploy-reminder` — it is an
|
||||
# operational reminder, not a CI prerequisite. Keep that job runnable
|
||||
# on PRs with an internal no-op guard; job-level event/ref `if:` gates
|
||||
# are a Gitea 1.22.6 pending-status trap.
|
||||
#
|
||||
# Phase 3 (RFC #219 §1) safety: underlying build jobs carry
|
||||
# continue-on-error: true so their failures are masked to null (2026-05-12: re-enabled mc#774 interim)
|
||||
@ -559,7 +577,7 @@ jobs:
|
||||
- canvas-build
|
||||
- shellcheck
|
||||
- python-lint
|
||||
if: always()
|
||||
if: ${{ always() }}
|
||||
steps:
|
||||
- name: Assert every required dependency succeeded
|
||||
run: |
|
||||
|
||||
@ -36,17 +36,19 @@ name: redeploy-tenants-on-main
|
||||
#
|
||||
# Runtime ordering:
|
||||
# 1. publish-workspace-server-image completes → new :staging-<sha> in ECR.
|
||||
# 2. This workflow fires via workflow_run, calls redeploy-fleet with
|
||||
# target_tag=staging-<sha>. No CDN propagation wait needed —
|
||||
# ECR image manifest is consistent immediately after push.
|
||||
# 2. The merge that updates publish-workspace-server-image.yml triggers
|
||||
# this push/path-filtered workflow, which calls redeploy-fleet with
|
||||
# target_tag=staging-<sha>. No CDN propagation wait needed — ECR image
|
||||
# manifest is consistent immediately after push.
|
||||
# 3. Calls redeploy-fleet with canary_slug (if set) and a soak
|
||||
# period. Canary proves the image boots; batches follow.
|
||||
# 4. Any failure aborts the rollout and leaves older tenants on the
|
||||
# prior image — safer default than half-and-half state.
|
||||
#
|
||||
# Rollback path: re-run this workflow with a specific SHA pinned via
|
||||
# the workflow_dispatch input. That calls redeploy-fleet with
|
||||
# target_tag=<sha>, re-pulling the older image on every tenant.
|
||||
# Rollback path: set PROD_MANUAL_REDEPLOY_TARGET_TAG as a repo/org
|
||||
# variable or secret, run workflow_dispatch, then unset it after the
|
||||
# rollback. That calls redeploy-fleet with target_tag=<value>,
|
||||
# re-pulling the pinned image on every tenant.
|
||||
|
||||
on:
|
||||
push:
|
||||
@ -77,13 +79,11 @@ env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
# bp-exempt: production redeploy is a side-effect workflow, not a merge gate.
|
||||
redeploy:
|
||||
# Skip the auto-trigger if publish-workspace-server-image didn't
|
||||
# actually succeed. workflow_run fires on any completion state; we
|
||||
# don't want to redeploy against a half-built image.
|
||||
# NOTE (Gitea port): workflow_dispatch trigger dropped; only the
|
||||
# workflow_run path remains.
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' }}
|
||||
# Gitea 1.22.6 does not support workflow_run. This workflow is now
|
||||
# controlled by push/path triggers plus an explicit kill switch.
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
@ -119,16 +119,16 @@ jobs:
|
||||
# dead (staging-verify soft-skips without canary fleet, so
|
||||
# the only thing retagging `:latest` today is the manual
|
||||
# promote-latest.yml — last run 2026-04-28). Auto-trigger
|
||||
# from workflow_run uses workflow_run.head_sha; manual
|
||||
# dispatch with no input falls through to github.sha.
|
||||
# from the main push uses github.sha; manual
|
||||
# dispatch with no variable falls through to github.sha.
|
||||
env:
|
||||
INPUT_TAG: ${{ inputs.target_tag }}
|
||||
HEAD_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
|
||||
PROD_MANUAL_REDEPLOY_TARGET_TAG: ${{ vars.PROD_MANUAL_REDEPLOY_TARGET_TAG || secrets.PROD_MANUAL_REDEPLOY_TARGET_TAG || '' }}
|
||||
HEAD_SHA: ${{ github.sha }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ -n "${INPUT_TAG:-}" ]; then
|
||||
echo "target_tag=$INPUT_TAG" >> "$GITHUB_OUTPUT"
|
||||
echo "Using operator-pinned tag: $INPUT_TAG"
|
||||
if [ -n "${PROD_MANUAL_REDEPLOY_TARGET_TAG:-}" ]; then
|
||||
echo "target_tag=$PROD_MANUAL_REDEPLOY_TARGET_TAG" >> "$GITHUB_OUTPUT"
|
||||
echo "Using operator-pinned tag from PROD_MANUAL_REDEPLOY_TARGET_TAG."
|
||||
else
|
||||
SHORT="${HEAD_SHA:0:7}"
|
||||
echo "target_tag=staging-$SHORT" >> "$GITHUB_OUTPUT"
|
||||
@ -144,13 +144,26 @@ jobs:
|
||||
CP_URL: ${{ vars.CP_URL || 'https://api.moleculesai.app' }}
|
||||
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
|
||||
TARGET_TAG: ${{ steps.tag.outputs.target_tag }}
|
||||
CANARY_SLUG: ${{ inputs.canary_slug || 'hongming' }}
|
||||
SOAK_SECONDS: ${{ inputs.soak_seconds || '60' }}
|
||||
BATCH_SIZE: ${{ inputs.batch_size || '3' }}
|
||||
DRY_RUN: ${{ inputs.dry_run || false }}
|
||||
CANARY_SLUG: ${{ vars.PROD_REDEPLOY_CANARY_SLUG || secrets.PROD_REDEPLOY_CANARY_SLUG || '' }}
|
||||
SOAK_SECONDS: ${{ vars.PROD_REDEPLOY_SOAK_SECONDS || secrets.PROD_REDEPLOY_SOAK_SECONDS || '' }}
|
||||
BATCH_SIZE: ${{ vars.PROD_REDEPLOY_BATCH_SIZE || secrets.PROD_REDEPLOY_BATCH_SIZE || '' }}
|
||||
DRY_RUN: ${{ vars.PROD_REDEPLOY_DRY_RUN || secrets.PROD_REDEPLOY_DRY_RUN || '' }}
|
||||
PROD_AUTO_DEPLOY_DISABLED: ${{ vars.PROD_AUTO_DEPLOY_DISABLED || secrets.PROD_AUTO_DEPLOY_DISABLED || '' }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
case "${PROD_AUTO_DEPLOY_DISABLED,,}" in
|
||||
1|true|yes|on)
|
||||
echo "::notice::PROD_AUTO_DEPLOY_DISABLED is set; skipping production redeploy."
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
|
||||
CANARY_SLUG="${CANARY_SLUG:-hongming}"
|
||||
SOAK_SECONDS="${SOAK_SECONDS:-60}"
|
||||
BATCH_SIZE="${BATCH_SIZE:-3}"
|
||||
DRY_RUN="${DRY_RUN:-false}"
|
||||
|
||||
if [ -z "${CP_ADMIN_API_TOKEN:-}" ]; then
|
||||
echo "::error::CP_ADMIN_API_TOKEN secret not set — skipping redeploy"
|
||||
echo "::notice::Set CP_ADMIN_API_TOKEN in repo secrets to enable auto-redeploy."
|
||||
@ -172,7 +185,7 @@ jobs:
|
||||
}')
|
||||
|
||||
echo "POST $CP_URL/cp/admin/tenants/redeploy-fleet"
|
||||
echo " body: $BODY"
|
||||
echo " target_tag=$TARGET_TAG canary=$CANARY_SLUG soak_seconds=$SOAK_SECONDS batch_size=$BATCH_SIZE dry_run=$DRY_RUN"
|
||||
|
||||
HTTP_RESPONSE=$(mktemp)
|
||||
HTTP_CODE_FILE=$(mktemp)
|
||||
@ -281,10 +294,10 @@ jobs:
|
||||
if [ "$TARGET_TAG" != "latest" ] \
|
||||
&& [ "$TARGET_TAG" != "$EXPECTED_SHA" ] \
|
||||
&& [ "$TARGET_TAG" != "staging-$EXPECTED_SHORT" ]; then
|
||||
# workflow_dispatch with a pinned tag that isn't the head
|
||||
# Manual redeploy with a pinned tag that isn't the head
|
||||
# SHA — operator is rolling back / pinning. Skip the
|
||||
# verification because we don't have the expected SHA in
|
||||
# this context (would need to crane-inspect the GHCR
|
||||
# this context (would need to inspect the ECR
|
||||
# manifest, which is a follow-up). Failing-open here is
|
||||
# safe: the operator chose the tag deliberately.
|
||||
#
|
||||
|
||||
@ -22,6 +22,7 @@ Cross-links:
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
@ -542,3 +543,89 @@ def test_rule9_prod_manual_deploy_allows_rollback_control(tmp_path):
|
||||
_write(tmp_path, "ok.yml", PROD_ROLLBACK_OK)
|
||||
r = _run_lint(tmp_path)
|
||||
assert r.returncode == 0, f"stdout={r.stdout}\nstderr={r.stderr}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CI change detector fanout — workflow-only PRs keep required contexts without
|
||||
# running Go/Canvas/Python/shellcheck heavy steps.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
CI_WORKFLOW = REPO_ROOT / ".gitea" / "workflows" / "ci.yml"
|
||||
CI_SURFACES = ("platform", "canvas", "python", "scripts")
|
||||
|
||||
|
||||
def _ci_change_patterns() -> dict[str, re.Pattern[str]]:
|
||||
text = CI_WORKFLOW.read_text(encoding="utf-8")
|
||||
patterns: dict[str, re.Pattern[str]] = {}
|
||||
for surface, pattern in re.findall(
|
||||
r'echo "(platform|canvas|python|scripts)=.*?grep -qE \'([^\']+)\'',
|
||||
text,
|
||||
):
|
||||
patterns[surface] = re.compile(pattern)
|
||||
assert set(patterns) == set(CI_SURFACES)
|
||||
return patterns
|
||||
|
||||
|
||||
def _classify_ci_change(*paths: str) -> dict[str, bool]:
|
||||
patterns = _ci_change_patterns()
|
||||
return {
|
||||
surface: any(pattern.search(path) for path in paths)
|
||||
for surface, pattern in patterns.items()
|
||||
}
|
||||
|
||||
|
||||
def test_ci_change_detector_workflow_only_edits_do_not_trigger_heavy_surfaces():
|
||||
assert _classify_ci_change(".gitea/workflows/ci.yml") == {
|
||||
"platform": False,
|
||||
"canvas": False,
|
||||
"python": False,
|
||||
"scripts": False,
|
||||
}
|
||||
assert _classify_ci_change(".github/workflows/ci.yml") == {
|
||||
"platform": False,
|
||||
"canvas": False,
|
||||
"python": False,
|
||||
"scripts": False,
|
||||
}
|
||||
|
||||
|
||||
def test_ci_change_detector_narrow_surface_edits_only_trigger_their_surface():
|
||||
assert _classify_ci_change("workspace-server/internal/handlers/foo.go") == {
|
||||
"platform": True,
|
||||
"canvas": False,
|
||||
"python": False,
|
||||
"scripts": False,
|
||||
}
|
||||
assert _classify_ci_change("canvas/app/page.tsx") == {
|
||||
"platform": False,
|
||||
"canvas": True,
|
||||
"python": False,
|
||||
"scripts": False,
|
||||
}
|
||||
assert _classify_ci_change("workspace/a2a_mcp_server.py") == {
|
||||
"platform": False,
|
||||
"canvas": False,
|
||||
"python": True,
|
||||
"scripts": False,
|
||||
}
|
||||
assert _classify_ci_change("tests/e2e/test_model_slug.sh") == {
|
||||
"platform": False,
|
||||
"canvas": False,
|
||||
"python": False,
|
||||
"scripts": True,
|
||||
}
|
||||
|
||||
|
||||
def test_ci_change_detector_docs_and_meta_scripts_do_not_trigger_surfaces():
|
||||
assert _classify_ci_change("README.md") == {
|
||||
"platform": False,
|
||||
"canvas": False,
|
||||
"python": False,
|
||||
"scripts": False,
|
||||
}
|
||||
assert _classify_ci_change(".gitea/scripts/lint-workflow-yaml.py") == {
|
||||
"platform": False,
|
||||
"canvas": False,
|
||||
"python": False,
|
||||
"scripts": False,
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user