Merge remote-tracking branch 'dev-lead/main' into pr693-test
Some checks failed
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 14s
CI / Detect changes (pull_request) Successful in 36s
E2E API Smoke Test / detect-changes (pull_request) Successful in 20s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 16s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 13s
Harness Replays / detect-changes (pull_request) Successful in 7s
Runtime PR-Built Compatibility / detect-changes (pull_request) Successful in 15s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 8s
gate-check-v3 / gate-check (pull_request) Successful in 9s
qa-review / approved (pull_request) Failing after 7s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m5s
security-review / approved (pull_request) Failing after 9s
sop-checklist / all-items-acked (pull_request) [soft-fail tier:low] acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: 7
sop-checklist-gate / gate (pull_request) Successful in 7s
sop-tier-check / tier-check (pull_request) Successful in 9s
CI / Canvas (Next.js) (pull_request) Successful in 5s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 2s
CI / Python Lint & Test (pull_request) Successful in 2s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 4s
Harness Replays / Harness Replays (pull_request) Successful in 3s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m22s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (pull_request) Successful in 6s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 5m36s
CI / Platform (Go) (pull_request) Successful in 12m10s
CI / all-required (pull_request) Successful in 2s
Some checks failed
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 14s
CI / Detect changes (pull_request) Successful in 36s
E2E API Smoke Test / detect-changes (pull_request) Successful in 20s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 16s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 13s
Harness Replays / detect-changes (pull_request) Successful in 7s
Runtime PR-Built Compatibility / detect-changes (pull_request) Successful in 15s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 8s
gate-check-v3 / gate-check (pull_request) Successful in 9s
qa-review / approved (pull_request) Failing after 7s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m5s
security-review / approved (pull_request) Failing after 9s
sop-checklist / all-items-acked (pull_request) [soft-fail tier:low] acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: 7
sop-checklist-gate / gate (pull_request) Successful in 7s
sop-tier-check / tier-check (pull_request) Successful in 9s
CI / Canvas (Next.js) (pull_request) Successful in 5s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 2s
CI / Python Lint & Test (pull_request) Successful in 2s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 4s
Harness Replays / Harness Replays (pull_request) Successful in 3s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m22s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (pull_request) Successful in 6s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 5m36s
CI / Platform (Go) (pull_request) Successful in 12m10s
CI / all-required (pull_request) Successful in 2s
This commit is contained in:
commit
a224740d4d
@ -37,7 +37,7 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
|
||||
# the PR. Follow-up PR flips this off after surfaced defects are
|
||||
# triaged.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@ -48,7 +48,7 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
|
||||
# the PR. Follow-up PR flips this off after surfaced defects are
|
||||
# triaged.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
@ -45,7 +45,7 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
|
||||
# the PR. Follow-up PR flips this off after surfaced defects are
|
||||
# triaged.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
|
||||
@ -126,7 +126,7 @@ jobs:
|
||||
name: Platform (Go)
|
||||
needs: changes
|
||||
runs-on: ubuntu-latest
|
||||
# mc#664 (interim): re-mask platform-build pending fix-forward. Phase 4
|
||||
# mc#774 (interim): re-mask platform-build pending fix-forward. Phase 4
|
||||
# (#656) flipped this to continue-on-error: false based on a Phase-3-masked
|
||||
# "green on main 2026-05-12" — the prior continue-on-error: true had
|
||||
# been hiding failing tests in workspace-server/internal/handlers/.
|
||||
@ -145,11 +145,11 @@ jobs:
|
||||
# Time-boxed Option A (90 min) did not fit the cross-cutting scope.
|
||||
# This is a sequenced revert→fix→reflip per
|
||||
# feedback_strict_root_only_after_class_a emergency clause — NOT
|
||||
# a permanent re-mask. Re-flip blocked on mc#664 fix-forward landing.
|
||||
# a permanent re-mask. Re-flip blocked on mc#774 fix-forward landing.
|
||||
# Other 4 #656 flips (changes, canvas-build, shellcheck, python-lint)
|
||||
# retain continue-on-error: false; only platform-build regresses.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true # mc#664 fix-forward in flight; re-flip when mc#664 lands (PR #669 → rebase after #709)
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true # mc#774 fix-forward in flight; re-flip when mc#774 lands (PR #669 → rebase after #709)
|
||||
defaults:
|
||||
run:
|
||||
working-directory: workspace-server
|
||||
@ -187,7 +187,7 @@ jobs:
|
||||
echo "::group::pendinguploads exit=$pu_exit (last 100 lines)"
|
||||
tail -100 /tmp/test-pu.log
|
||||
echo "::endgroup::"
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
name: Run tests with race detection and coverage
|
||||
@ -374,7 +374,7 @@ jobs:
|
||||
canvas-deploy-reminder:
|
||||
name: Canvas Deploy Reminder
|
||||
runs-on: ubuntu-latest
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
needs: [changes, canvas-build]
|
||||
# Only fires on direct pushes to main (i.e. after staging→main promotion).
|
||||
@ -539,7 +539,7 @@ jobs:
|
||||
# `.gitea/scripts/ci-required-drift.py::ci_job_names`).
|
||||
#
|
||||
# Phase 3 (RFC #219 §1) safety: underlying build jobs carry
|
||||
# continue-on-error: true so their failures are masked to null (2026-05-12: re-enabled mc#664 interim)
|
||||
# continue-on-error: true so their failures are masked to null (2026-05-12: re-enabled mc#774 interim)
|
||||
# (Gitea suppresses status reporting for CoE jobs). This sentinel
|
||||
# runs with continue-on-error: false so it always reports its
|
||||
# result to the API — without this, the required-status entry
|
||||
@ -572,7 +572,7 @@ jobs:
|
||||
import json, sys
|
||||
ns = json.load(sys.stdin)
|
||||
# Phase 3 masked: jobs with continue-on-error: true may report "failure"
|
||||
# Remove when mc#664 handler test failures are resolved.
|
||||
# Remove when mc#774 handler test failures are resolved.
|
||||
PHASE3_MASKED = {"platform-build"}
|
||||
# Exclude null (Phase 3 suppressed / in-flight) from the bad list.
|
||||
bad = [(k, v.get("result")) for k, v in ns.items()
|
||||
|
||||
@ -90,7 +90,7 @@ jobs:
|
||||
name: Synthetic E2E against staging
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
# Bumped from 12 → 20 (2026-05-04). Tenant user-data install phase
|
||||
# (apt-get update + install docker.io/jq/awscli/caddy + snap install
|
||||
|
||||
@ -103,7 +103,7 @@ jobs:
|
||||
detect-changes:
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
api: ${{ steps.decide.outputs.api }}
|
||||
@ -155,7 +155,7 @@ jobs:
|
||||
name: E2E API Smoke Test
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 15
|
||||
env:
|
||||
|
||||
@ -70,7 +70,7 @@ jobs:
|
||||
detect-changes:
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
canvas: ${{ steps.decide.outputs.canvas }}
|
||||
@ -119,7 +119,7 @@ jobs:
|
||||
name: Canvas tabs E2E
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 40
|
||||
|
||||
|
||||
@ -84,7 +84,7 @@ jobs:
|
||||
name: E2E Staging External Runtime
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 25
|
||||
|
||||
|
||||
@ -88,20 +88,20 @@ jobs:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
|
||||
- name: YAML validation (best-effort)
|
||||
run: |
|
||||
echo "e2e-staging-saas.yml — PR validation: workflow YAML is valid."
|
||||
echo "E2E step runs only when provisioning-critical files change."
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
|
||||
# Actual E2E: runs on trunk pushes (main + staging). NOT the PR-fire-only
|
||||
@ -112,7 +112,7 @@ jobs:
|
||||
# Only runs on trunk pushes. PR paths get pr-validate instead.
|
||||
if: github.event.pull_request.base.ref == ''
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 45
|
||||
permissions:
|
||||
|
||||
@ -37,7 +37,7 @@ jobs:
|
||||
name: Intentional-failure teardown sanity
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 20
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@ env:
|
||||
jobs:
|
||||
gate-check:
|
||||
runs-on: ubuntu-latest
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true # Never block on our own detector failing
|
||||
steps:
|
||||
- name: Check out BASE ref (never PR-head under pull_request_target)
|
||||
|
||||
@ -78,8 +78,8 @@ jobs:
|
||||
detect-changes:
|
||||
name: detect-changes
|
||||
runs-on: ubuntu-latest
|
||||
# mc#664 Phase 3 (RFC §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774 Phase 3 (RFC §1): surface broken workflows without blocking.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
handlers: ${{ steps.filter.outputs.handlers }}
|
||||
@ -119,8 +119,8 @@ jobs:
|
||||
name: Handlers Postgres Integration
|
||||
needs: detect-changes
|
||||
runs-on: ubuntu-latest
|
||||
# mc#664 Phase 3 (RFC §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774 Phase 3 (RFC §1): surface broken workflows without blocking.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
env:
|
||||
# Unique name per run so concurrent jobs don't collide on the
|
||||
|
||||
@ -63,7 +63,7 @@ jobs:
|
||||
detect-changes:
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
run: ${{ steps.decide.outputs.run }}
|
||||
@ -155,7 +155,7 @@ jobs:
|
||||
name: Harness Replays
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
name: lint-bp-context-emit-match
|
||||
|
||||
# Tier 2f scheduled lint (per internal#350) — detects drift between
|
||||
# Tier 2f scheduled lint (per mc#774) — detects drift between
|
||||
# `branch_protections/<branch>.status_check_contexts` and the set of
|
||||
# contexts emitted by `.gitea/workflows/*.yml`.
|
||||
#
|
||||
@ -60,7 +60,7 @@ name: lint-bp-context-emit-match
|
||||
#
|
||||
# Cross-links
|
||||
# -----------
|
||||
# - internal#350 (the RFC that specs this lint)
|
||||
# - mc#774 (the RFC that specs this lint)
|
||||
# - internal#349 (cross-repo BP sweep)
|
||||
# - feedback_phantom_required_check_after_gitea_migration
|
||||
# - feedback_tier_label_ids_are_per_repo
|
||||
@ -94,7 +94,7 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface drift without blocking. After 7
|
||||
# clean scheduled runs on main, flip to false so a scheduled
|
||||
# failure is a hard CI signal.
|
||||
continue-on-error: true # mc#664 Phase 3 — flip to false after 7 clean main runs
|
||||
continue-on-error: true # mc#774 Phase 3 — flip to false after 7 clean main runs
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
name: lint-continue-on-error-tracking
|
||||
|
||||
# Tier 2e hard-gate lint (per mc#664) — every
|
||||
# Tier 2e hard-gate lint (per mc#774) — every
|
||||
# `continue-on-error: true` in `.gitea/workflows/*.yml` must carry a
|
||||
# `# mc#NNNN` or `# internal#NNNN` tracker comment within 2 lines,
|
||||
# the referenced issue must be OPEN, and ≤14 days old.
|
||||
@ -8,7 +8,7 @@ name: lint-continue-on-error-tracking
|
||||
# Why this exists
|
||||
# ---------------
|
||||
# `continue-on-error: true` on `platform-build` had been hiding
|
||||
# mc#664-class regressions for ~3 weeks before #656 surfaced them on
|
||||
# mc#774-class regressions for ~3 weeks before #656 surfaced them on
|
||||
# 2026-05-12. A 14-day cap on tracker age forces a review cycle and
|
||||
# surfaces mask-drift within at most 14 days of the original defect.
|
||||
# Each `continue-on-error: true` gets a paper trail — close or renew.
|
||||
@ -45,12 +45,12 @@ name: lint-continue-on-error-tracking
|
||||
# close-and-flip, or document the deliberate keep-mask in a fresh
|
||||
# 14-day-renewable tracker. After main is clean for 3 days,
|
||||
# follow-up PR flips this workflow's continue-on-error to false.
|
||||
# Tracking: mc#664.
|
||||
# Tracking: mc#774.
|
||||
#
|
||||
# Cross-links
|
||||
# -----------
|
||||
# - mc#664 (the RFC that specs this lint)
|
||||
# - mc#664 (the empirical masked-3-weeks case)
|
||||
# - mc#774 (the RFC that specs this lint)
|
||||
# - mc#774 (the empirical masked-3-weeks case)
|
||||
# - feedback_chained_defects_in_never_tested_workflows
|
||||
# - feedback_behavior_based_ast_gates
|
||||
# - feedback_strict_root_only_after_class_a
|
||||
@ -96,9 +96,9 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface masked defects without blocking
|
||||
# PRs. Pre-existing continue-on-error: true directives on main
|
||||
# all violate this lint at first — intentional. Flip to false
|
||||
# follow-up after main is clean for 3 days. mc#664.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true # mc#664 Phase 3 mask — 14d forced-renewal cadence
|
||||
# follow-up after main is clean for 3 days. mc#774.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true # mc#774 Phase 3 mask — 14d forced-renewal cadence
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
|
||||
@ -45,7 +45,7 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
|
||||
# the PR. Follow-up PR flips this off after surfaced defects are
|
||||
# triaged.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
name: lint-mask-pr-atomicity
|
||||
|
||||
# Tier 2d hard-gate lint (per mc#664) — blocks PRs that touch
|
||||
# Tier 2d hard-gate lint (per mc#774) — blocks PRs that touch
|
||||
# `.gitea/workflows/ci.yml` and modify ONLY ONE of {continue-on-error,
|
||||
# all-required.sentinel.needs} without a `Paired: #NNN` reference in
|
||||
# the PR body or in a commit message.
|
||||
@ -37,13 +37,13 @@ name: lint-mask-pr-atomicity
|
||||
# This workflow lands at `continue-on-error: true` (Phase 3 — surface
|
||||
# regressions without blocking PRs while the rule beds in).
|
||||
# Follow-up PR flips to `false` once we have ≥3 days of clean runs on
|
||||
# `main` and no false-positives. Tracking issue: mc#664.
|
||||
# `main` and no false-positives. Tracking issue: mc#774.
|
||||
#
|
||||
# Cross-links
|
||||
# -----------
|
||||
# - mc#664 (the RFC that specs this lint)
|
||||
# - mc#774 (the RFC that specs this lint)
|
||||
# - PR#665 / PR#668 (the empirical split-pair)
|
||||
# - mc#664 (the main-red incident the split caused)
|
||||
# - mc#774 (the main-red incident the split caused)
|
||||
# - feedback_strict_root_only_after_class_a
|
||||
# - feedback_behavior_based_ast_gates
|
||||
#
|
||||
@ -91,8 +91,8 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken shapes without blocking
|
||||
# PRs. Follow-up PR flips this to `false` once recent runs on main
|
||||
# are confirmed clean (eat-our-own-dogfood discipline mirrors
|
||||
# PR#673's same-shape comment). Tracking: mc#664.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# PR#673's same-shape comment). Tracking: mc#774.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Check out PR head with full history (need base SHA blobs)
|
||||
|
||||
@ -4,7 +4,7 @@ name: Lint pre-flip continue-on-error
|
||||
# on any job in `.gitea/workflows/*.yml` WITHOUT proof that the affected
|
||||
# job's recent runs on the target branch (PR base) are actually green.
|
||||
#
|
||||
# Empirical class: PR #656 / mc#664. PR #656 (RFC internal#219 Phase 4)
|
||||
# Empirical class: PR #656 / mc#774. PR #656 (RFC internal#219 Phase 4)
|
||||
# flipped 5 platform-build-class jobs `continue-on-error: true → false`
|
||||
# on the basis of a "verified green on main via combined-status check".
|
||||
# But that "green" was the LIE the prior `continue-on-error: true`
|
||||
@ -13,7 +13,7 @@ name: Lint pre-flip continue-on-error
|
||||
# job-level status. The precondition the PR claimed to verify was
|
||||
# structurally fooled by the bug being flipped.
|
||||
#
|
||||
# mc#664 captured the surfaced defects (2 mutually-masked regressions):
|
||||
# mc#774 captured the surfaced defects (2 mutually-masked regressions):
|
||||
# - Class 1: sqlmock helper drift since 2f36bb9a (24 days old)
|
||||
# - Class 2: OFFSEC-001 contract collision since 7d1a189f (1 day old)
|
||||
#
|
||||
@ -55,7 +55,7 @@ name: Lint pre-flip continue-on-error
|
||||
# - YAML parse error in one of the workflow files: warn-only,
|
||||
# don't block — the YAML lint workflows catch this separately.
|
||||
#
|
||||
# Cross-links: PR#656, mc#664, PR#665 (interim re-mask),
|
||||
# Cross-links: PR#656, mc#774, PR#665 (interim re-mask),
|
||||
# Quirk #10 (internal#342 + dup #287), hongming-pc2 charter
|
||||
# §SOP-N rule (e), feedback_strict_root_only_after_class_a,
|
||||
# feedback_no_shared_persona_token_use.
|
||||
@ -99,8 +99,8 @@ jobs:
|
||||
timeout-minutes: 8
|
||||
# Phase 3 (RFC internal#219 §1): surface broken flips without blocking
|
||||
# the PR yet. Follow-up flips this to `false` once the workflow itself
|
||||
# has clean recent runs on main. mc#664 interim — remove when CoE→false.
|
||||
continue-on-error: true # mc#664
|
||||
# has clean recent runs on main. mc#774 interim — remove when CoE→false.
|
||||
continue-on-error: true # mc#774
|
||||
steps:
|
||||
- name: Check out PR head (full history for base-SHA access)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
name: lint-required-context-exists-in-bp
|
||||
|
||||
# Tier 2g hard-gate lint (per internal#350) — diff-based PR-time
|
||||
# Tier 2g hard-gate lint (per mc#774) — diff-based PR-time
|
||||
# check. When a PR adds a NEW commit-status emission (workflow YAML
|
||||
# `name:` + job `name:`-or-key + on:-event), the workflow file must
|
||||
# carry one of three directives adjacent to the new job:
|
||||
@ -16,7 +16,7 @@ name: lint-required-context-exists-in-bp
|
||||
# PR#656 added `CI / all-required (pull_request)` as a sentinel
|
||||
# context that workflows emit, but BP did NOT list it. When
|
||||
# platform-build failed, all-required failed, but BP let the PR
|
||||
# merge anyway → cascade to mc#664. With this lint, PR#656 would
|
||||
# merge anyway → cascade to mc#774. With this lint, PR#656 would
|
||||
# have been blocked until either the BP PATCH ran alongside OR
|
||||
# the author added a `bp-required: pending` directive.
|
||||
#
|
||||
@ -27,7 +27,7 @@ name: lint-required-context-exists-in-bp
|
||||
# share the workflow-context enumeration helpers
|
||||
# (`_event_map`, `workflow_contexts`, `_job_display`) but the
|
||||
# semantics are intentionally distinct so they're separate scripts.
|
||||
# Co-design is documented in internal#350.
|
||||
# Co-design is documented in mc#774.
|
||||
#
|
||||
# Directive comment lives in the workflow file (NOT PR body)
|
||||
# ----------------------------------------------------------
|
||||
@ -42,13 +42,13 @@ name: lint-required-context-exists-in-bp
|
||||
# Lands at `continue-on-error: true` (Phase 3 — surface the
|
||||
# pattern without blocking PRs while the directive convention
|
||||
# beds in). After 7 days of clean runs on `main` with no false
|
||||
# positives, follow-up flips to `false`. Tracking: internal#350.
|
||||
# positives, follow-up flips to `false`. Tracking: mc#774.
|
||||
#
|
||||
# Cross-links
|
||||
# -----------
|
||||
# - internal#350 (the RFC that specs this lint)
|
||||
# - mc#774 (the RFC that specs this lint)
|
||||
# - PR#656 (the empirical case)
|
||||
# - mc#664 (the surfaced cascade)
|
||||
# - mc#774 (the surfaced cascade)
|
||||
# - feedback_phantom_required_check_after_gitea_migration (Tier 2f cousin)
|
||||
# - feedback_behavior_based_ast_gates
|
||||
#
|
||||
@ -83,8 +83,8 @@ jobs:
|
||||
timeout-minutes: 5
|
||||
# Phase 3 (RFC #219 §1): surface the pattern without blocking PRs
|
||||
# while the directive convention beds in. Follow-up flip to false
|
||||
# after 7 clean days on main. internal#350.
|
||||
continue-on-error: true # mc#664 Phase 3 — flip to false after 7 clean main runs
|
||||
# after 7 clean days on main. mc#774.
|
||||
continue-on-error: true # mc#774 Phase 3 — flip to false after 7 clean main runs
|
||||
steps:
|
||||
- name: Check out PR head with full history (need base SHA blobs)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@ -55,7 +55,7 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken shapes without blocking PRs.
|
||||
# Follow-up PR flips this off after the 4 existing-on-main rule-2
|
||||
# (workflow_run) violations are migrated to a supported trigger.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@ -9,18 +9,12 @@ name: publish-canvas-image
|
||||
# - Workflow-level env.GITHUB_SERVER_URL pinned per
|
||||
# feedback_act_runner_github_server_url.
|
||||
# - `continue-on-error: true` on each job (RFC §1 contract).
|
||||
# - **Open question for review**: this workflow pushes the canvas
|
||||
# image to `ghcr.io`. GHCR was retired during the 2026-05-06
|
||||
# Gitea migration in favor of ECR (per staging-verify.yml header
|
||||
# notes). The image may not be consumable post-migration. Two
|
||||
# options for follow-up: (a) retarget to
|
||||
# `153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/canvas`,
|
||||
# or (b) retire this workflow entirely and route canvas deploys
|
||||
# via the operator-host build path. tier:low + continue-on-error
|
||||
# means failed pushes do not block PRs.
|
||||
# - Retargeted the image push from GHCR to ECR. GHCR was retired during
|
||||
# the 2026-05-06 Gitea migration, and Gitea's GITHUB_TOKEN cannot
|
||||
# authenticate to ghcr.io.
|
||||
#
|
||||
|
||||
# Builds and pushes the canvas Docker image to GHCR whenever a commit lands
|
||||
# Builds and pushes the canvas Docker image to ECR whenever a commit lands
|
||||
# on main that touches canvas code. Previously canvas changes were visible in
|
||||
# CI (npm run build passed) but the live container was never updated —
|
||||
# operators had to manually run `docker compose build canvas` each time.
|
||||
@ -45,10 +39,10 @@ on:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write # required to push to ghcr.io/${{ github.repository_owner }}/*
|
||||
packages: write
|
||||
|
||||
env:
|
||||
IMAGE_NAME: ghcr.io/molecule-ai/canvas
|
||||
IMAGE_NAME: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/canvas
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
@ -62,22 +56,43 @@ jobs:
|
||||
# See issue #576 + infra-lead pulse ~00:30Z.
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Log in to GHCR
|
||||
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Log in to ECR
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ECR_REGISTRY="${IMAGE_NAME%%/*}"
|
||||
aws ecr get-login-password --region us-east-2 | \
|
||||
docker login --username AWS --password-stdin "${ECR_REGISTRY}"
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
|
||||
|
||||
- name: Ensure ECR repository exists
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
run: |
|
||||
set -euo pipefail
|
||||
repo_path="${IMAGE_NAME#*/}"
|
||||
if ! aws ecr describe-repositories --repository-names "${repo_path}" --region us-east-2 >/dev/null 2>&1; then
|
||||
aws ecr create-repository \
|
||||
--repository-name "${repo_path}" \
|
||||
--image-scanning-configuration scanOnPush=true \
|
||||
--region us-east-2 >/dev/null
|
||||
fi
|
||||
|
||||
# Health check: verify Docker daemon is accessible before attempting any
|
||||
# build steps. This fails loudly at step 1 when the runner's docker.sock
|
||||
# is inaccessible rather than silently continuing to the build step
|
||||
@ -126,7 +141,7 @@ jobs:
|
||||
echo "platform_url=${PLATFORM_URL}" >> "$GITHUB_OUTPUT"
|
||||
echo "ws_url=${WS_URL}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Build & push canvas image to GHCR
|
||||
- name: Build & push canvas image to ECR
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: ./canvas
|
||||
@ -142,6 +157,6 @@ jobs:
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
labels: |
|
||||
org.opencontainers.image.source=https://github.com/${{ github.repository }}
|
||||
org.opencontainers.image.source=https://git.moleculesai.app/${{ github.repository }}
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.description=Molecule AI canvas (Next.js 15 + React Flow)
|
||||
|
||||
@ -55,7 +55,7 @@ jobs:
|
||||
# The actual bump work happens on the main/staging push after merge.
|
||||
pr-validate:
|
||||
runs-on: ubuntu-latest
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true # do not block PR merge on operational failures
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@ -51,7 +51,7 @@ jobs:
|
||||
name: Audit Railway env vars for drift-prone pins
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 10
|
||||
|
||||
|
||||
@ -86,7 +86,7 @@ jobs:
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' }}
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 25
|
||||
steps:
|
||||
|
||||
@ -76,7 +76,7 @@ jobs:
|
||||
redeploy:
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 25
|
||||
steps:
|
||||
|
||||
@ -53,7 +53,7 @@ jobs:
|
||||
# runners with internet access to package mirrors). Falls back to GitHub
|
||||
# binary download. GitHub releases may be blocked on some runner networks
|
||||
# (infra#241 follow-up).
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
run: |
|
||||
if apt-get update -qq && apt-get install -y -qq jq; then
|
||||
|
||||
@ -67,7 +67,7 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
|
||||
# the PR. Follow-up PR flips this off after surfaced defects are
|
||||
# triaged.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@ -52,7 +52,7 @@ jobs:
|
||||
detect-changes:
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
wheel: ${{ steps.decide.outputs.wheel }}
|
||||
@ -97,7 +97,7 @@ jobs:
|
||||
name: PR-built wheel + import smoke
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: No-op pass (paths filter excluded this commit)
|
||||
|
||||
@ -57,7 +57,7 @@ jobs:
|
||||
name: Detect SECRET_PATTERNS drift
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
|
||||
@ -64,8 +64,8 @@ jobs:
|
||||
tier-check:
|
||||
runs-on: ubuntu-latest
|
||||
# BURN-IN: continue-on-error prevents AND-composition from blocking
|
||||
# PRs during the 7-day window. Remove after 2026-05-17 (mc#664).
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# PRs during the 7-day window. Remove after 2026-05-17 (mc#774).
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
permissions:
|
||||
contents: read
|
||||
@ -90,7 +90,7 @@ jobs:
|
||||
# runners). The sop-tier-check script has its own fallback as a
|
||||
# third line of defense. continue-on-error: true ensures this step
|
||||
# failing does not block the job.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
run: |
|
||||
# apt-get is the primary method — Ubuntu package mirrors are reliably
|
||||
@ -111,7 +111,7 @@ jobs:
|
||||
# continue-on-error: true at step level — job-level is ignored by Gitea
|
||||
# Actions (quirk #10, internal runbooks). Belt-and-suspenders with
|
||||
# SOP_FAIL_OPEN=1 + || true below.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
@ -85,7 +85,7 @@ jobs:
|
||||
staging-smoke:
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
sha: ${{ steps.compute.outputs.sha }}
|
||||
@ -206,7 +206,7 @@ jobs:
|
||||
if: ${{ needs.staging-smoke.result == 'success' && needs.staging-smoke.outputs.smoke_ran == 'true' }}
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
env:
|
||||
SHA: ${{ needs.staging-smoke.outputs.sha }}
|
||||
|
||||
@ -61,7 +61,7 @@ jobs:
|
||||
name: Sweep AWS Secrets Manager
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
# 30 min cap, mirroring the other janitors. AWS DeleteSecret is
|
||||
# fast (~0.3s/call) so even a 100+ backlog drains in seconds
|
||||
|
||||
@ -71,7 +71,7 @@ jobs:
|
||||
name: Sweep CF orphans
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
# 3 min surfaces hangs (CF API stall, AWS describe-instances stuck)
|
||||
# within one cron interval instead of burning a full tick. Realistic
|
||||
|
||||
@ -55,7 +55,7 @@ jobs:
|
||||
name: Sweep CF tunnels
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
# 30 min cap. Was 5 min on the theory that the only thing that
|
||||
# could take >5min is a CF-API hang — but on 2026-05-02 a backlog
|
||||
|
||||
@ -46,7 +46,7 @@ jobs:
|
||||
name: Ops scripts (unittest)
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@ -31,7 +31,7 @@ jobs:
|
||||
name: Weekly Platform-Go Surface
|
||||
runs-on: ubuntu-latest
|
||||
# continue-on-error: surface only, never block
|
||||
# mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
defaults:
|
||||
run:
|
||||
|
||||
@ -292,8 +292,8 @@ func filterPeersByQuery(peers []map[string]interface{}, q string) []map[string]i
|
||||
needle := strings.ToLower(q)
|
||||
out := make([]map[string]interface{}, 0, len(peers))
|
||||
for _, p := range peers {
|
||||
name := p["name"].(string)
|
||||
role := p["role"].(string)
|
||||
name, _ := p["name"].(string) // nil → "" — safe on empty-role rows
|
||||
role, _ := p["role"].(string) // nil → "" — queryPeerMaps sets nil when DB role is empty
|
||||
if strings.Contains(strings.ToLower(name), needle) ||
|
||||
strings.Contains(strings.ToLower(role), needle) {
|
||||
out = append(out, p)
|
||||
|
||||
@ -394,6 +394,80 @@ func TestPeers_Q_NoMatches_RawBodyIsArrayNotNull(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestFilterPeersByQuery_NilRoleRegression is the regression gate for
|
||||
// mc#730/#731: queryPeerMaps sets peer["role"] = nil when the DB role column
|
||||
// is empty (discovery.go lines 337-341). filterPeersByQuery did a bare
|
||||
// type assertion p["role"].(string) which panics on nil. The fix uses the
|
||||
// comma-ok form so nil → "". The test passes a map with nil name and nil
|
||||
// role and asserts no panic + correct filter behaviour.
|
||||
func TestFilterPeersByQuery_NilRoleRegression(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
peers []map[string]interface{}
|
||||
q string
|
||||
wantLen int
|
||||
wantIDs []string
|
||||
}{
|
||||
{
|
||||
name: "nil role matches on name",
|
||||
peers: []map[string]interface{}{
|
||||
{"id": "ws-a", "name": nil, "role": nil},
|
||||
{"id": "ws-b", "name": "Alpha Builder", "role": nil},
|
||||
{"id": "ws-c", "name": "Beta Builder", "role": nil},
|
||||
},
|
||||
q: "alpha",
|
||||
wantLen: 1,
|
||||
wantIDs: []string{"ws-b"},
|
||||
},
|
||||
{
|
||||
name: "nil name matches on nil role (empty string)",
|
||||
peers: []map[string]interface{}{
|
||||
{"id": "ws-x", "name": nil, "role": nil},
|
||||
{"id": "ws-y", "name": "Dev Workspace", "role": nil},
|
||||
},
|
||||
q: "",
|
||||
wantLen: 2, // empty q is a no-op
|
||||
wantIDs: []string{"ws-x", "ws-y"},
|
||||
},
|
||||
{
|
||||
name: "all nil — no panic, returns input",
|
||||
peers: []map[string]interface{}{
|
||||
{"id": "ws-z", "name": nil, "role": nil},
|
||||
},
|
||||
q: "anything",
|
||||
wantLen: 0,
|
||||
wantIDs: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := filterPeersByQuery(tc.peers, tc.q)
|
||||
if len(got) != tc.wantLen {
|
||||
t.Fatalf("len: got %d, want %d", len(got), tc.wantLen)
|
||||
}
|
||||
gotIDs := make([]string, len(got))
|
||||
for i, p := range got {
|
||||
gotIDs[i] = p["id"].(string)
|
||||
}
|
||||
if tc.wantIDs != nil {
|
||||
for _, id := range tc.wantIDs {
|
||||
found := false
|
||||
for _, g := range gotIDs {
|
||||
if g == id {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("missing id %q; got IDs: %v", id, gotIDs)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func keysOf(m map[string]struct{}) []string {
|
||||
out := make([]string, 0, len(m))
|
||||
for k := range m {
|
||||
|
||||
@ -43,6 +43,27 @@ func (s *syncBuf) String() string {
|
||||
return s.b.String()
|
||||
}
|
||||
|
||||
// unwrapGoError extracts subprocess stderr from a Go-wrapped error that
|
||||
// includes combined output. e.g. from sendSSHPublicKey's
|
||||
// fmt.Errorf("send-ssh-public-key: %w (%s)", err, combinedOut), this
|
||||
// returns the " (%s)" portion — the actionable subprocess signal like
|
||||
// "AccessDeniedException: ... is not authorized to perform:
|
||||
// ec2-instance-connect:OpenTunnel". Returns "" when the output is
|
||||
// identical to the error string (no stderr captured).
|
||||
func unwrapGoError(errMsg string) string {
|
||||
// Extract content between the last '(' and trailing ')'. The
|
||||
// sendSSHPublicKey wrapper uses fmt.Errorf("...: %w (%s)", err, combinedOut)
|
||||
// so the subprocess stderr is always the last parenthesised segment,
|
||||
// e.g. "send-ssh-public-key: exit status 1 (AccessDeniedException: ...)"
|
||||
// — note the closing ')' is at the very end with no trailing space.
|
||||
open := strings.LastIndex(errMsg, "(")
|
||||
if open < 0 {
|
||||
return ""
|
||||
}
|
||||
inner := errMsg[open+1:]
|
||||
return strings.TrimSuffix(inner, ")")
|
||||
}
|
||||
|
||||
// HandleDiagnose handles GET /workspaces/:id/terminal/diagnose. It runs the
|
||||
// same per-step pipeline as HandleConnect (ssh-keygen → EIC send-key → tunnel
|
||||
// → ssh) but non-interactively, captures the first failing step and its
|
||||
@ -214,12 +235,18 @@ func (h *TerminalHandler) diagnoseRemote(ctx context.Context, workspaceID, insta
|
||||
}
|
||||
|
||||
// Step 2: send-ssh-public-key (AWS Instance Connect)
|
||||
// mc#687: populate Detail so the E2E smoke sees the AWS permission error
|
||||
// verbatim. The subprocess stderr (e.g. "AccessDeniedException: ... is not
|
||||
// authorized to perform: ec2-instance-connect:OpenTunnel") is captured by
|
||||
// sendSSHPublicKey's CombinedOutput() and embedded in the Go error string.
|
||||
t0 = time.Now()
|
||||
if err := sendSSHPublicKey(ctx, region, instanceID, osUser, strings.TrimSpace(string(pubKey))); err != nil {
|
||||
errMsg := err.Error()
|
||||
return stop("send-ssh-public-key", diagnoseStep{
|
||||
Name: "send-ssh-public-key",
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Error: err.Error(),
|
||||
Error: errMsg,
|
||||
Detail: unwrapGoError(errMsg),
|
||||
})
|
||||
}
|
||||
res.Steps = append(res.Steps, diagnoseStep{Name: "send-ssh-public-key", OK: true, DurationMs: time.Since(t0).Milliseconds()})
|
||||
|
||||
@ -245,3 +245,50 @@ func TestDiagnoseRemote_StopsAtSSHProbe(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestUnwrapGoError pins the unwrapGoError helper that extracts subprocess
|
||||
// stderr from the Go-wrapped error string produced by sendSSHPublicKey.
|
||||
// Regression gate for mc#687: the E2E smoke now reads detail (not error),
|
||||
// so detail MUST contain the actionable AWS permission signal.
|
||||
func TestUnwrapGoError(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "AWS permission denied",
|
||||
input: "send-ssh-public-key: exec: exit status 1 (AccessDeniedException: User: arn:aws:iam::123456789012:role/TestRole is not authorized to perform: ec2-instance-connect:OpenTunnel)",
|
||||
want: "AccessDeniedException: User: arn:aws:iam::123456789012:role/TestRole is not authorized to perform: ec2-instance-connect:OpenTunnel",
|
||||
},
|
||||
{
|
||||
name: "generic exec error no output",
|
||||
input: "send-ssh-public-key: exec: exit status 1",
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
name: "empty string",
|
||||
input: "",
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
name: "short string below threshold",
|
||||
input: "err",
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
name: "no parentheses",
|
||||
input: "send-ssh-public-key: something went wrong",
|
||||
want: "",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := unwrapGoError(tc.input)
|
||||
if got != tc.want {
|
||||
t.Errorf("unwrapGoError(%q): got %q, want %q", tc.input, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user