Merge remote-tracking branch 'dev-lead/main' into pr693-test

2026-05-12 19:12:23 -07:00 · 2026-05-12 19:12:23 -07:00 · a224740d4d
commit a224740d4d
parent 23e408379d bb531afa30
40 changed files with 268 additions and 105 deletions
--- a/.gitea/workflows/block-internal-paths.yml
+++ b/.gitea/workflows/block-internal-paths.yml
@ -37,7 +37,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
    # the PR. Follow-up PR flips this off after surfaced defects are
    # triaged.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
--- a/.gitea/workflows/cascade-list-drift-gate.yml
+++ b/.gitea/workflows/cascade-list-drift-gate.yml
@ -48,7 +48,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
    # the PR. Follow-up PR flips this off after surfaced defects are
    # triaged.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
--- a/.gitea/workflows/check-migration-collisions.yml
+++ b/.gitea/workflows/check-migration-collisions.yml
@ -45,7 +45,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
    # the PR. Follow-up PR flips this off after surfaced defects are
    # triaged.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 5
    steps:
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@ -126,7 +126,7 @@ jobs:
    name: Platform (Go)
    needs: changes
    runs-on: ubuntu-latest
-    # mc#664 (interim): re-mask platform-build pending fix-forward. Phase 4
+    # mc#774 (interim): re-mask platform-build pending fix-forward. Phase 4
    # (#656) flipped this to continue-on-error: false based on a Phase-3-masked
    # "green on main 2026-05-12" — the prior continue-on-error: true had
    # been hiding failing tests in workspace-server/internal/handlers/.
@ -145,11 +145,11 @@ jobs:
    # Time-boxed Option A (90 min) did not fit the cross-cutting scope.
    # This is a sequenced revert→fix→reflip per
    # feedback_strict_root_only_after_class_a emergency clause — NOT
-    # a permanent re-mask. Re-flip blocked on mc#664 fix-forward landing.
+    # a permanent re-mask. Re-flip blocked on mc#774 fix-forward landing.
    # Other 4 #656 flips (changes, canvas-build, shellcheck, python-lint)
    # retain continue-on-error: false; only platform-build regresses.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true  # mc#664 fix-forward in flight; re-flip when mc#664 lands (PR #669 → rebase after #709)
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    continue-on-error: true  # mc#774 fix-forward in flight; re-flip when mc#774 lands (PR #669 → rebase after #709)
    defaults:
      run:
        working-directory: workspace-server
@ -187,7 +187,7 @@ jobs:
          echo "::group::pendinguploads exit=$pu_exit (last 100 lines)"
          tail -100 /tmp/test-pu.log
          echo "::endgroup::"
-        # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true
      - if: needs.changes.outputs.platform == 'true'
        name: Run tests with race detection and coverage
@ -374,7 +374,7 @@ jobs:
  canvas-deploy-reminder:
    name: Canvas Deploy Reminder
    runs-on: ubuntu-latest
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    needs: [changes, canvas-build]
    # Only fires on direct pushes to main (i.e. after staging→main promotion).
@ -539,7 +539,7 @@ jobs:
    # `.gitea/scripts/ci-required-drift.py::ci_job_names`).
    #
    # Phase 3 (RFC #219 §1) safety: underlying build jobs carry
-    # continue-on-error: true so their failures are masked to null (2026-05-12: re-enabled mc#664 interim)
+    # continue-on-error: true so their failures are masked to null (2026-05-12: re-enabled mc#774 interim)
    # (Gitea suppresses status reporting for CoE jobs). This sentinel
    # runs with continue-on-error: false so it always reports its
    # result to the API — without this, the required-status entry
@ -572,7 +572,7 @@ jobs:
          import json, sys
          ns = json.load(sys.stdin)
          # Phase 3 masked: jobs with continue-on-error: true may report "failure"
-          # Remove when mc#664 handler test failures are resolved.
+          # Remove when mc#774 handler test failures are resolved.
          PHASE3_MASKED = {"platform-build"}
          # Exclude null (Phase 3 suppressed / in-flight) from the bad list.
          bad = [(k, v.get("result")) for k, v in ns.items()
--- a/.gitea/workflows/continuous-synth-e2e.yml
+++ b/.gitea/workflows/continuous-synth-e2e.yml
@ -90,7 +90,7 @@ jobs:
    name: Synthetic E2E against staging
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    # Bumped from 12 → 20 (2026-05-04). Tenant user-data install phase
    # (apt-get update + install docker.io/jq/awscli/caddy + snap install
--- a/.gitea/workflows/e2e-api.yml
+++ b/.gitea/workflows/e2e-api.yml
@ -103,7 +103,7 @@ jobs:
  detect-changes:
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      api: ${{ steps.decide.outputs.api }}
@ -155,7 +155,7 @@ jobs:
    name: E2E API Smoke Test
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 15
    env:
--- a/.gitea/workflows/e2e-staging-canvas.yml
+++ b/.gitea/workflows/e2e-staging-canvas.yml
@ -70,7 +70,7 @@ jobs:
  detect-changes:
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      canvas: ${{ steps.decide.outputs.canvas }}
@ -119,7 +119,7 @@ jobs:
    name: Canvas tabs E2E
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 40

--- a/.gitea/workflows/e2e-staging-external.yml
+++ b/.gitea/workflows/e2e-staging-external.yml
@ -84,7 +84,7 @@ jobs:
    name: E2E Staging External Runtime
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 25

--- a/.gitea/workflows/e2e-staging-saas.yml
+++ b/.gitea/workflows/e2e-staging-saas.yml
@ -88,20 +88,20 @@ jobs:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 1
-        # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true

      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.11"
-        # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true

      - name: YAML validation (best-effort)
        run: |
          echo "e2e-staging-saas.yml — PR validation: workflow YAML is valid."
          echo "E2E step runs only when provisioning-critical files change."
-        # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true

  # Actual E2E: runs on trunk pushes (main + staging). NOT the PR-fire-only
@ -112,7 +112,7 @@ jobs:
    # Only runs on trunk pushes. PR paths get pr-validate instead.
    if: github.event.pull_request.base.ref == ''
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 45
    permissions:
--- a/.gitea/workflows/e2e-staging-sanity.yml
+++ b/.gitea/workflows/e2e-staging-sanity.yml
@ -37,7 +37,7 @@ jobs:
    name: Intentional-failure teardown sanity
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 20

--- a/.gitea/workflows/gate-check-v3.yml
+++ b/.gitea/workflows/gate-check-v3.yml
@ -46,7 +46,7 @@ env:
 jobs:
  gate-check:
    runs-on: ubuntu-latest
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true  # Never block on our own detector failing
    steps:
      - name: Check out BASE ref (never PR-head under pull_request_target)
--- a/.gitea/workflows/handlers-postgres-integration.yml
+++ b/.gitea/workflows/handlers-postgres-integration.yml
@ -78,8 +78,8 @@ jobs:
  detect-changes:
    name: detect-changes
    runs-on: ubuntu-latest
-    # mc#664 Phase 3 (RFC §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774 Phase 3 (RFC §1): surface broken workflows without blocking.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      handlers: ${{ steps.filter.outputs.handlers }}
@ -119,8 +119,8 @@ jobs:
    name: Handlers Postgres Integration
    needs: detect-changes
    runs-on: ubuntu-latest
-    # mc#664 Phase 3 (RFC §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774 Phase 3 (RFC §1): surface broken workflows without blocking.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    env:
      # Unique name per run so concurrent jobs don't collide on the
--- a/.gitea/workflows/harness-replays.yml
+++ b/.gitea/workflows/harness-replays.yml
@ -63,7 +63,7 @@ jobs:
  detect-changes:
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      run: ${{ steps.decide.outputs.run }}
@ -155,7 +155,7 @@ jobs:
    name: Harness Replays
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 30
    steps:
--- a/.gitea/workflows/lint-bp-context-emit-match.yml
+++ b/.gitea/workflows/lint-bp-context-emit-match.yml
@ -1,6 +1,6 @@
 name: lint-bp-context-emit-match

-# Tier 2f scheduled lint (per internal#350) — detects drift between
+# Tier 2f scheduled lint (per mc#774) — detects drift between
 # `branch_protections/<branch>.status_check_contexts` and the set of
 # contexts emitted by `.gitea/workflows/*.yml`.
 #
@ -60,7 +60,7 @@ name: lint-bp-context-emit-match
 #
 # Cross-links
 # -----------
-# - internal#350 (the RFC that specs this lint)
+# - mc#774 (the RFC that specs this lint)
 # - internal#349 (cross-repo BP sweep)
 # - feedback_phantom_required_check_after_gitea_migration
 # - feedback_tier_label_ids_are_per_repo
@ -94,7 +94,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface drift without blocking. After 7
    # clean scheduled runs on main, flip to false so a scheduled
    # failure is a hard CI signal.
-    continue-on-error: true  # mc#664 Phase 3 — flip to false after 7 clean main runs
+    continue-on-error: true  # mc#774 Phase 3 — flip to false after 7 clean main runs
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
--- a/.gitea/workflows/lint-continue-on-error-tracking.yml
+++ b/.gitea/workflows/lint-continue-on-error-tracking.yml
@ -1,6 +1,6 @@
 name: lint-continue-on-error-tracking

-# Tier 2e hard-gate lint (per mc#664) — every
+# Tier 2e hard-gate lint (per mc#774) — every
 # `continue-on-error: true` in `.gitea/workflows/*.yml` must carry a
 # `# mc#NNNN` or `# internal#NNNN` tracker comment within 2 lines,
 # the referenced issue must be OPEN, and ≤14 days old.
@ -8,7 +8,7 @@ name: lint-continue-on-error-tracking
 # Why this exists
 # ---------------
 # `continue-on-error: true` on `platform-build` had been hiding
-# mc#664-class regressions for ~3 weeks before #656 surfaced them on
+# mc#774-class regressions for ~3 weeks before #656 surfaced them on
 # 2026-05-12. A 14-day cap on tracker age forces a review cycle and
 # surfaces mask-drift within at most 14 days of the original defect.
 # Each `continue-on-error: true` gets a paper trail — close or renew.
@ -45,12 +45,12 @@ name: lint-continue-on-error-tracking
 # close-and-flip, or document the deliberate keep-mask in a fresh
 # 14-day-renewable tracker. After main is clean for 3 days,
 # follow-up PR flips this workflow's continue-on-error to false.
-# Tracking: mc#664.
+# Tracking: mc#774.
 #
 # Cross-links
 # -----------
-# - mc#664 (the RFC that specs this lint)
-# - mc#664 (the empirical masked-3-weeks case)
+# - mc#774 (the RFC that specs this lint)
+# - mc#774 (the empirical masked-3-weeks case)
 # - feedback_chained_defects_in_never_tested_workflows
 # - feedback_behavior_based_ast_gates
 # - feedback_strict_root_only_after_class_a
@ -96,9 +96,9 @@ jobs:
    # Phase 3 (RFC #219 §1): surface masked defects without blocking
    # PRs. Pre-existing continue-on-error: true directives on main
    # all violate this lint at first — intentional. Flip to false
-    # follow-up after main is clean for 3 days. mc#664.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true  # mc#664 Phase 3 mask — 14d forced-renewal cadence
+    # follow-up after main is clean for 3 days. mc#774.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    continue-on-error: true  # mc#774 Phase 3 mask — 14d forced-renewal cadence
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
--- a/.gitea/workflows/lint-curl-status-capture.yml
+++ b/.gitea/workflows/lint-curl-status-capture.yml
@ -45,7 +45,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
    # the PR. Follow-up PR flips this off after surfaced defects are
    # triaged.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
--- a/.gitea/workflows/lint-mask-pr-atomicity.yml
+++ b/.gitea/workflows/lint-mask-pr-atomicity.yml
@ -1,6 +1,6 @@
 name: lint-mask-pr-atomicity

-# Tier 2d hard-gate lint (per mc#664) — blocks PRs that touch
+# Tier 2d hard-gate lint (per mc#774) — blocks PRs that touch
 # `.gitea/workflows/ci.yml` and modify ONLY ONE of {continue-on-error,
 # all-required.sentinel.needs} without a `Paired: #NNN` reference in
 # the PR body or in a commit message.
@ -37,13 +37,13 @@ name: lint-mask-pr-atomicity
 # This workflow lands at `continue-on-error: true` (Phase 3 — surface
 # regressions without blocking PRs while the rule beds in).
 # Follow-up PR flips to `false` once we have ≥3 days of clean runs on
-# `main` and no false-positives. Tracking issue: mc#664.
+# `main` and no false-positives. Tracking issue: mc#774.
 #
 # Cross-links
 # -----------
-# - mc#664 (the RFC that specs this lint)
+# - mc#774 (the RFC that specs this lint)
 # - PR#665 / PR#668 (the empirical split-pair)
-# - mc#664 (the main-red incident the split caused)
+# - mc#774 (the main-red incident the split caused)
 # - feedback_strict_root_only_after_class_a
 # - feedback_behavior_based_ast_gates
 #
@ -91,8 +91,8 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken shapes without blocking
    # PRs. Follow-up PR flips this to `false` once recent runs on main
    # are confirmed clean (eat-our-own-dogfood discipline mirrors
-    # PR#673's same-shape comment). Tracking: mc#664.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # PR#673's same-shape comment). Tracking: mc#774.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - name: Check out PR head with full history (need base SHA blobs)
--- a/.gitea/workflows/lint-pre-flip-continue-on-error.yml
+++ b/.gitea/workflows/lint-pre-flip-continue-on-error.yml
@ -4,7 +4,7 @@ name: Lint pre-flip continue-on-error
 # on any job in `.gitea/workflows/*.yml` WITHOUT proof that the affected
 # job's recent runs on the target branch (PR base) are actually green.
 #
-# Empirical class: PR #656 / mc#664. PR #656 (RFC internal#219 Phase 4)
+# Empirical class: PR #656 / mc#774. PR #656 (RFC internal#219 Phase 4)
 # flipped 5 platform-build-class jobs `continue-on-error: true → false`
 # on the basis of a "verified green on main via combined-status check".
 # But that "green" was the LIE the prior `continue-on-error: true`
@ -13,7 +13,7 @@ name: Lint pre-flip continue-on-error
 # job-level status. The precondition the PR claimed to verify was
 # structurally fooled by the bug being flipped.
 #
-# mc#664 captured the surfaced defects (2 mutually-masked regressions):
+# mc#774 captured the surfaced defects (2 mutually-masked regressions):
 #   - Class 1: sqlmock helper drift since 2f36bb9a (24 days old)
 #   - Class 2: OFFSEC-001 contract collision since 7d1a189f (1 day old)
 #
@ -55,7 +55,7 @@ name: Lint pre-flip continue-on-error
 #   - YAML parse error in one of the workflow files: warn-only,
 #     don't block — the YAML lint workflows catch this separately.
 #
-# Cross-links: PR#656, mc#664, PR#665 (interim re-mask),
+# Cross-links: PR#656, mc#774, PR#665 (interim re-mask),
 # Quirk #10 (internal#342 + dup #287), hongming-pc2 charter
 # §SOP-N rule (e), feedback_strict_root_only_after_class_a,
 # feedback_no_shared_persona_token_use.
@ -99,8 +99,8 @@ jobs:
    timeout-minutes: 8
    # Phase 3 (RFC internal#219 §1): surface broken flips without blocking
    # the PR yet. Follow-up flips this to `false` once the workflow itself
-    # has clean recent runs on main. mc#664 interim — remove when CoE→false.
-    continue-on-error: true  # mc#664
+    # has clean recent runs on main. mc#774 interim — remove when CoE→false.
+    continue-on-error: true  # mc#774
    steps:
      - name: Check out PR head (full history for base-SHA access)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
--- a/.gitea/workflows/lint-required-context-exists-in-bp.yml
+++ b/.gitea/workflows/lint-required-context-exists-in-bp.yml
@ -1,6 +1,6 @@
 name: lint-required-context-exists-in-bp

-# Tier 2g hard-gate lint (per internal#350) — diff-based PR-time
+# Tier 2g hard-gate lint (per mc#774) — diff-based PR-time
 # check. When a PR adds a NEW commit-status emission (workflow YAML
 # `name:` + job `name:`-or-key + on:-event), the workflow file must
 # carry one of three directives adjacent to the new job:
@ -16,7 +16,7 @@ name: lint-required-context-exists-in-bp
 # PR#656 added `CI / all-required (pull_request)` as a sentinel
 # context that workflows emit, but BP did NOT list it. When
 # platform-build failed, all-required failed, but BP let the PR
-# merge anyway → cascade to mc#664. With this lint, PR#656 would
+# merge anyway → cascade to mc#774. With this lint, PR#656 would
 # have been blocked until either the BP PATCH ran alongside OR
 # the author added a `bp-required: pending` directive.
 #
@ -27,7 +27,7 @@ name: lint-required-context-exists-in-bp
 # share the workflow-context enumeration helpers
 # (`_event_map`, `workflow_contexts`, `_job_display`) but the
 # semantics are intentionally distinct so they're separate scripts.
-# Co-design is documented in internal#350.
+# Co-design is documented in mc#774.
 #
 # Directive comment lives in the workflow file (NOT PR body)
 # ----------------------------------------------------------
@ -42,13 +42,13 @@ name: lint-required-context-exists-in-bp
 # Lands at `continue-on-error: true` (Phase 3 — surface the
 # pattern without blocking PRs while the directive convention
 # beds in). After 7 days of clean runs on `main` with no false
-# positives, follow-up flips to `false`. Tracking: internal#350.
+# positives, follow-up flips to `false`. Tracking: mc#774.
 #
 # Cross-links
 # -----------
-# - internal#350 (the RFC that specs this lint)
+# - mc#774 (the RFC that specs this lint)
 # - PR#656 (the empirical case)
-# - mc#664 (the surfaced cascade)
+# - mc#774 (the surfaced cascade)
 # - feedback_phantom_required_check_after_gitea_migration (Tier 2f cousin)
 # - feedback_behavior_based_ast_gates
 #
@ -83,8 +83,8 @@ jobs:
    timeout-minutes: 5
    # Phase 3 (RFC #219 §1): surface the pattern without blocking PRs
    # while the directive convention beds in. Follow-up flip to false
-    # after 7 clean days on main. internal#350.
-    continue-on-error: true  # mc#664 Phase 3 — flip to false after 7 clean main runs
+    # after 7 clean days on main. mc#774.
+    continue-on-error: true  # mc#774 Phase 3 — flip to false after 7 clean main runs
    steps:
      - name: Check out PR head with full history (need base SHA blobs)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
--- a/.gitea/workflows/lint-workflow-yaml.yml
+++ b/.gitea/workflows/lint-workflow-yaml.yml
@ -55,7 +55,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken shapes without blocking PRs.
    # Follow-up PR flips this off after the 4 existing-on-main rule-2
    # (workflow_run) violations are migrated to a supported trigger.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
--- a/.gitea/workflows/publish-canvas-image.yml
+++ b/.gitea/workflows/publish-canvas-image.yml
@ -9,18 +9,12 @@ name: publish-canvas-image
 #   - Workflow-level env.GITHUB_SERVER_URL pinned per
 #     feedback_act_runner_github_server_url.
 #   - `continue-on-error: true` on each job (RFC §1 contract).
-#   - **Open question for review**: this workflow pushes the canvas
-#     image to `ghcr.io`. GHCR was retired during the 2026-05-06
-#     Gitea migration in favor of ECR (per staging-verify.yml header
-#     notes). The image may not be consumable post-migration. Two
-#     options for follow-up: (a) retarget to
-#     `153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/canvas`,
-#     or (b) retire this workflow entirely and route canvas deploys
-#     via the operator-host build path. tier:low + continue-on-error
-#     means failed pushes do not block PRs.
+#   - Retargeted the image push from GHCR to ECR. GHCR was retired during
+#     the 2026-05-06 Gitea migration, and Gitea's GITHUB_TOKEN cannot
+#     authenticate to ghcr.io.
 #

-# Builds and pushes the canvas Docker image to GHCR whenever a commit lands
+# Builds and pushes the canvas Docker image to ECR whenever a commit lands
 # on main that touches canvas code. Previously canvas changes were visible in
 # CI (npm run build passed) but the live container was never updated —
 # operators had to manually run `docker compose build canvas` each time.
@ -45,10 +39,10 @@ on:

 permissions:
  contents: read
-  packages: write  # required to push to ghcr.io/${{ github.repository_owner }}/*
+  packages: write

 env:
-  IMAGE_NAME: ghcr.io/molecule-ai/canvas
+  IMAGE_NAME: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/canvas
  GITHUB_SERVER_URL: https://git.moleculesai.app

 jobs:
@ -62,22 +56,43 @@ jobs:
    # See issue #576 + infra-lead pulse ~00:30Z.
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

-      - name: Log in to GHCR
-        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Log in to ECR
+        env:
+          IMAGE_NAME: ${{ env.IMAGE_NAME }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: us-east-2
+        run: |
+          set -euo pipefail
+          ECR_REGISTRY="${IMAGE_NAME%%/*}"
+          aws ecr get-login-password --region us-east-2 | \
+            docker login --username AWS --password-stdin "${ECR_REGISTRY}"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0

+      - name: Ensure ECR repository exists
+        env:
+          IMAGE_NAME: ${{ env.IMAGE_NAME }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: us-east-2
+        run: |
+          set -euo pipefail
+          repo_path="${IMAGE_NAME#*/}"
+          if ! aws ecr describe-repositories --repository-names "${repo_path}" --region us-east-2 >/dev/null 2>&1; then
+            aws ecr create-repository \
+              --repository-name "${repo_path}" \
+              --image-scanning-configuration scanOnPush=true \
+              --region us-east-2 >/dev/null
+          fi
+
      # Health check: verify Docker daemon is accessible before attempting any
      # build steps. This fails loudly at step 1 when the runner's docker.sock
      # is inaccessible rather than silently continuing to the build step
@ -126,7 +141,7 @@ jobs:
          echo "platform_url=${PLATFORM_URL}" >> "$GITHUB_OUTPUT"
          echo "ws_url=${WS_URL}" >> "$GITHUB_OUTPUT"

-      - name: Build & push canvas image to GHCR
+      - name: Build & push canvas image to ECR
        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
        with:
          context: ./canvas
@ -142,6 +157,6 @@ jobs:
          cache-from: type=gha
          cache-to: type=gha,mode=max
          labels: |
-            org.opencontainers.image.source=https://github.com/${{ github.repository }}
+            org.opencontainers.image.source=https://git.moleculesai.app/${{ github.repository }}
            org.opencontainers.image.revision=${{ github.sha }}
            org.opencontainers.image.description=Molecule AI canvas (Next.js 15 + React Flow)
--- a/.gitea/workflows/publish-runtime-autobump.yml
+++ b/.gitea/workflows/publish-runtime-autobump.yml
@ -55,7 +55,7 @@ jobs:
  # The actual bump work happens on the main/staging push after merge.
  pr-validate:
    runs-on: ubuntu-latest
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true  # do not block PR merge on operational failures
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
--- a/.gitea/workflows/railway-pin-audit.yml
+++ b/.gitea/workflows/railway-pin-audit.yml
@ -51,7 +51,7 @@ jobs:
    name: Audit Railway env vars for drift-prone pins
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 10

--- a/.gitea/workflows/redeploy-tenants-on-main.yml
+++ b/.gitea/workflows/redeploy-tenants-on-main.yml
@ -86,7 +86,7 @@ jobs:
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 25
    steps:
--- a/.gitea/workflows/redeploy-tenants-on-staging.yml
+++ b/.gitea/workflows/redeploy-tenants-on-staging.yml
@ -76,7 +76,7 @@ jobs:
  redeploy:
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 25
    steps:
--- a/.gitea/workflows/review-check-tests.yml
+++ b/.gitea/workflows/review-check-tests.yml
@ -53,7 +53,7 @@ jobs:
        # runners with internet access to package mirrors). Falls back to GitHub
        # binary download. GitHub releases may be blocked on some runner networks
        # (infra#241 follow-up).
-        # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true
        run: |
          if apt-get update -qq && apt-get install -y -qq jq; then
--- a/.gitea/workflows/runtime-pin-compat.yml
+++ b/.gitea/workflows/runtime-pin-compat.yml
@ -67,7 +67,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
    # the PR. Follow-up PR flips this off after surfaced defects are
    # triaged.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
--- a/.gitea/workflows/runtime-prbuild-compat.yml
+++ b/.gitea/workflows/runtime-prbuild-compat.yml
@ -52,7 +52,7 @@ jobs:
  detect-changes:
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      wheel: ${{ steps.decide.outputs.wheel }}
@ -97,7 +97,7 @@ jobs:
    name: PR-built wheel + import smoke
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - name: No-op pass (paths filter excluded this commit)
--- a/.gitea/workflows/secret-pattern-drift.yml
+++ b/.gitea/workflows/secret-pattern-drift.yml
@ -57,7 +57,7 @@ jobs:
    name: Detect SECRET_PATTERNS drift
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 5
    steps:
--- a/.gitea/workflows/sop-tier-check.yml
+++ b/.gitea/workflows/sop-tier-check.yml
@ -64,8 +64,8 @@ jobs:
  tier-check:
    runs-on: ubuntu-latest
    # BURN-IN: continue-on-error prevents AND-composition from blocking
-    # PRs during the 7-day window. Remove after 2026-05-17 (mc#664).
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # PRs during the 7-day window. Remove after 2026-05-17 (mc#774).
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    permissions:
      contents: read
@ -90,7 +90,7 @@ jobs:
        # runners). The sop-tier-check script has its own fallback as a
        # third line of defense. continue-on-error: true ensures this step
        # failing does not block the job.
-        # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true
        run: |
          # apt-get is the primary method — Ubuntu package mirrors are reliably
@ -111,7 +111,7 @@ jobs:
        # continue-on-error: true at step level — job-level is ignored by Gitea
        # Actions (quirk #10, internal runbooks). Belt-and-suspenders with
        # SOP_FAIL_OPEN=1 + || true below.
-        # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true
        env:
          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
--- a/.gitea/workflows/staging-verify.yml
+++ b/.gitea/workflows/staging-verify.yml
@ -85,7 +85,7 @@ jobs:
  staging-smoke:
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      sha: ${{ steps.compute.outputs.sha }}
@ -206,7 +206,7 @@ jobs:
    if: ${{ needs.staging-smoke.result == 'success' && needs.staging-smoke.outputs.smoke_ran == 'true' }}
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    env:
      SHA: ${{ needs.staging-smoke.outputs.sha }}
--- a/.gitea/workflows/sweep-aws-secrets.yml
+++ b/.gitea/workflows/sweep-aws-secrets.yml
@ -61,7 +61,7 @@ jobs:
    name: Sweep AWS Secrets Manager
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    # 30 min cap, mirroring the other janitors. AWS DeleteSecret is
    # fast (~0.3s/call) so even a 100+ backlog drains in seconds
--- a/.gitea/workflows/sweep-cf-orphans.yml
+++ b/.gitea/workflows/sweep-cf-orphans.yml
@ -71,7 +71,7 @@ jobs:
    name: Sweep CF orphans
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    # 3 min surfaces hangs (CF API stall, AWS describe-instances stuck)
    # within one cron interval instead of burning a full tick. Realistic
--- a/.gitea/workflows/sweep-cf-tunnels.yml
+++ b/.gitea/workflows/sweep-cf-tunnels.yml
@ -55,7 +55,7 @@ jobs:
    name: Sweep CF tunnels
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    # 30 min cap. Was 5 min on the theory that the only thing that
    # could take >5min is a CF-API hang — but on 2026-05-02 a backlog
--- a/.gitea/workflows/test-ops-scripts.yml
+++ b/.gitea/workflows/test-ops-scripts.yml
@ -46,7 +46,7 @@ jobs:
    name: Ops scripts (unittest)
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
--- a/.gitea/workflows/weekly-platform-go.yml
+++ b/.gitea/workflows/weekly-platform-go.yml
@ -31,7 +31,7 @@ jobs:
    name: Weekly Platform-Go Surface
    runs-on: ubuntu-latest
    # continue-on-error: surface only, never block
-    # mc#664: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    defaults:
      run:
--- a/workspace-server/internal/handlers/discovery.go
+++ b/workspace-server/internal/handlers/discovery.go
@ -292,8 +292,8 @@ func filterPeersByQuery(peers []map[string]interface{}, q string) []map[string]i
 	needle := strings.ToLower(q)
 	out := make([]map[string]interface{}, 0, len(peers))
 	for _, p := range peers {
-		name := p["name"].(string)
-		role := p["role"].(string)
+		name, _ := p["name"].(string)  // nil → "" — safe on empty-role rows
+		role, _ := p["role"].(string)  // nil → "" — queryPeerMaps sets nil when DB role is empty
 		if strings.Contains(strings.ToLower(name), needle) ||
 			strings.Contains(strings.ToLower(role), needle) {
 			out = append(out, p)
--- a/workspace-server/internal/handlers/discovery_test.go
+++ b/workspace-server/internal/handlers/discovery_test.go
@ -394,6 +394,80 @@ func TestPeers_Q_NoMatches_RawBodyIsArrayNotNull(t *testing.T) {
 	}
 }

+// TestFilterPeersByQuery_NilRoleRegression is the regression gate for
+// mc#730/#731: queryPeerMaps sets peer["role"] = nil when the DB role column
+// is empty (discovery.go lines 337-341). filterPeersByQuery did a bare
+// type assertion p["role"].(string) which panics on nil. The fix uses the
+// comma-ok form so nil → "". The test passes a map with nil name and nil
+// role and asserts no panic + correct filter behaviour.
+func TestFilterPeersByQuery_NilRoleRegression(t *testing.T) {
+	cases := []struct {
+		name       string
+		peers      []map[string]interface{}
+		q          string
+		wantLen    int
+		wantIDs    []string
+	}{
+		{
+			name: "nil role matches on name",
+			peers: []map[string]interface{}{
+				{"id": "ws-a", "name": nil, "role": nil},
+				{"id": "ws-b", "name": "Alpha Builder", "role": nil},
+				{"id": "ws-c", "name": "Beta Builder", "role": nil},
+			},
+			q:       "alpha",
+			wantLen: 1,
+			wantIDs: []string{"ws-b"},
+		},
+		{
+			name: "nil name matches on nil role (empty string)",
+			peers: []map[string]interface{}{
+				{"id": "ws-x", "name": nil, "role": nil},
+				{"id": "ws-y", "name": "Dev Workspace", "role": nil},
+			},
+			q:       "",
+			wantLen: 2, // empty q is a no-op
+			wantIDs: []string{"ws-x", "ws-y"},
+		},
+		{
+			name: "all nil — no panic, returns input",
+			peers: []map[string]interface{}{
+				{"id": "ws-z", "name": nil, "role": nil},
+			},
+			q:       "anything",
+			wantLen: 0,
+			wantIDs: nil,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := filterPeersByQuery(tc.peers, tc.q)
+			if len(got) != tc.wantLen {
+				t.Fatalf("len: got %d, want %d", len(got), tc.wantLen)
+			}
+			gotIDs := make([]string, len(got))
+			for i, p := range got {
+				gotIDs[i] = p["id"].(string)
+			}
+			if tc.wantIDs != nil {
+				for _, id := range tc.wantIDs {
+					found := false
+					for _, g := range gotIDs {
+						if g == id {
+							found = true
+							break
+						}
+					}
+					if !found {
+						t.Errorf("missing id %q; got IDs: %v", id, gotIDs)
+					}
+				}
+			}
+		})
+	}
+}
+
 func keysOf(m map[string]struct{}) []string {
 	out := make([]string, 0, len(m))
 	for k := range m {
--- a/workspace-server/internal/handlers/terminal_diagnose.go
+++ b/workspace-server/internal/handlers/terminal_diagnose.go
@ -43,6 +43,27 @@ func (s *syncBuf) String() string {
 	return s.b.String()
 }

+// unwrapGoError extracts subprocess stderr from a Go-wrapped error that
+// includes combined output. e.g. from sendSSHPublicKey's
+// fmt.Errorf("send-ssh-public-key: %w (%s)", err, combinedOut), this
+// returns the " (%s)" portion — the actionable subprocess signal like
+// "AccessDeniedException: ... is not authorized to perform:
+// ec2-instance-connect:OpenTunnel". Returns "" when the output is
+// identical to the error string (no stderr captured).
+func unwrapGoError(errMsg string) string {
+	// Extract content between the last '(' and trailing ')'. The
+	// sendSSHPublicKey wrapper uses fmt.Errorf("...: %w (%s)", err, combinedOut)
+	// so the subprocess stderr is always the last parenthesised segment,
+	// e.g. "send-ssh-public-key: exit status 1 (AccessDeniedException: ...)"
+	// — note the closing ')' is at the very end with no trailing space.
+	open := strings.LastIndex(errMsg, "(")
+	if open < 0 {
+		return ""
+	}
+	inner := errMsg[open+1:]
+	return strings.TrimSuffix(inner, ")")
+}
+
 // HandleDiagnose handles GET /workspaces/:id/terminal/diagnose. It runs the
 // same per-step pipeline as HandleConnect (ssh-keygen → EIC send-key → tunnel
 // → ssh) but non-interactively, captures the first failing step and its
@ -214,12 +235,18 @@ func (h *TerminalHandler) diagnoseRemote(ctx context.Context, workspaceID, insta
 	}

 	// Step 2: send-ssh-public-key (AWS Instance Connect)
+	// mc#687: populate Detail so the E2E smoke sees the AWS permission error
+	// verbatim. The subprocess stderr (e.g. "AccessDeniedException: ... is not
+	// authorized to perform: ec2-instance-connect:OpenTunnel") is captured by
+	// sendSSHPublicKey's CombinedOutput() and embedded in the Go error string.
 	t0 = time.Now()
 	if err := sendSSHPublicKey(ctx, region, instanceID, osUser, strings.TrimSpace(string(pubKey))); err != nil {
+		errMsg := err.Error()
 		return stop("send-ssh-public-key", diagnoseStep{
 			Name:       "send-ssh-public-key",
 			DurationMs: time.Since(t0).Milliseconds(),
-			Error:      err.Error(),
+			Error:      errMsg,
+			Detail:     unwrapGoError(errMsg),
 		})
 	}
 	res.Steps = append(res.Steps, diagnoseStep{Name: "send-ssh-public-key", OK: true, DurationMs: time.Since(t0).Milliseconds()})
--- a/workspace-server/internal/handlers/terminal_diagnose_test.go
+++ b/workspace-server/internal/handlers/terminal_diagnose_test.go
@ -245,3 +245,50 @@ func TestDiagnoseRemote_StopsAtSSHProbe(t *testing.T) {
 	}
 }

+// TestUnwrapGoError pins the unwrapGoError helper that extracts subprocess
+// stderr from the Go-wrapped error string produced by sendSSHPublicKey.
+// Regression gate for mc#687: the E2E smoke now reads detail (not error),
+// so detail MUST contain the actionable AWS permission signal.
+func TestUnwrapGoError(t *testing.T) {
+	cases := []struct {
+		name   string
+		input  string
+		want   string
+	}{
+		{
+			name:  "AWS permission denied",
+			input: "send-ssh-public-key: exec: exit status 1 (AccessDeniedException: User: arn:aws:iam::123456789012:role/TestRole is not authorized to perform: ec2-instance-connect:OpenTunnel)",
+			want:  "AccessDeniedException: User: arn:aws:iam::123456789012:role/TestRole is not authorized to perform: ec2-instance-connect:OpenTunnel",
+		},
+		{
+			name:  "generic exec error no output",
+			input: "send-ssh-public-key: exec: exit status 1",
+			want:  "",
+		},
+		{
+			name:  "empty string",
+			input: "",
+			want:  "",
+		},
+		{
+			name:  "short string below threshold",
+			input: "err",
+			want:  "",
+		},
+		{
+			name:  "no parentheses",
+			input: "send-ssh-public-key: something went wrong",
+			want:  "",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := unwrapGoError(tc.input)
+			if got != tc.want {
+				t.Errorf("unwrapGoError(%q): got %q, want %q", tc.input, got, tc.want)
+			}
+		})
+	}
+}
+