Compare commits

..

No commits in common. "main" and "fix/post-suspension-github-urls" have entirely different histories.

131 changed files with 2132 additions and 12453 deletions

View File

@ -1,118 +0,0 @@
#!/usr/bin/env bash
# audit-force-merge — detect a §SOP-6 force-merge after PR close, emit
# `incident.force_merge` to stdout as structured JSON.
#
# Vector's docker_logs source picks up runner stdout; the JSON gets
# shipped to Loki on molecule-canonical-obs, indexable by event_type.
# Query example:
#
# {host="operator"} |= "event_type" |= "incident.force_merge" | json
#
# A force-merge is detected when a PR closed-with-merged=true had at
# least one of the repo's required-status-check contexts in a state
# other than "success" at the merge commit's SHA. That's exactly what
# the Gitea force_merge:true API call lets through, so it's a faithful
# detector of the override path.
#
# Triggers on `pull_request_target: closed` (loaded from base branch
# per §SOP-6 security model). No-op when merged=false.
#
# Required env (set by the workflow):
# GITEA_TOKEN, GITEA_HOST, REPO, PR_NUMBER, REQUIRED_CHECKS
#
# REQUIRED_CHECKS is a newline-separated list of status-check context
# names that branch protection requires. Declared in the workflow YAML
# rather than fetched from /branch_protections (which needs admin
# scope — sop-tier-bot has read-only). Trade dynamism for simplicity:
# when the required-check set changes, update both branch protection
# AND this env. Keeping them in sync is less complexity than granting
# the audit bot admin perms on every repo.
set -euo pipefail

: "${GITEA_TOKEN:?required}"
: "${GITEA_HOST:?required}"
: "${REPO:?required}"
: "${PR_NUMBER:?required}"
: "${REQUIRED_CHECKS:?required (newline-separated context names)}"

OWNER="${REPO%%/*}"
NAME="${REPO##*/}"
API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"

# PR_NUMBER is spliced into jq via --argjson (raw JSON) and into the API
# URL below; refuse anything non-numeric up front so a malformed env var
# can't inject into either.
case "$PR_NUMBER" in
  ''|*[!0-9]*)
    echo "::error::PR_NUMBER must be numeric — got \"${PR_NUMBER}\"."
    exit 1
    ;;
esac

# 1. Fetch the PR. If not merged, no-op.
#
# curl -f: an HTTP 4xx/5xx (expired token, wrong repo, API outage) must
# abort the run via set -e, NOT hand an error body to jq. Without -f,
# `.merged // false` evaluates to false on an error payload and the
# script exits 0 as "closed without merge" — silently suppressing the
# audit event for a real force-merge. (sop-tier-check.sh already checks
# HTTP codes; this brings the two scripts in line.)
PR=$(curl -fsS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
MERGED=$(jq -r '.merged // false' <<<"$PR")
if [ "$MERGED" != "true" ]; then
  echo "::notice::PR #${PR_NUMBER} closed without merge — no audit emission."
  exit 0
fi

MERGE_SHA=$(jq -r '.merge_commit_sha // empty' <<<"$PR")
MERGED_BY=$(jq -r '.merged_by.login // "unknown"' <<<"$PR")
TITLE=$(jq -r '.title // ""' <<<"$PR")
BASE_BRANCH=$(jq -r '.base.ref // "main"' <<<"$PR")
HEAD_SHA=$(jq -r '.head.sha // empty' <<<"$PR")

if [ -z "$MERGE_SHA" ]; then
  echo "::warning::PR #${PR_NUMBER} merged=true but no merge_commit_sha — cannot evaluate force-merge."
  exit 0
fi
# Guard HEAD_SHA too: an empty SHA would make the status URL below hit
# `/commits//status`, and the resulting error payload would read as
# every required check "missing" — i.e. a FALSE force-merge emission.
if [ -z "$HEAD_SHA" ]; then
  echo "::warning::PR #${PR_NUMBER} has no head.sha — cannot evaluate force-merge."
  exit 0
fi

# 2. Required status checks declared in the workflow env.
REQUIRED="$REQUIRED_CHECKS"
if [ -z "${REQUIRED//[[:space:]]/}" ]; then
  echo "::notice::REQUIRED_CHECKS empty — force-merge not applicable."
  exit 0
fi

# 3. Status-check state at the PR HEAD (where checks ran). The merge
#    commit doesn't get its own checks; we evaluate the PR's last
#    commit, which is what branch protection compared against.
STATUS=$(curl -fsS -H "$AUTH" \
  "${API}/repos/${OWNER}/${NAME}/commits/${HEAD_SHA}/status")

# Map context name → state. Context names may contain spaces (e.g.
# "sop-tier-check / tier-check (pull_request)"), so split on the tab jq
# emits, never on generic whitespace.
declare -A CHECK_STATE
while IFS=$'\t' read -r ctx state; do
  [ -n "$ctx" ] && CHECK_STATE[$ctx]="$state"
done < <(jq -r '.statuses // [] | .[] | "\(.context)\t\(.status)"' <<<"$STATUS")

# 4. For each required check, was it green at merge? YAML block scalars
#    (`|`) leave a trailing newline; skip blank/whitespace-only lines.
FAILED_CHECKS=()
while IFS= read -r req; do
  trimmed="${req#"${req%%[![:space:]]*}"}"          # ltrim
  trimmed="${trimmed%"${trimmed##*[![:space:]]}"}"  # rtrim
  [ -z "$trimmed" ] && continue
  # A context absent from the commit status reads as "missing" — the
  # check never ran, which is exactly what a force-merge lets through.
  state="${CHECK_STATE[$trimmed]:-missing}"
  if [ "$state" != "success" ]; then
    FAILED_CHECKS+=("${trimmed}=${state}")
  fi
done <<< "$REQUIRED"

if [ "${#FAILED_CHECKS[@]}" -eq 0 ]; then
  echo "::notice::PR #${PR_NUMBER} merged with all required checks green — not a force-merge."
  exit 0
fi

# 5. Emit structured audit event.
NOW=$(date -u +%Y-%m-%dT%H:%M:%SZ)
FAILED_JSON=$(printf '%s\n' "${FAILED_CHECKS[@]}" | jq -R . | jq -s .)
# Print as a single-line JSON so Vector's parse_json transform can pick
# it up cleanly from docker_logs.
jq -nc \
  --arg event_type "incident.force_merge" \
  --arg ts "$NOW" \
  --arg repo "$REPO" \
  --argjson pr "$PR_NUMBER" \
  --arg title "$TITLE" \
  --arg base "$BASE_BRANCH" \
  --arg merged_by "$MERGED_BY" \
  --arg merge_sha "$MERGE_SHA" \
  --argjson failed_checks "$FAILED_JSON" \
  '{event_type: $event_type, ts: $ts, repo: $repo, pr: $pr, title: $title,
    base_branch: $base, merged_by: $merged_by, merge_sha: $merge_sha,
    failed_checks: $failed_checks}'
echo "::warning::FORCE-MERGE detected on PR #${PR_NUMBER} by ${MERGED_BY}: ${#FAILED_CHECKS[@]} required check(s) not green at merge time."

View File

@ -1,149 +0,0 @@
#!/usr/bin/env bash
# sop-tier-check — verify a Gitea PR satisfies the §SOP-6 approval gate.
#
# Reads the PR's tier label, walks approving reviewers, and checks each
# approver's Gitea team membership against the tier's eligible-team set.
# Marks pass only when at least one non-author approver is in an eligible
# team.
#
# Invoked from `.gitea/workflows/sop-tier-check.yml`. The workflow sets
# the env vars below; this script does no IO outside of stdout/stderr +
# the Gitea API.
#
# Required env:
# GITEA_TOKEN — bot PAT with read:organization,read:user,
# read:issue,read:repository scopes
# GITEA_HOST — e.g. git.moleculesai.app
# REPO — owner/name (from github.repository)
# PR_NUMBER — int (from github.event.pull_request.number)
# PR_AUTHOR — login (from github.event.pull_request.user.login)
#
# Optional:
# SOP_DEBUG=1 — print per-API-call diagnostic lines (HTTP codes,
# raw response bodies). Default: off.
#
# Stale-status caveat: Gitea Actions does not always re-fire workflows
# on `labeled` / `pull_request_review:submitted` events. If the
# sop-tier-check status is stale (e.g. red after labels/approvals were
# added), push an empty commit to the PR branch to force a synchronize
# event, OR re-request reviews. Tracked: internal#46.
set -euo pipefail

# debug MSG… — per-API-call diagnostic line to stderr; only when SOP_DEBUG=1.
debug() {
  if [ "${SOP_DEBUG:-}" = "1" ]; then
    echo " [debug] $*" >&2
  fi
}

# Validate env — fail fast with the variable named, not a mid-run 401.
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
: "${GITEA_HOST:?GITEA_HOST required}"
: "${REPO:?REPO required (owner/name)}"
: "${PR_NUMBER:?PR_NUMBER required}"
: "${PR_AUTHOR:?PR_AUTHOR required}"

OWNER="${REPO%%/*}"
NAME="${REPO##*/}"
API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"

echo "::notice::tier-check start: repo=$OWNER/$NAME pr=$PR_NUMBER author=$PR_AUTHOR"

# Sanity: token resolves to a user. Deliberately no curl -f here: an
# auth failure should reach the tailored ::error below, not abort with
# a bare curl exit code.
WHOAMI=$(curl -sS -H "$AUTH" "${API}/user" | jq -r '.login // ""')
if [ -z "$WHOAMI" ]; then
  echo "::error::GITEA_TOKEN cannot resolve a user via /api/v1/user — check the token scope and that the secret is wired correctly."
  exit 1
fi
echo "::notice::token resolves to user: $WHOAMI"

# 1. Read tier label.
# curl -f: if the labels call itself fails (HTTP 4xx/5xx), abort loudly
# with the transport error instead of falling through to the misleading
# "no tier label" message.
LABELS=$(curl -fsS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/labels" | jq -r '.[].name')
TIER=""
# Iterate line-wise, not `for L in $LABELS`: label names may contain
# spaces, and word-splitting would match fragments instead of labels.
while IFS= read -r L; do
  case "$L" in
    tier:low|tier:medium|tier:high)
      if [ -n "$TIER" ]; then
        echo "::error::Multiple tier labels: $TIER + $L. Apply exactly one."
        exit 1
      fi
      TIER="$L"
      ;;
  esac
done <<< "$LABELS"
if [ -z "$TIER" ]; then
  echo "::error::PR has no tier:low|tier:medium|tier:high label. Apply one before merge."
  exit 1
fi
debug "tier=$TIER"

# 2. Tier → eligible teams (mirror of dev-sop §SOP-6).
case "$TIER" in
  tier:low) ELIGIBLE="engineers managers ceo" ;;
  tier:medium) ELIGIBLE="managers ceo" ;;
  tier:high) ELIGIBLE="ceo" ;;
esac
debug "eligible_teams=$ELIGIBLE"

# Resolve team-name → team-id once. /orgs/{org}/teams/{slug}/... endpoints
# don't exist on Gitea 1.22; we have to use /teams/{id}.
ORG_TEAMS_FILE=$(mktemp)
trap 'rm -f "$ORG_TEAMS_FILE"' EXIT
HTTP_CODE=$(curl -sS -o "$ORG_TEAMS_FILE" -w '%{http_code}' -H "$AUTH" \
  "${API}/orgs/${OWNER}/teams")
debug "teams-list HTTP=$HTTP_CODE size=$(wc -c <"$ORG_TEAMS_FILE")"
if [ "${SOP_DEBUG:-}" = "1" ]; then
  echo " [debug] teams-list body (first 300 chars):" >&2
  head -c 300 "$ORG_TEAMS_FILE" >&2; echo >&2
fi
if [ "$HTTP_CODE" != "200" ]; then
  echo "::error::GET /orgs/${OWNER}/teams returned HTTP $HTTP_CODE — token likely lacks read:org scope. Add a SOP_TIER_CHECK_TOKEN secret with read:organization scope at the org level."
  exit 1
fi
declare -A TEAM_ID
for T in $ELIGIBLE; do
  ID=$(jq -r --arg t "$T" '.[] | select(.name==$t) | .id' <"$ORG_TEAMS_FILE" | head -1)
  if [ -z "$ID" ] || [ "$ID" = "null" ]; then
    VISIBLE=$(jq -r '.[]?.name? // empty' <"$ORG_TEAMS_FILE" 2>/dev/null | tr '\n' ' ')
    echo "::error::Team \"$T\" not found in org $OWNER. Teams visible: $VISIBLE"
    exit 1
  fi
  TEAM_ID[$T]="$ID"
  # '=' separator — previously "$T$ID" ran name and id together in the
  # debug output.
  debug "team-id: $T=$ID"
done

# 3. Read approving reviewers (-f: same loud-failure rationale as the
#    labels fetch above).
REVIEWS=$(curl -fsS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews")
APPROVERS=$(jq -r '[.[] | select(.state=="APPROVED") | .user.login] | unique | .[]' <<<"$REVIEWS")
if [ -z "$APPROVERS" ]; then
  echo "::error::No approving reviews. Tier $TIER requires approval from {$ELIGIBLE} (non-author)."
  exit 1
fi
debug "approvers: $(echo "$APPROVERS" | tr '\n' ' ')"

# 4. For each approver: check non-author + team membership (by id).
#    Logins and team slugs cannot contain whitespace, so word-splitting
#    is safe for these two loops.
OK=""
for U in $APPROVERS; do
  if [ "$U" = "$PR_AUTHOR" ]; then
    debug "skip self-review by $U"
    continue
  fi
  for T in $ELIGIBLE; do
    ID="${TEAM_ID[$T]}"
    # Membership probe: 200/204 → member; anything else → not a member.
    CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
      "${API}/teams/${ID}/members/${U}")
    debug "probe: $U in team $T (id=$ID) → HTTP $CODE"
    if [ "$CODE" = "200" ] || [ "$CODE" = "204" ]; then
      echo "::notice::approver $U is in team $T (eligible for $TIER)"
      OK="yes"
      break
    fi
  done
  [ -n "$OK" ] && break
done
if [ -z "$OK" ]; then
  echo "::error::Tier $TIER requires approval from a non-author member of {$ELIGIBLE}. Got approvers: $APPROVERS — none of them satisfied team membership. Set SOP_DEBUG=1 to see per-probe HTTP codes."
  exit 1
fi
echo "::notice::sop-tier-check passed: $TIER, approver in {$ELIGIBLE}"

View File

@ -1,58 +0,0 @@
# audit-force-merge — emit `incident.force_merge` to runner stdout when
# a PR is merged with required-status-checks not green. Vector picks
# the JSON line off docker_logs and ships to Loki on
# molecule-canonical-obs (per `reference_obs_stack_phase1`); query as:
#
#   {host="operator"} |= "event_type" |= "incident.force_merge" | json
#
# Closes the §SOP-6 audit gap (the doc says force-merges write to
# `structure_events`, but that table lives in the platform DB, not
# Gitea-side; Loki is the practical equivalent for Gitea Actions
# events). When the credential / observability stack converges later,
# this can sync into structure_events from Loki via a backfill job —
# the structured JSON shape is forward-compatible.
#
# Logic in `.gitea/scripts/audit-force-merge.sh` per the same script-
# extract pattern as sop-tier-check.
name: audit-force-merge
# pull_request_target loads from the base branch — same security model
# as sop-tier-check. Without this, an attacker could rewrite the
# workflow on a PR and skip the audit emission for their own
# force-merge. See `.gitea/workflows/sop-tier-check.yml` for the full
# rationale.
on:
  pull_request_target:
    types: [closed]

jobs:
  audit:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
    # Skip when PR is closed without merge — saves a runner. (The script
    # also no-ops on merged=false, so this is an optimization, not the
    # correctness gate.)
    if: github.event.pull_request.merged == true
    steps:
      - name: Check out base branch (for the script)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # base.sha, NOT head.sha — preserves the pull_request_target
          # trust boundary: the script must come from the same base
          # branch the workflow definition was loaded from.
          ref: ${{ github.event.pull_request.base.sha }}
      - name: Detect force-merge + emit audit event
        env:
          # Same org-level secret the sop-tier-check workflow uses.
          # NOTE(review): the GITHUB_TOKEN fallback may lack the API
          # scopes the script needs (PR read + commit status read) —
          # confirm before relying on the fallback path.
          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
          GITEA_HOST: git.moleculesai.app
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          # Required-status-check contexts to evaluate at merge time.
          # Newline-separated. Mirror this against branch protection
          # (settings → branches → protected branch → required checks).
          # Declared here rather than fetched from /branch_protections
          # because that endpoint requires admin write — sop-tier-bot is
          # read-only by design (least-privilege).
          # The block scalar's trailing newline is tolerated: the script
          # skips blank/whitespace-only lines.
          REQUIRED_CHECKS: |
            sop-tier-check / tier-check (pull_request)
            Secret scan / Scan diff for credential-shaped strings (pull_request)
        # Runs from the base-branch checkout made above.
        run: bash .gitea/scripts/audit-force-merge.sh

View File

@ -1,191 +0,0 @@
name: Secret scan
# Hard CI gate. Refuses any PR / push whose diff additions contain a
# recognisable credential. Defense-in-depth for the #2090-class incident
# (2026-04-24): GitHub's hosted Copilot Coding Agent leaked a ghs_*
# installation token into tenant-proxy/package.json via `npm init`
# slurping the URL from a token-embedded origin remote. We can't fix
# upstream's clone hygiene, so we gate here.
#
# Same regex set as the runtime's bundled pre-commit hook
# (molecule-ai-workspace-runtime: molecule_runtime/scripts/pre-commit-checks.sh).
# Keep the two sides aligned when adding patterns.
#
# Ported from .github/workflows/secret-scan.yml so the gate actually
# fires on Gitea Actions. Differences from the GitHub version:
#   - drops `merge_group` event (Gitea has no merge queue)
#   - drops `workflow_call` (no cross-repo reusable invocation on Gitea)
#   - SELF path updated to .gitea/workflows/secret-scan.yml
# The job name + step name are identical to the GitHub workflow so the
# status-check context (`Secret scan / Scan diff for credential-shaped
# strings (pull_request)`) matches branch protection on molecule-core/main.
# Do NOT rename the job or step — the context string is load-bearing.
on:
  pull_request:
    types: [opened, synchronize, reopened]
  push:
    branches: [main, staging]

jobs:
  scan:
    name: Scan diff for credential-shaped strings
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 2 # need previous commit to diff against on push events
      # For pull_request events the diff base may be many commits behind
      # HEAD and absent from the shallow clone. Fetch it explicitly.
      - name: Fetch PR base SHA (pull_request events only)
        if: github.event_name == 'pull_request'
        run: git fetch --depth=1 origin ${{ github.event.pull_request.base.sha }}
      - name: Refuse if credential-shaped strings appear in diff additions
        env:
          # Plumb event-specific SHAs through env so the script doesn't
          # need conditional `${{ ... }}` interpolation per event type.
          # github.event.before/after only exist on push events;
          # pull_request has pull_request.base.sha / pull_request.head.sha.
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
          PUSH_BEFORE: ${{ github.event.before }}
          PUSH_AFTER: ${{ github.event.after }}
        run: |
          # Pattern set covers GitHub family (the actual #2090 vector),
          # Anthropic / OpenAI / Slack / AWS. Anchored on prefixes with low
          # false-positive rates against agent-generated content. Mirror of
          # molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh
          # — keep aligned.
          SECRET_PATTERNS=(
            'ghp_[A-Za-z0-9]{36,}'            # GitHub PAT (classic)
            'ghs_[A-Za-z0-9]{36,}'            # GitHub App installation token
            'gho_[A-Za-z0-9]{36,}'            # GitHub OAuth user-to-server
            'ghu_[A-Za-z0-9]{36,}'            # GitHub OAuth user
            'ghr_[A-Za-z0-9]{36,}'            # GitHub OAuth refresh
            'github_pat_[A-Za-z0-9_]{82,}'    # GitHub fine-grained PAT
            'sk-ant-[A-Za-z0-9_-]{40,}'       # Anthropic API key
            'sk-proj-[A-Za-z0-9_-]{40,}'      # OpenAI project key
            'sk-svcacct-[A-Za-z0-9_-]{40,}'   # OpenAI service-account key
            'sk-cp-[A-Za-z0-9_-]{60,}'        # MiniMax API key (F1088 vector — caught only after the fact)
            'xox[baprs]-[A-Za-z0-9-]{20,}'    # Slack tokens
            'AKIA[0-9A-Z]{16}'                # AWS access key ID
            'ASIA[0-9A-Z]{16}'                # AWS STS temp access key ID
          )
          # Determine the diff base. Each event type stores its SHAs in
          # a different place — see the env block above.
          case "${{ github.event_name }}" in
            pull_request)
              BASE="$PR_BASE_SHA"
              HEAD="$PR_HEAD_SHA"
              ;;
            *)
              BASE="$PUSH_BEFORE"
              HEAD="$PUSH_AFTER"
              ;;
          esac
          # On push events with shallow clones, BASE may be present in
          # the event payload but absent from the local object DB
          # (fetch-depth=2 doesn't always reach the previous commit
          # across true merges). Try fetching it on demand. If the
          # fetch fails — e.g. the SHA was force-overwritten — we fall
          # through to the empty-BASE branch below, which scans the
          # entire tree as if every file were new. Correct, just slow.
          # (An all-zero BASE is git's "no previous ref" sentinel on
          # branch-creation pushes — treated the same as empty.)
          if [ -n "$BASE" ] && ! echo "$BASE" | grep -qE '^0+$'; then
            if ! git cat-file -e "$BASE" 2>/dev/null; then
              git fetch --depth=1 origin "$BASE" 2>/dev/null || true
            fi
          fi
          # Files added or modified in this change.
          if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$' || ! git cat-file -e "$BASE" 2>/dev/null; then
            # New branch / no previous SHA / BASE unreachable — check the
            # entire tree as added content. Slower, but correct on first
            # push.
            CHANGED=$(git ls-tree -r --name-only HEAD)
            DIFF_RANGE=""
          else
            # NOTE(review): this is a two-dot diff (BASE tree vs HEAD
            # tree). If the base branch advanced after the PR branched,
            # base-side differences can surface here as well. A
            # merge-base diff ("$BASE"..."$HEAD") would restrict the
            # scan to PR-side changes — confirm which is intended
            # before changing.
            CHANGED=$(git diff --name-only --diff-filter=AM "$BASE" "$HEAD")
            DIFF_RANGE="$BASE $HEAD"
          fi
          if [ -z "$CHANGED" ]; then
            echo "No changed files to inspect."
            exit 0
          fi
          # Self-exclude: this workflow file legitimately contains the
          # pattern strings as regex literals. Without an exclude it would
          # block its own merge. Both the .github/ original and this
          # .gitea/ port are excluded so a sync between them stays clean.
          SELF_GITHUB=".github/workflows/secret-scan.yml"
          SELF_GITEA=".gitea/workflows/secret-scan.yml"
          OFFENDING=""
          # `while IFS= read -r` (not `for f in $CHANGED`) so filenames
          # containing whitespace don't word-split silently — a path
          # with a space would otherwise produce two iterations on
          # tokens that aren't real filenames, breaking the
          # self-exclude + diff lookup.
          while IFS= read -r f; do
            [ -z "$f" ] && continue
            [ "$f" = "$SELF_GITHUB" ] && continue
            [ "$f" = "$SELF_GITEA" ] && continue
            if [ -n "$DIFF_RANGE" ]; then
              # NOTE(review): '^\+[^+]' skips the '+++' file header but
              # ALSO skips any added line whose own content begins with
              # '+' (it appears as '++…' in the diff) — confirm that
              # miss is acceptable for credential-shaped content.
              ADDED=$(git diff --no-color --unified=0 "$BASE" "$HEAD" -- "$f" 2>/dev/null | grep -E '^\+[^+]' || true)
            else
              # No diff range (new branch first push) — scan the full file
              # contents as if every line were new.
              ADDED=$(cat "$f" 2>/dev/null || true)
            fi
            [ -z "$ADDED" ] && continue
            for pattern in "${SECRET_PATTERNS[@]}"; do
              if echo "$ADDED" | grep -qE "$pattern"; then
                OFFENDING="${OFFENDING}${f} (matched: ${pattern})\n"
                break
              fi
            done
          done <<< "$CHANGED"
          if [ -n "$OFFENDING" ]; then
            echo "::error::Credential-shaped strings detected in diff additions:"
            # `printf '%b' "$OFFENDING"` interprets backslash escapes
            # (the literal `\n` we appended above becomes a newline)
            # WITHOUT treating OFFENDING as a format string. Plain
            # `printf "$OFFENDING"` is a format-string sink: a filename
            # containing `%` would be interpreted as a conversion
            # specifier, corrupting the error message (or printing
            # `%(missing)` artifacts).
            printf '%b' "$OFFENDING"
            echo ""
            echo "The actual matched values are NOT echoed here, deliberately —"
            echo "round-tripping a leaked credential into CI logs widens the blast"
            echo "radius (logs are searchable + retained)."
            echo ""
            echo "Recovery:"
            echo "  1. Remove the secret from the file. Replace with an env var"
            echo "     reference (e.g. \${{ secrets.GITHUB_TOKEN }} in workflows,"
            echo "     process.env.X in code)."
            echo "  2. If the credential was already pushed (this PR's commit"
            echo "     history reaches a public ref), treat it as compromised —"
            echo "     ROTATE it immediately, do not just remove it. The token"
            echo "     remains valid in git history forever and may be in any"
            echo "     log/cache that consumed this branch."
            echo "  3. Force-push the cleaned commit (or stack a revert) and"
            echo "     re-run CI."
            echo ""
            echo "If the match is a false positive (test fixture, docs example,"
            echo "or this workflow's own regex literals): use a clearly-fake"
            echo "placeholder like ghs_EXAMPLE_DO_NOT_USE that doesn't satisfy"
            echo "the length suffix, OR add the file path to the SELF exclude"
            echo "list in this workflow with a short reason."
            echo ""
            echo "Mirror of the regex set lives in the runtime's bundled"
            echo "pre-commit hook (molecule-ai-workspace-runtime:"
            echo "molecule_runtime/scripts/pre-commit-checks.sh) — keep aligned."
            exit 1
          fi
          echo "✓ No credential-shaped strings in this change."

View File

@ -1,81 +0,0 @@
# sop-tier-check — canonical Gitea Actions workflow for §SOP-6 enforcement.
#
# Logic lives in `.gitea/scripts/sop-tier-check.sh` (extracted 2026-05-09
# from the previous inline-bash version). The script is the single source
# of truth; this workflow file just sets env + invokes it.
#
# Copy BOTH files (`.gitea/workflows/sop-tier-check.yml` +
# `.gitea/scripts/sop-tier-check.sh`) into any repo that wants the
# §SOP-6 PR gate enforced. Pair with branch protection on the protected
# branch:
#   required_status_checks: ["sop-tier-check / tier-check (pull_request)"]
#   required_approving_reviews: 1
#   approving_review_teams: ["ceo", "managers", "engineers"]
#
# Tier → eligible-team mapping (mirror of dev-sop §SOP-6):
#   tier:low    → engineers, managers, ceo
#   tier:medium → managers, ceo
#   tier:high   → ceo
#
# Force-merge: Owners-team override remains available out-of-band via
# the Gitea merge API; force-merge writes `incident.force_merge` to
# `structure_events` per §Persistent structured logging gate (Phase 3).
#
# Set `SOP_DEBUG: '1'` in the env block to enable per-API-call diagnostic
# lines — useful when diagnosing token-scope or team-id-resolution
# issues. Default off.
name: sop-tier-check
# SECURITY: triggers MUST use `pull_request_target`, not `pull_request`.
# `pull_request_target` loads the workflow definition from the BASE
# branch (i.e. `main`), not the PR's HEAD. With `pull_request`, anyone
# with write access to a feature branch could rewrite this file in
# their PR to dump SOP_TIER_CHECK_TOKEN (org-read scope) to logs and
# exfiltrate it. Verified 2026-05-09 against Gitea 1.22.6 —
# `pull_request_target` (added in Gitea 1.21 via go-gitea/gitea#25229)
# is the documented mitigation.
#
# This workflow does NOT call `actions/checkout` of PR HEAD code, so no
# untrusted code is ever executed in the runner — we only HTTP-call the
# Gitea API. If a future change adds a checkout step, it MUST pin to
# `${{ github.event.pull_request.base.sha }}` (NOT `head.sha`) to keep
# the trust boundary.
on:
  pull_request_target:
    types: [opened, edited, synchronize, reopened, labeled, unlabeled]
  pull_request_review:
    types: [submitted, dismissed, edited]

jobs:
  tier-check:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
    steps:
      - name: Check out base branch (for the script)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # Pin to base.sha — pull_request_target's protection only
          # works if we never check out PR HEAD. Same SHA the workflow
          # itself was loaded from.
          ref: ${{ github.event.pull_request.base.sha }}
      - name: Verify tier label + reviewer team membership
        env:
          # SOP_TIER_CHECK_TOKEN is the org-level secret for the
          # sop-tier-bot PAT (read:organization,read:user,read:issue,
          # read:repository). Stored at the org level
          # (/api/v1/orgs/molecule-ai/actions/secrets) so per-repo
          # configuration is unnecessary — every repo in the org
          # picks it up automatically.
          # Falls back to GITHUB_TOKEN with a clear error if missing.
          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
          GITEA_HOST: git.moleculesai.app
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
          # Set to '1' for diagnostic per-API-call output. Off by default
          # so production logs aren't noisy. The script string-compares
          # against "1" exactly, so '0' and unset both mean "off".
          SOP_DEBUG: '0'
        run: bash .gitea/scripts/sop-tier-check.sh

View File

@ -0,0 +1,429 @@
name: Auto-promote :latest after main image build
# Retags `ghcr.io/molecule-ai/{platform,platform-tenant}:staging-<sha>`
# → `:latest` after either the image build or E2E completes on a `main`
# push, gated on E2E Staging SaaS not being red for that SHA.
#
# Why two triggers:
#
# `publish-workspace-server-image` and `e2e-staging-saas` are both
# paths-filtered, but with DIFFERENT path sets:
#
# publish-workspace-server-image:
# workspace-server/**, canvas/**, manifest.json
#
# e2e-staging-saas (full lifecycle):
# workspace-server/internal/handlers/{registry,workspace_provision,
# a2a_proxy}.go, workspace-server/internal/middleware/**,
# workspace-server/internal/provisioner/**, tests/e2e/test_staging_full_saas.sh
#
# The E2E set is a strict SUBSET of the publish set. So:
# - canvas/** changes → publish fires, E2E does not
# - workspace-server/cmd/** changes → publish fires, E2E does not
# - workspace-server/internal/sweep/** → publish fires, E2E does not
#
# The previous version triggered ONLY on E2E completion, which meant
# non-E2E-path changes (canvas, cmd, sweep, etc.) rebuilt the image
# but never advanced `:latest`. Result: as of 2026-04-28 this workflow
# had run zero times since merge despite eight main pushes — `:latest`
# was ~7 hours / 9 PRs behind main with no human realising. See
# `molecule-core` Slack discussion 2026-04-28.
#
# Adding `publish-workspace-server-image` as a second trigger closes
# the gap: any image rebuild on main eligibly advances `:latest`.
#
# Why E2E remains a kill-switch (not the trigger):
#
# When E2E DID run for this SHA and ended red, we abort — `:latest`
# stays on the prior known-good digest. When E2E didn't run (paths
# filtered out), we proceed: pre-merge gates already validated this
# SHA on staging via auto-promote-staging requiring CI + E2E Canvas +
# E2E API + CodeQL all green. Image content for non-E2E-paths
# (canvas, cmd, sweep) is exercised by those staging gates.
#
# Why `main` only:
#
# `:latest` is what prod tenants pull. We only want SHAs that have
# reached main (via auto-promote-staging) to advance `:latest`.
# Triggering on staging would let a staging-only revert advance
# `:latest` to a SHA that never reaches main, breaking the "production
# runs what's on main" invariant.
#
# Idempotency:
#
# When a SHA touches paths that match BOTH publish and E2E, both
# workflows fire and complete. Both trigger this workflow on
# completion → two runs race. Both retag `:staging-<sha>` →
# `:latest`. crane tag is idempotent (re-tagging the same digest is a
# no-op), so the second run is harmless. concurrency group serializes
# them anyway.
on:
workflow_run:
workflows:
- 'E2E Staging SaaS (full lifecycle)'
- 'publish-workspace-server-image'
types: [completed]
branches: [main]
workflow_dispatch:
inputs:
sha:
description: 'Short sha to promote (override; defaults to upstream workflow_run head_sha)'
required: false
type: string
permissions:
contents: read
packages: write
concurrency:
# Serialize promotes per-SHA so the publish+E2E both-fired race lands
# cleanly. Different SHAs can promote in parallel.
group: auto-promote-latest-${{ github.event.workflow_run.head_sha || github.event.inputs.sha || github.sha }}
cancel-in-progress: false
env:
IMAGE_NAME: ghcr.io/molecule-ai/platform
TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant
jobs:
promote:
# Proceed if upstream succeeded OR manual dispatch. Upstream-failure
# paths are filtered here; the E2E-was-red kill-switch lives in the
# gate-check step below (covers the case where upstream is publish
# success but E2E for the same SHA failed).
if: |
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
runs-on: ubuntu-latest
steps:
- name: Compute short sha
id: sha
run: |
set -euo pipefail
if [ -n "${{ github.event.inputs.sha }}" ]; then
FULL="${{ github.event.inputs.sha }}"
else
FULL="${{ github.event.workflow_run.head_sha }}"
fi
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
echo "full=${FULL}" >> "$GITHUB_OUTPUT"
- name: Gate — E2E Staging SaaS state for this SHA
# When upstream IS E2E success, we know it's green (filtered by
# the job-level `if` already). When upstream is publish, look up
# E2E state for the same SHA. Four buckets:
#
# - completed/success: E2E confirmed safe → proceed
# - completed/failure|cancelled|timed_out: E2E found a
# regression → ABORT (exit 1), `:latest` stays put
# - in_progress|queued|requested: E2E is RACING with publish
# for a runtime-touching SHA. publish typically completes
# ~5-10min before E2E (~10-15min). If we promote on the
# publish signal here, a later E2E failure can't roll back
# `:latest` — it'd already be wrongly advanced. So we DEFER:
# skip subsequent steps (proceed=false) and let E2E's own
# completion event re-fire this workflow, which then takes
# the upstream-is-E2E path. exit 0 so the run shows as
# success rather than a noisy fake-failure.
# - none/none: E2E was paths-filtered out for this SHA (the
# change touched canvas/cmd/sweep/etc. — paths covered by
# publish but not by E2E). pre-merge gates on staging
# already validated this SHA → proceed.
#
# Manual dispatch skips this check — operator override.
id: gate
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ github.repository }}
SHA: ${{ steps.sha.outputs.full }}
UPSTREAM_NAME: ${{ github.event.workflow_run.name }}
EVENT_NAME: ${{ github.event_name }}
run: |
set -euo pipefail
if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
echo "proceed=true" >> "$GITHUB_OUTPUT"
echo "::notice::Manual dispatch — skipping E2E gate (operator override)"
exit 0
fi
if [ "$UPSTREAM_NAME" = "E2E Staging SaaS (full lifecycle)" ]; then
echo "proceed=true" >> "$GITHUB_OUTPUT"
echo "::notice::Upstream is E2E itself (success per job-level if) — gate trivially satisfied"
exit 0
fi
# Upstream is publish-workspace-server-image. Check E2E state.
# The jq filter must defend against TWO empty cases that gh
# CLI emits indistinguishably:
# 1. gh exits non-zero (network blip, auth issue) → handled
# by the `|| echo "none/none"` fallback below.
# 2. gh exits zero but returns `[]` (no E2E run on this
# main SHA — the common case for canvas-only / cmd-only
# / sweep-only changes whose paths don't trigger E2E).
# Without `(.[0] // {})`, jq sees `null` and emits
# "null/none" — which the case statement below has no
# branch for, so it falls into *) → exit 1.
# Surfaced 2026-04-30 the first time the App-token chain
# (#2389) actually fired auto-promote-on-e2e from a publish
# upstream — every prior run was E2E-upstream which
# short-circuits before this gate.
RESULT=$(gh run list \
  --repo "$REPO" \
  --workflow e2e-staging-saas.yml \
  --branch main \
  --commit "$SHA" \
  --limit 1 \
  --json status,conclusion \
  --jq '(.[0] // {}) | "\(.status // "none")/\(.conclusion // "none")"' \
  2>/dev/null || echo "none/none")
echo "E2E Staging SaaS for ${SHA:0:7}: $RESULT"
# Dispatch on the combined "status/conclusion" string. Only an explicit
# completed/success promotes; a red conclusion aborts (exit 1), and
# in-flight / superseded runs defer cleanly (exit 0, proceed=false).
case "$RESULT" in
  completed/success)
    echo "proceed=true" >> "$GITHUB_OUTPUT"
    echo "::notice::E2E green for this SHA — proceeding with promote"
    ;;
  completed/failure|completed/timed_out)
    echo "proceed=false" >> "$GITHUB_OUTPUT"
    {
      echo "## ❌ Auto-promote aborted — E2E Staging SaaS failed"
      echo
      echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\`"
      echo "\`:latest\` stays on the prior known-good digest."
      echo
      echo "If the failure was a flake, manually dispatch this workflow with the same sha to override."
    } >> "$GITHUB_STEP_SUMMARY"
    exit 1
    ;;
  completed/cancelled)
    # cancelled ≠ failure. Per-SHA concurrency cancels older E2E
    # runs when a newer push lands (memory:
    # feedback_concurrency_group_per_sha) — the newer SHA will
    # have its own E2E + promote chain. Treat the same as
    # in_progress: defer without aborting, let the next E2E run
    # promote when it lands.
    #
    # Caught 2026-05-05 02:03 on sha 31f9a5e — auto-promote
    # blocked the whole chain because this case fell through to
    # exit 1 instead of clean defer.
    echo "proceed=false" >> "$GITHUB_OUTPUT"
    {
      echo "## ⏭ Auto-promote deferred — E2E Staging SaaS was cancelled"
      echo
      echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\`"
      echo "Likely per-SHA concurrency (newer push superseded this E2E run)."
      echo "The newer SHA's E2E will fire its own promote when it lands."
      echo "If you need this specific SHA promoted, manually dispatch."
    } >> "$GITHUB_STEP_SUMMARY"
    ;;
  in_progress/*|queued/*|requested/*|waiting/*|pending/*)
    # E2E hasn't concluded yet — do not retag; E2E's own completion
    # event re-fires this workflow.
    echo "proceed=false" >> "$GITHUB_OUTPUT"
    {
      echo "## ⏳ Auto-promote deferred — E2E Staging SaaS still running"
      echo
      echo "Publish completed before E2E for \`${SHA:0:7}\` (state: \`$RESULT\`)."
      echo "Skipping retag here — E2E's own completion event will re-fire this workflow."
      echo "If E2E ends green, that run promotes \`:latest\`. If red, it aborts."
    } >> "$GITHUB_STEP_SUMMARY"
    ;;
  none/none)
    # NOTE(review): the `|| echo "none/none"` fallback on the gh call
    # above means a gh CLI/API/auth failure is indistinguishable from
    # "no E2E run recorded for this SHA" — both land here and promote
    # fail-open. Confirm that's acceptable for the API-error case.
    echo "proceed=true" >> "$GITHUB_OUTPUT"
    echo "::notice::E2E paths-filtered out for this SHA — pre-merge staging gates carry"
    ;;
  *)
    # Unknown status/conclusion combination — fail loudly rather than
    # guess (same hard-fail posture as the red-conclusion arm).
    echo "proceed=false" >> "$GITHUB_OUTPUT"
    {
      echo "## ❓ Auto-promote aborted — unexpected E2E state"
      echo
      echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\` (unhandled)"
      echo "Manual investigation needed; re-dispatch with the same sha once resolved."
    } >> "$GITHUB_STEP_SUMMARY"
    exit 1
    ;;
esac
# Install crane (pinned to a commit SHA) only when the gate said proceed.
- if: steps.gate.outputs.proceed == 'true'
  uses: imjasonh/setup-crane@6da1ae018866400525525ce74ff892880c099987 # v0.5
- name: GHCR login
  if: steps.gate.outputs.proceed == 'true'
  run: |
    # Password via stdin so the token never appears in argv / trace logs.
    echo "${{ secrets.GITHUB_TOKEN }}" | \
      crane auth login ghcr.io -u "${{ github.actor }}" --password-stdin
- name: Verify :staging-<sha> exists for both images
  # Better to fail fast with a clear message than to half-tag
  # (platform retagged but platform-tenant missing → tenants pull
  # a stale image).
  if: steps.gate.outputs.proceed == 'true'
  run: |
    set -euo pipefail
    # IMAGE_NAME / TENANT_IMAGE_NAME come from workflow-level env —
    # not visible in this chunk; TODO confirm both are defined there.
    for img in "${IMAGE_NAME}" "${TENANT_IMAGE_NAME}"; do
      tag="${img}:staging-${{ steps.sha.outputs.short }}"
      # `crane manifest` exits non-zero when the tag doesn't exist.
      if ! crane manifest "$tag" >/dev/null 2>&1; then
        echo "::error::Missing tag: $tag"
        echo "::error::publish-workspace-server-image must complete on this SHA before auto-promote can retag :latest."
        exit 1
      fi
      echo "  ok: $tag exists"
    done
- name: Ancestry check — refuse to promote :latest backwards
  # #2244: workflow_run completions arrive in arbitrary order. If
  # SHA-A and SHA-B both reach main within ~10 min and SHA-B's E2E
  # completes before SHA-A's, this workflow can fire for SHA-A
  # AFTER it already promoted SHA-B → :latest goes backwards. The
  # orphan-reconciler "next run corrects it" doesn't apply: there's
  # no auto-corrective re-promote, :latest stays wrong until the
  # next main push lands.
  #
  # Detection: read current :latest's `org.opencontainers.image.revision`
  # label (set by publish-workspace-server-image.yml at build time)
  # and ask the GitHub compare API whether the candidate SHA is
  # ahead-of / identical-to / behind / diverged-from current.
  # Hard-fail on `behind` and `diverged` per the approved design —
  # silent-bypass is the class we're moving away from. Workflow
  # goes red, oncall sees it, operator decides how to recover
  # (manual dispatch with the right SHA, force-promote, etc.).
  #
  # Manual dispatch skips this check — operator override semantics
  # match the gate-check step above.
  #
  # Backward-compat: when current :latest carries no revision
  # label (legacy image pre-publish-with-label), skip-with-warning.
  # All :latest images on main are post-label as of 2026-04-29, so
  # this branch will be dead within 90 days; remove then.
  if: steps.gate.outputs.proceed == 'true' && github.event_name != 'workflow_dispatch'
  id: ancestry
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    REPO: ${{ github.repository }}
    TARGET_SHA: ${{ steps.sha.outputs.full }}
  run: |
    set -euo pipefail
    # Read the current :latest config and pull the revision label.
    # `crane config` returns the OCI image config blob (not the manifest);
    # labels live under `.config.Labels`. `// empty` makes jq return ""
    # rather than the literal "null" so the test below works.
    CURRENT_REVISION=$(crane config "${IMAGE_NAME}:latest" 2>/dev/null \
      | jq -r '.config.Labels["org.opencontainers.image.revision"] // empty' \
      || true)
    # NOTE(review): a missing or unpullable :latest also collapses to an
    # empty CURRENT_REVISION here (crane fails, stderr is discarded,
    # `|| true` swallows the status), so the skip-no-label branch below
    # fires for that case too — confirm that fallback is intended for
    # the missing-image case, not just the legacy-label one.
    if [ -z "$CURRENT_REVISION" ]; then
      echo "decision=skip-no-label" >> "$GITHUB_OUTPUT"
      {
        echo "## ⚠ Ancestry check skipped — current :latest has no revision label"
        echo
        echo "Likely a legacy image built before \`org.opencontainers.image.revision\` was set."
        echo "Falling through to retag. After all \`:latest\` images are post-label (TODO 90 days), this branch is dead and should be removed."
      } >> "$GITHUB_STEP_SUMMARY"
      echo "::warning::Current :latest carries no revision label — skipping ancestry check (legacy image)"
      exit 0
    fi
    # Fast path: retag would be a no-op; skip the compare API call.
    if [ "$CURRENT_REVISION" = "$TARGET_SHA" ]; then
      echo "decision=identical" >> "$GITHUB_OUTPUT"
      echo "::notice:::latest already at ${TARGET_SHA:0:7} — retag will be a no-op"
      exit 0
    fi
    # Ask GitHub which side of the merge graph TARGET_SHA sits on
    # relative to CURRENT_REVISION. Returns one of: ahead | identical
    # | behind | diverged. Network or auth errors collapse to "error"
    # via the explicit fallback so the case below always matches.
    STATUS=$(gh api \
      "repos/${REPO}/compare/${CURRENT_REVISION}...${TARGET_SHA}" \
      --jq '.status' 2>/dev/null || echo "error")
    echo "ancestry compare ${CURRENT_REVISION:0:7} → ${TARGET_SHA:0:7}: $STATUS"
    case "$STATUS" in
      ahead)
        echo "decision=ahead" >> "$GITHUB_OUTPUT"
        echo "::notice::Target ${TARGET_SHA:0:7} is ahead of current :latest (${CURRENT_REVISION:0:7}) — proceeding with retag"
        ;;
      identical)
        echo "decision=identical" >> "$GITHUB_OUTPUT"
        echo "::notice::Target identical to :latest — retag will be a no-op"
        ;;
      behind)
        echo "decision=behind" >> "$GITHUB_OUTPUT"
        {
          echo "## ❌ Auto-promote refused — target is BEHIND current :latest"
          echo
          echo "| Field | Value |"
          echo "|---|---|"
          echo "| Target SHA | \`$TARGET_SHA\` |"
          echo "| Current :latest revision | \`$CURRENT_REVISION\` |"
          echo "| GitHub compare status | \`behind\` |"
          echo
          echo "This guard catches the workflow_run-completion-order race (#2244):"
          echo "two rapid main pushes whose E2Es complete out-of-order can otherwise"
          echo "promote \`:latest\` backwards. \`:latest\` stays on \`${CURRENT_REVISION:0:7}\`."
          echo
          echo "**Recovery:** if this is a legitimate revert that should land on \`:latest\`,"
          echo "manually dispatch this workflow with the target sha as input — the manual-dispatch"
          echo "path skips the ancestry check (operator override)."
        } >> "$GITHUB_STEP_SUMMARY"
        exit 1
        ;;
      diverged)
        echo "decision=diverged" >> "$GITHUB_OUTPUT"
        {
          echo "## ❓ Auto-promote refused — history diverged"
          echo
          echo "| Field | Value |"
          echo "|---|---|"
          echo "| Target SHA | \`$TARGET_SHA\` |"
          echo "| Current :latest revision | \`$CURRENT_REVISION\` |"
          echo "| GitHub compare status | \`diverged\` |"
          echo
          echo "Likely cause: force-push rewrote main's history, leaving the previous"
          echo "\`:latest\` revision orphaned. Needs human review before \`:latest\` advances."
        } >> "$GITHUB_STEP_SUMMARY"
        exit 1
        ;;
      error|*)
        # `*` alone would match; the explicit `error|` alternation
        # documents the sentinel emitted by the `|| echo "error"`
        # fallback on the gh call above.
        echo "decision=error" >> "$GITHUB_OUTPUT"
        {
          echo "## ❌ Auto-promote aborted — ancestry-check API error"
          echo
          echo "\`gh api repos/${REPO}/compare/${CURRENT_REVISION}...${TARGET_SHA}\` returned unexpected status: \`$STATUS\`"
          echo
          echo "Manual dispatch with the target sha bypasses this check."
        } >> "$GITHUB_STEP_SUMMARY"
        exit 1
        ;;
    esac
- name: Retag platform :staging-<sha> → :latest
  if: steps.gate.outputs.proceed == 'true'
  run: |
    # `crane tag` points :latest at the existing :staging-<sha> manifest
    # in-registry — no layer re-push, digest unchanged.
    crane tag "${IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest
- name: Retag tenant :staging-<sha> → :latest
  if: steps.gate.outputs.proceed == 'true'
  run: |
    crane tag "${TENANT_IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest
- name: Summary
  if: steps.gate.outputs.proceed == 'true'
  run: |
    # Human-facing run summary: what moved, what triggered it, and how
    # the tenant fleet picks up the new :latest.
    {
      echo "## :latest promoted to ${{ steps.sha.outputs.short }}"
      echo
      if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
        echo "- Trigger: manual dispatch"
      else
        echo "- Upstream: \`${{ github.event.workflow_run.name }}\` ([run](${{ github.event.workflow_run.html_url }}))"
      fi
      echo "- platform:staging-${{ steps.sha.outputs.short }} → :latest"
      echo "- platform-tenant:staging-${{ steps.sha.outputs.short }} → :latest"
      echo
      echo "Tenant fleet auto-pulls within 5 min via IMAGE_AUTO_REFRESH=true."
      echo "Force immediate fanout: dispatch redeploy-tenants-on-main.yml."
    } >> "$GITHUB_STEP_SUMMARY"

View File

@ -0,0 +1,434 @@
name: Auto-promote staging → main
# Fires after any of the staging-branch quality gates complete. When ALL
# required gates are green on the same staging SHA, opens (or re-uses)
# a PR `staging → main` and enables auto-merge so the merge queue lands
# it. Closes the gap that historically let features sit on staging for
# weeks waiting for a bulk promotion PR (see molecule-core#1496 for the
# 1172-commit example).
#
# 2026-04-28 rewrite (PR #142): the previous version did a direct
# `git merge --ff-only origin staging && git push origin main`. That
# breaks against main's branch-protection ruleset, which requires
# status checks "set by the expected GitHub apps" — direct pushes
# can't satisfy that condition (only PR merges through the queue can).
# The workflow was failing every tick with:
# remote: error: GH006: Protected branch update failed for refs/heads/main.
# remote: - Required status checks ... were not set by the expected GitHub apps.
# Fix: mirror the PR-based pattern from auto-sync-main-to-staging.yml
# (the reverse-direction sync, fixed in #2234 for the same reason).
# Both directions now use the same merge-queue path that humans use,
# no special-case bypass.
#
# Safety model:
# - Runs ONLY on workflow_run events for the staging branch.
# - Requires EVERY named gate workflow to have the same head_sha and
# all be `conclusion == success`. If any of them is red, skipped,
# cancelled, or pending, we abort (stay on the current main).
# - The PR base=main head=staging path lets GitHub itself enforce
# branch protection. If main has diverged from staging or required
# checks aren't satisfied, the merge queue declines the PR — no
# need for a manual ff-only ancestry check here.
# - Loop safety: the auto-sync-main-to-staging workflow fires when
# main lands the auto-promote PR, but its merge into staging is by
# GITHUB_TOKEN which doesn't trigger downstream workflow_run events
# (GitHub Actions safety). So this workflow doesn't re-fire from
# its own promote landing.
#
# Toggle via repo variable AUTO_PROMOTE_ENABLED (true/unset). When
# unset, the workflow logs what it would have done but doesn't open
# the PR — useful for dry-running the gate logic without surfacing
# a noisy PR while staging CI is still flaky.
#
# **One-time repo setting (load-bearing):** this workflow opens the
# staging→main PR via `gh pr create` using the default GITHUB_TOKEN.
# Since GitHub's 2022 default change, that token cannot create or
# approve PRs unless the repo opts in. The toggle is at:
#
# Settings → Actions → General → Workflow permissions
# → ✅ Allow GitHub Actions to create and approve pull requests
#
# Without it, every workflow_run fails with:
#
# pull request create failed: GraphQL: GitHub Actions is not
# permitted to create or approve pull requests (createPullRequest)
#
# Observed 2026-04-29 01:43 UTC blocking promotion of fcd87b9 (PRs
# #2248 + #2249); manually bridged via PR #2252. Re-check this
# setting if auto-promote starts failing with createPullRequest
# errors after a repo or org admin change.
on:
workflow_run:
workflows:
- CI
- E2E Staging Canvas (Playwright)
- E2E API Smoke Test
- CodeQL
types: [completed]
workflow_dispatch:
inputs:
force:
description: "Force promote even when AUTO_PROMOTE_ENABLED is unset (manual override)"
required: false
default: "false"
permissions:
contents: write
pull-requests: write
# actions: write is needed by the post-merge dispatch tail step
# (#2358 / #2357) — `gh workflow run publish-workspace-server-image.yml`
# POSTs to /actions/workflows/.../dispatches which requires this scope.
# Without it the call 403s and the publish/canary/redeploy chain still
# doesn't run on staging→main promotions, undoing #2358.
actions: write
# Serialize auto-promote runs. Multiple staging gate completions can land
# in quick succession (CI + E2E + CodeQL all finish within seconds of
# each other on a green PR) — without this, two parallel runs both:
# 1. Open / re-use the same promote PR.
# 2. Both call `gh pr merge --auto` (idempotent — fine).
# 3. Both poll for the same mergedAt and both `gh workflow run` publish
# → 2× redundant publish builds racing for the same `:staging-latest`
# retag, and 2× canary-verify chains.
# cancel-in-progress: false because we don't want a brand-new run to kill
# a polling-tail that's about to dispatch — the polling tail's 30 min cap
# is the right backstop, not workflow-level cancel.
concurrency:
group: auto-promote-staging
cancel-in-progress: false
jobs:
check-all-gates-green:
# Only consider staging pushes. PRs into staging don't promote.
if: >
(github.event_name == 'workflow_run' &&
github.event.workflow_run.head_branch == 'staging' &&
github.event.workflow_run.event == 'push')
|| github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
outputs:
all_green: ${{ steps.gates.outputs.all_green }}
head_sha: ${{ steps.gates.outputs.head_sha }}
steps:
# Skip empty-tree promotes (the perpetual auto-promote↔auto-sync cycle
# observed 2026-05-03). Sequence: auto-promote merges via the staging
# merge-queue's MERGE strategy, creating a merge commit on main that
# staging doesn't have. auto-sync then merges main back into staging
# via another merge commit (the queue's MERGE strategy applies on
# the staging side too, even when the workflow's local FF would
# have sufficed). Now staging has a new merge-commit SHA whose
# tree == main's tree — but auto-promote sees "staging ahead of
# main by 1" and opens YET another empty promote PR. Each round
# costs ~30-40 min wallclock, ~2 manual approvals, and burns a
# full CodeQL Go run (~15 min). Without this guard the cycle
# repeats indefinitely.
#
# Long-term fix is to switch the merge_queue ruleset's
# `merge_method` away from MERGE so FF-able PRs land cleanly,
# but that's a broader change affecting every staging PR's
# commit shape. This guard is the one-line surgical fix that
# breaks the cycle without touching merge-queue config.
#
# Fail-open: if `git diff` errors for any reason, fall through
# to the gate check (preserve existing behavior). Only skip
# when the diff is DEFINITIVELY empty.
- name: Checkout for tree-diff check
  uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
  with:
    # Full history of staging so HEAD_SHA (any recent staging push) is
    # reachable for the diff below.
    fetch-depth: 0
    ref: staging
- name: Skip if staging tree == main tree (perpetual-cycle break)
  id: tree-diff
  env:
    # workflow_run carries the gate's head_sha; on manual dispatch fall
    # back to the current ref's sha.
    HEAD_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
  run: |
    set -eu
    # Fail-open path 1: fetch failure → exit 0 WITHOUT writing skip=true.
    # The downstream `if: steps.tree-diff.outputs.skip != 'true'` treats
    # the unset output as not-'true', so the gate check still runs.
    git fetch origin main --depth=50 || { echo "::warning::git fetch main failed — proceeding (fail-open)"; exit 0; }
    # Compare staging tip's tree against main's tree. `git diff
    # --quiet` exits 0 if no differences, 1 if there are.
    # Fail-open path 2: any other git error (e.g. HEAD_SHA unreachable)
    # also exits non-zero → else branch → skip=false → proceed.
    if git diff --quiet origin/main "$HEAD_SHA" -- 2>/dev/null; then
      {
        echo "## ⏭ Skipped — no code to promote"
        echo
        echo "staging tip (\`${HEAD_SHA:0:8}\`) and \`main\` have identical trees."
        echo "This is the auto-promote↔auto-sync merge-commit cycle: staging has a"
        echo "new SHA (a sync-back merge commit) but the underlying file tree is"
        echo "already on main, so there's no real code to ship."
        echo
        echo "Skipping to avoid opening an empty promote PR. Cycle terminates here."
      } >> "$GITHUB_STEP_SUMMARY"
      echo "::notice::auto-promote: staging tree == main tree — no code to promote, skipping"
      echo "skip=true" >> "$GITHUB_OUTPUT"
    else
      echo "skip=false" >> "$GITHUB_OUTPUT"
    fi
- name: Check all required gates on this SHA
  if: steps.tree-diff.outputs.skip != 'true'
  id: gates
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    HEAD_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
    REPO: ${{ github.repository }}
  run: |
    set -euo pipefail
    # Required gate workflow files. Use file paths (relative to
    # .github/workflows/) rather than display names because:
    #
    #   1. `gh run list --workflow=<name>` is ambiguous when two
    #      workflows have the same `name:` — observed 2026-04-28
    #      with "CodeQL" matching both `codeql.yml` (explicit) and
    #      GitHub's UI-configured Code-quality default setup
    #      (internal "codeql"). gh CLI returns "could not resolve
    #      to a unique workflow" → empty result → gate evaluated
    #      as missing/none → auto-promote dead-locked despite all
    #      checks actually passing.
    #
    #   2. File paths are the unique identifier for workflows;
    #      `name:` is just a display string and can collide.
    #
    # When adding/removing a gate, update this list AND the
    # branch-protection required-checks list (which uses check-run
    # display names, not workflow names; the two are decoupled and
    # should be kept in sync manually).
    GATES=(
      "ci.yml"
      "e2e-staging-canvas.yml"
      "e2e-api.yml"
      "codeql.yml"
    )
    echo "head_sha=${HEAD_SHA}" >> "$GITHUB_OUTPUT"
    echo "Checking gates on SHA ${HEAD_SHA}"
    ALL_GREEN=true
    for gate in "${GATES[@]}"; do
      # Query the most recent run of this workflow on this SHA.
      # event=push to avoid picking up PR runs. branch=staging to
      # guard against someone dispatching the gate on a non-staging
      # branch at the same SHA.
      #
      # Robustness fix: jq `.[0]` on an empty run list yields null,
      # which stringified to "null/none" — the intended "missing/none"
      # fallback only fired on gh-CLI failure, never on an empty
      # result. Coalesce with `(.[0] // {})` (same idiom as the E2E
      # gate in the promote workflow) so a gate with no recorded run
      # reads as "missing/none" in the log. Gate outcome is unchanged
      # either way: anything != completed/success blocks promotion.
      RESULT=$(gh run list \
        --repo "$REPO" \
        --workflow "$gate" \
        --branch staging \
        --event push \
        --commit "$HEAD_SHA" \
        --limit 1 \
        --json status,conclusion \
        --jq '(.[0] // {}) | "\(.status // "missing")/\(.conclusion // "none")"' \
        2>/dev/null || echo "missing/none")
      echo "  $gate → $RESULT"
      # Only completed/success counts. completed/failure or
      # in_progress/anything or no record at all = abort.
      if [ "$RESULT" != "completed/success" ]; then
        ALL_GREEN=false
      fi
    done
    echo "all_green=${ALL_GREEN}" >> "$GITHUB_OUTPUT"
    if [ "$ALL_GREEN" != "true" ]; then
      echo "::notice::auto-promote: not all gates are green on ${HEAD_SHA} — staying on current main"
    fi
promote:
  needs: check-all-gates-green
  # Runs only when every gate reported completed/success on the same SHA.
  if: needs.check-all-gates-green.outputs.all_green == 'true'
  runs-on: ubuntu-latest
  steps:
    - name: Check rollout gate
      env:
        AUTO_PROMOTE_ENABLED: ${{ vars.AUTO_PROMOTE_ENABLED }}
        FORCE_INPUT: ${{ github.event.inputs.force }}
      run: |
        set -eu
        # Repo variable AUTO_PROMOTE_ENABLED=true flips this on. While
        # it's unset, the workflow dry-runs (logs what it would have
        # done) but doesn't open the promote PR. Set the variable in
        # Settings → Secrets and variables → Actions → Variables.
        # `exit 0` (not 1): a disabled toggle is a clean no-op — the
        # later steps re-check the same condition in their own `if:`
        # guards and skip themselves.
        if [ "${AUTO_PROMOTE_ENABLED:-}" != "true" ] && [ "${FORCE_INPUT:-false}" != "true" ]; then
          {
            echo "## ⏸ Auto-promote disabled"
            echo
            echo "Repo variable \`AUTO_PROMOTE_ENABLED\` is not set to \`true\`."
            echo "All gates are green on staging; would have opened a promote PR to \`main\`."
            echo
            echo "To enable: Settings → Secrets and variables → Actions → Variables → \`AUTO_PROMOTE_ENABLED=true\`."
            echo "To test once manually: workflow_dispatch with \`force=true\`."
          } >> "$GITHUB_STEP_SUMMARY"
          echo "::notice::auto-promote disabled — dry run only"
          exit 0
        fi
    # Mint the App token BEFORE the promote-PR step so the auto-merge
    # call can use it. GITHUB_TOKEN-initiated merges suppress the
    # downstream `push` event on main, breaking the
    # publish-workspace-server-image → canary-verify → redeploy-tenants
    # chain (issue #2357). Using the App token here means the
    # merge-queue-landed merge IS able to fire the cascade naturally;
    # the polling tail below stays as defense-in-depth.
    - name: Mint App token for promote-PR + downstream dispatch
      if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
      id: app-token
      uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
      with:
        app-id: ${{ secrets.MOLECULE_AI_APP_ID }}
        private-key: ${{ secrets.MOLECULE_AI_APP_PRIVATE_KEY }}
- name: Open (or reuse) staging → main promote PR + enable auto-merge
  if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
  # NOTE: this step's `id:` is declared at the bottom, after `run:`.
  # YAML mapping keys are order-insensitive so it's valid — just easy
  # to miss when scanning for the step id referenced by later steps.
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token }}
    REPO: ${{ github.repository }}
    TARGET_SHA: ${{ needs.check-all-gates-green.outputs.head_sha }}
  run: |
    set -euo pipefail
    # Look for an existing open promote PR (idempotent on re-run
    # of the workflow). The PR's head IS the staging branch — the
    # whole point is "advance main to staging's tip", so we don't
    # need a per-SHA branch like auto-sync-main-to-staging uses.
    PR_NUM=$(gh pr list --repo "$REPO" \
      --base main --head staging --state open \
      --json number --jq '.[0].number // ""')
    if [ -z "$PR_NUM" ]; then
      TITLE="staging → main: auto-promote ${TARGET_SHA:0:7}"
      BODY_FILE=$(mktemp)
      # Unquoted heredoc delimiter: ${TARGET_SHA:0:8} expands now.
      cat > "$BODY_FILE" <<EOFBODY
    Automated promotion of \`staging\` (\`${TARGET_SHA:0:8}\`) to \`main\`. All required staging gates green at this SHA: CI, E2E Staging Canvas, E2E API Smoke, CodeQL.
    This PR is auto-generated by \`.github/workflows/auto-promote-staging.yml\` whenever every required gate completes green on the same staging SHA. It exists because main's branch protection requires status checks "set by the expected GitHub apps" — direct \`git push\` from a workflow can't satisfy that, only PR merges through the queue can.
    Merge queue lands this; no human action needed unless gates fail. Reverse-direction sync (the merge commit on main → staging) is handled by \`auto-sync-main-to-staging.yml\`.
    EOFBODY
      PR_URL=$(gh pr create --repo "$REPO" \
        --base main --head staging \
        --title "$TITLE" \
        --body-file "$BODY_FILE")
      # gh pr create prints the PR URL; the trailing digits are the number.
      PR_NUM=$(echo "$PR_URL" | grep -oE '[0-9]+$' | tail -1)
      rm -f "$BODY_FILE"
      echo "::notice::Opened PR #${PR_NUM}"
    else
      echo "::notice::Re-using existing promote PR #${PR_NUM}"
    fi
    # Enable auto-merge — the merge queue picks it up once
    # required gates are green on the merge_group ref.
    # Warn-don't-fail: a failed --auto call leaves the PR open for a
    # human to merge; aborting here would also skip the dispatch tail.
    if ! gh pr merge "$PR_NUM" --repo "$REPO" --auto --merge 2>&1; then
      echo "::warning::Failed to enable auto-merge on PR #${PR_NUM} — operator may need to merge manually."
    fi
    {
      echo "## ✅ Auto-promote PR opened"
      echo
      echo "- Source: staging at \`${TARGET_SHA:0:8}\`"
      echo "- PR: #${PR_NUM}"
      echo
      echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail."
    } >> "$GITHUB_STEP_SUMMARY"
    # Hand the PR number to the next step so we can dispatch the
    # tenant-redeploy chain after the merge queue lands the merge.
    echo "promote_pr_num=${PR_NUM}" >> "$GITHUB_OUTPUT"
  id: promote_pr
# The App token minted above (before the promote-PR step) is
# also used by the polling tail below. Defense-in-depth: with
# the merge-queue-landed merge now using the App token, the
# main-branch push event SHOULD fire the publish/canary/redeploy
# cascade naturally — but if for any reason it doesn't (e.g. an
# unrelated event-suppression edge case), the explicit dispatches
# below still wake the chain.
- name: Wait for promote merge, then dispatch publish + redeploy (#2357)
  # Defense-in-depth dispatch. With the auto-merge call above
  # now using the App token (this commit), the merge-queue-landed
  # merge SHOULD fire publish-workspace-server-image naturally
  # via on:push:[main] — App-token-initiated pushes DO trigger
  # workflow_run cascades, unlike GITHUB_TOKEN-initiated ones
  # (the documented "no recursion" rule —
  # https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
  #
  # This explicit dispatch stays as belt-and-suspenders for any
  # edge case where the natural cascade misfires. If it never
  # observably fires after this token swap (i.e. the publish
  # workflow has already started by the time we get here), the
  # second dispatch is a harmless no-op (publish-workspace-server-image
  # has its own concurrency group that dedupes).
  #
  # See PR for #2357: pre-fix the merge action was via
  # GITHUB_TOKEN, suppressing the cascade and forcing this tail
  # to be the SOLE chain trigger. With the auto-merge token swap
  # the tail becomes redundant in the happy path; keep until
  # we've observed >=10 successful natural cascades, then drop.
  if: steps.promote_pr.outputs.promote_pr_num != ''
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token }}
    REPO: ${{ github.repository }}
    PR_NUM: ${{ steps.promote_pr.outputs.promote_pr_num }}
  run: |
    # NOTE(review): unlike the earlier steps, this script sets no
    # explicit `set -euo pipefail` — it relies on the runner's default
    # shell options, so a transient `gh pr view` failure mid-loop may
    # abort the whole tail. Confirm that's intended.
    #
    # Poll for merge — max 30 min (60 × 30s). The merge queue
    # typically lands within 5-10 min when gates are green. Break
    # early if the PR is closed without merging (operator action,
    # gates flipped red post-approval, branch-protection rejection)
    # so we don't tie up a runner for the full 30 min on a dead PR.
    MERGED=""
    STATE=""
    for _ in $(seq 1 60); do
      VIEW=$(gh pr view "$PR_NUM" --repo "$REPO" --json mergedAt,state)
      MERGED=$(echo "$VIEW" | jq -r '.mergedAt // ""')
      STATE=$(echo "$VIEW" | jq -r '.state // ""')
      if [ -n "$MERGED" ] && [ "$MERGED" != "null" ]; then
        echo "::notice::Promote PR #${PR_NUM} merged at ${MERGED}"
        break
      fi
      if [ "$STATE" = "CLOSED" ]; then
        echo "::warning::Promote PR #${PR_NUM} was closed without merging — skipping deploy dispatch."
        exit 0
      fi
      sleep 30
    done
    if [ -z "$MERGED" ] || [ "$MERGED" = "null" ]; then
      echo "::warning::Promote PR #${PR_NUM} didn't merge within 30min — skipping deploy dispatch (manually run \`gh workflow run publish-workspace-server-image.yml --ref main\` once it lands)."
      exit 0
    fi
    # Dispatch publish on main using the App token. App-initiated
    # workflow_dispatch DOES propagate the workflow_run cascade,
    # unlike GITHUB_TOKEN-initiated dispatch.
    # publish completes → canary-verify chains via workflow_run →
    # redeploy-tenants-on-main chains via workflow_run + branches:[main].
    if gh workflow run publish-workspace-server-image.yml \
        --repo "$REPO" --ref main 2>&1; then
      echo "::notice::Dispatched publish-workspace-server-image on ref=main as molecule-ai App — canary-verify and redeploy-tenants-on-main will chain via workflow_run."
      {
        echo "## 🚀 Tenant redeploy chain dispatched"
        echo
        echo "- publish-workspace-server-image (workflow_dispatch on \`main\`, actor: \`molecule-ai[bot]\`)"
        echo "- canary-verify will chain on completion"
        echo "- redeploy-tenants-on-main will chain on canary green"
      } >> "$GITHUB_STEP_SUMMARY"
    else
      echo "::error::Failed to dispatch publish-workspace-server-image. Run manually: gh workflow run publish-workspace-server-image.yml --ref main"
    fi
    # ALSO dispatch auto-sync-main-to-staging.yml. Same root cause as
    # publish above (issue #2357): the merge-queue-initiated push to
    # main is by GITHUB_TOKEN → no `on: push` triggers fire downstream.
    # Without this dispatch, every staging→main promote leaves staging
    # one merge commit BEHIND main, which silently dead-locks the NEXT
    # promote PR as `mergeStateStatus: BEHIND` because main's
    # branch-protection has `strict: true`. Verified empirically on
    # 2026-05-02 against PR #2442 (Phase 2 promote): only the explicit
    # publish-workspace-server-image dispatch fired on the previous
    # promote SHA 76c604fb, while auto-sync silently no-op'd, leaving
    # staging behind for ~24h until manually bridged.
    if gh workflow run auto-sync-main-to-staging.yml \
        --repo "$REPO" --ref main 2>&1; then
      echo "::notice::Dispatched auto-sync-main-to-staging on ref=main as molecule-ai App — staging will absorb the new main merge commit via PR + merge queue."
    else
      echo "::error::Failed to dispatch auto-sync-main-to-staging. Run manually: gh workflow run auto-sync-main-to-staging.yml --ref main"
    fi

View File

@ -0,0 +1,83 @@
name: auto-promote-stale-alarm
# Hourly cron + on-demand alarm for the silent-block failure mode that
# motivated issue #2975:
# - The auto-promote-staging.yml workflow opened a PR + armed
# auto-merge, but main's branch protection requires a human review
# (reviewDecision=REVIEW_REQUIRED). The PR sat BLOCKED with no
# surface-up-the-stack for 12+ hours, holding 25 commits hostage
# including the Memory v2 redesign and a reno-stars data-loss fix.
#
# This workflow runs `scripts/check-stale-promote-pr.sh` against the
# repo's open auto-promote PRs (base=main head=staging). When a PR has
# been BLOCKED on REVIEW_REQUIRED for >4h, it:
# 1. Emits a workflow-level warning (visible in run summary + the
# Actions UI feed).
# 2. Posts a comment on the PR (idempotent — one alarm per PR).
#
# The detection logic lives in scripts/check-stale-promote-pr.sh so
# it's unit-testable with stubbed `gh` (see test-check-stale-promote-pr.sh).
# This file is the schedule + invocation surface only — SSOT for the
# detector itself.
on:
schedule:
# Hourly. Cheap (one `gh pr list` + jq), and 1h granularity is
# plenty for a 4h staleness threshold — operators see the alarm
# within at most 1h of crossing the threshold.
- cron: "27 * * * *" # at :27 to dodge the cron herd at :00
workflow_dispatch:
inputs:
stale_hours:
description: "Hours after which a BLOCKED+REVIEW_REQUIRED PR is stale (default 4)"
required: false
default: "4"
post_comment:
description: "Post a comment on stale PRs (default true)"
required: false
default: "true"
permissions:
contents: read
pull-requests: write # post comments on stale PRs
# Serialize so the on-demand and scheduled runs don't double-comment
# the same PR. cancel-in-progress=false because the script is idempotent
# (existing comment marker prevents dupes), but a scheduled run firing
# while a manual one runs would just re-list the same PR set.
concurrency:
group: auto-promote-stale-alarm
cancel-in-progress: false
jobs:
scan:
runs-on: ubuntu-latest
steps:
- name: Checkout (need scripts/ only)
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
sparse-checkout: |
scripts/check-stale-promote-pr.sh
sparse-checkout-cone-mode: false
- name: Run stale-PR detector
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    GITHUB_REPOSITORY: ${{ github.repository }}
    # `inputs` is empty on scheduled runs; `||` supplies the defaults.
    STALE_HOURS: ${{ inputs.stale_hours || '4' }}
    POST_COMMENT: ${{ inputs.post_comment || 'true' }}
  run: |
    # The script's exit code reflects the count of stale PRs.
    # We don't want a stale finding to fail the workflow run —
    # the warning + comment are the signal, the green/red is
    # noise. So convert any non-zero exit to a workflow notice
    # and exit 0.
    # NOTE(review): rc is reported as a PR count, but shell exit codes
    # like 127 (command not found) or 126 (not executable) would read
    # as "127 stale PRs" in the notice — consider distinguishing crash
    # codes from counts.
    set +e
    bash scripts/check-stale-promote-pr.sh
    rc=$?
    set -e
    if [ "$rc" -ne 0 ]; then
      echo "::notice::Stale PR detector found $rc PR(s) needing attention. See warnings above + comments on the PRs."
    fi
    # Always succeed — operator-facing surface is the warning,
    # not the workflow status.
    exit 0

View File

@ -0,0 +1,237 @@
name: Auto-sync main → staging
# Reflects every push to `main` back onto `staging` so the
# staging-as-superset-of-main invariant holds.
#
# Background:
#
# `auto-promote-staging.yml` advances main via `git merge --ff-only`
# + `git push origin main` — that's a clean fast-forward, no merge
# commit. But manual merges of `staging → main` PRs through the
# GitHub UI / API create a merge commit on main that staging
# doesn't have. The next `staging → main` PR then evaluates as
# "BEHIND" because staging is missing that merge commit, requiring
# a manual `gh pr update-branch` round-trip.
#
# This happened twice on 2026-04-28 (PRs #2202, #2205, both manual
# bridges). Each time the bridge needed update-branch + a re-CI
# round before merging. Operationally annoying and avoidable.
#
# Architecture:
#
# This repo's `staging` branch is protected by a `merge_queue`
# ruleset (id 15500102) that blocks ALL direct pushes — no bypass
# even for org admins or the GitHub Actions integration. Direct
# `git push origin staging` returns GH013. So instead of pushing
# directly, this workflow:
#
#   1. Checks if main is already in staging's ancestry → no-op.
#   2. Creates an `auto-sync/main-<sha>` branch from staging.
#   3. Tries `git merge --ff-only origin/main` → if staging hasn't
#      diverged this is a clean ff.
#   4. Otherwise `git merge --no-ff origin/main` to absorb main's
#      tip while keeping staging's history.
#   5. Pushes the auto-sync branch.
#   6. Opens a PR (base=staging, head=auto-sync/main-<sha>) and
#      enables auto-merge so the merge queue lands it.
#
# This mirrors the path human PRs take through staging — same
# rules, same gates, no special-case bypass.
#
# Loop safety:
#
# `GITHUB_TOKEN`-authored merges (including the merge queue's land
# of the auto-sync PR) do NOT trigger downstream workflow runs
# (GitHub Actions safety). So when the auto-sync PR lands on
# staging, `auto-promote-staging.yml` is NOT triggered by that
# push. The next developer push to staging triggers auto-promote
# normally. No loop possible.
#
# NOTE(review): the checkout below authenticates with
# `secrets.AUTO_SYNC_TOKEN` (a PAT/App token, not GITHUB_TOKEN), so
# pushes of the auto-sync branch WILL fire branch-scoped workflows;
# the no-recursion argument above only covers events authored by
# GITHUB_TOKEN. Loop safety still holds because this workflow only
# triggers on pushes to `main` — confirm that assumption stays true
# if the trigger set ever widens.
#
# Concurrency:
#
# Two pushes to main in quick succession (e.g., manual UI merge
# immediately followed by auto-promote-staging's ff-merge) could
# otherwise open two overlapping auto-sync PRs. The concurrency
# group serializes runs; the second waits for the first to exit.
# (The first run exits after opening + auto-merge-queueing the PR,
# not after the merge actually completes — so multiple PRs can be
# open simultaneously, but the merge queue handles them serially.)
on:
  push:
    branches: [main]
  # workflow_dispatch lets:
  #   1. Operators manually backfill a missed sync (e.g. after a manual
  #      UI merge that the runner missed).
  #   2. auto-promote-staging.yml's polling tail explicitly invoke us
  #      after the promote PR lands. This is load-bearing: when the
  #      merge queue lands a promote-PR merge, the resulting push to
  #      `main` is "by GITHUB_TOKEN", and per GitHub's no-recursion
  #      rule (https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow)
  #      that push event does NOT fire any downstream workflows. The
  #      `on: push` trigger above is silently dead for the very pattern
  #      we exist to handle. Verified empirically 2026-05-02 against
  #      SHA 76c604fb (PR #2437 staging→main): only ONE workflow fired
  #      (publish-workspace-server-image, dispatched explicitly by
  #      auto-promote's polling tail with an App token). Every other
  #      `on: push: branches: [main]` workflow — including this one —
  #      was suppressed. Until the underlying merge call moves to an
  #      App token, an explicit dispatch is the only reliable path.
  workflow_dispatch:
permissions:
  contents: write
  pull-requests: write
concurrency:
  group: auto-sync-main-to-staging
  cancel-in-progress: false
jobs:
  sync-staging:
    # ubuntu-latest matches every other workflow in this repo. The
    # earlier `[self-hosted, macos, arm64]` was a copy-paste artefact
    # from the molecule-controlplane repo (which IS private and uses a
    # Mac runner) — molecule-core has no Mac runner registered, so the
    # job sat unassigned whenever the trigger fired. Verified 2026-05-02:
    # this is the ONLY workflow in molecule-core/.github/workflows/ with
    # a non-ubuntu runs-on.
    runs-on: ubuntu-latest
    steps:
      - name: Checkout staging
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # fetch-depth 0 = full history, required for the
          # merge-base ancestry test and the ff/no-ff merges below.
          fetch-depth: 0
          ref: staging
          # PAT, not GITHUB_TOKEN — see the loop-safety review note in
          # the header before changing this.
          token: ${{ secrets.AUTO_SYNC_TOKEN }}
      - name: Configure git author
        # Commit identity for the merge commit created below; uses the
        # canonical github-actions[bot] noreply address.
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
      - name: Check if staging already contains main
        id: check
        run: |
          set -euo pipefail
          git fetch origin main
          # Ancestry test: exit 0 iff origin/main is already reachable
          # from staging's HEAD → nothing to sync.
          if git merge-base --is-ancestor origin/main HEAD; then
            echo "needs_sync=false" >> "$GITHUB_OUTPUT"
            {
              echo "## ✅ No-op"
              echo
              echo "staging already contains \`origin/main\` ($(git rev-parse --short=8 origin/main))."
            } >> "$GITHUB_STEP_SUMMARY"
          else
            echo "needs_sync=true" >> "$GITHUB_OUTPUT"
            MAIN_SHORT=$(git rev-parse --short=8 origin/main)
            echo "main_short=${MAIN_SHORT}" >> "$GITHUB_OUTPUT"
            echo "branch=auto-sync/main-${MAIN_SHORT}" >> "$GITHUB_OUTPUT"
            echo "::notice::staging is missing main's tip (${MAIN_SHORT}) — opening sync PR"
          fi
      - name: Create auto-sync branch + merge main
        if: steps.check.outputs.needs_sync == 'true'
        id: prep
        run: |
          set -euo pipefail
          BRANCH="${{ steps.check.outputs.branch }}"
          # If a previous auto-sync run already opened a branch for the
          # same main sha, prefer reusing it (idempotent behavior on
          # workflow restart). Force-update from latest staging anyway
          # so it absorbs any staging-side commits that landed since.
          git checkout -B "$BRANCH"
          if git merge --ff-only origin/main; then
            echo "did_ff=true" >> "$GITHUB_OUTPUT"
            echo "::notice::Fast-forwarded ${BRANCH} to origin/main"
          else
            echo "did_ff=false" >> "$GITHUB_OUTPUT"
            if ! git merge --no-ff origin/main -m "chore: sync main → staging (auto)"; then
              # Hygiene: leave the work tree clean before failing.
              git merge --abort || true
              {
                echo "## ❌ Conflict"
                echo
                echo "Auto-merge \`main → staging\` failed with conflicts."
                echo "A human needs to resolve manually."
              } >> "$GITHUB_STEP_SUMMARY"
              exit 1
            fi
          fi
      - name: Push auto-sync branch
        if: steps.check.outputs.needs_sync == 'true'
        run: |
          set -euo pipefail
          # Force-with-lease so a concurrent auto-sync run can't
          # silently clobber an in-flight branch we just updated. If a
          # different writer touched the branch, we abort and the next
          # run picks up the latest state.
          # NOTE(review): the lease baseline is the local remote-tracking
          # ref for this branch; on this fresh clone that ref may not
          # exist for a brand-new auto-sync branch — confirm the desired
          # behavior on re-runs against an already-pushed branch.
          git push --force-with-lease origin "${{ steps.check.outputs.branch }}"
      - name: Open auto-sync PR + enable auto-merge
        if: steps.check.outputs.needs_sync == 'true'
        env:
          GH_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
          BRANCH: ${{ steps.check.outputs.branch }}
          MAIN_SHORT: ${{ steps.check.outputs.main_short }}
          DID_FF: ${{ steps.prep.outputs.did_ff }}
        run: |
          set -euo pipefail
          # Find existing PR for this branch (idempotent on workflow
          # restart) before creating a new one.
          PR_NUM=$(gh pr list --head "$BRANCH" --base staging --state open --json number --jq '.[0].number // ""')
          if [ -z "$PR_NUM" ]; then
            # Body lives in a temp file to keep the multi-line content
            # out of the YAML block scalar (un-indented newlines inside
            # an inline shell string break YAML parsing).
            BODY_FILE=$(mktemp)
            if [ "$DID_FF" = "true" ]; then
              TITLE="chore: sync main → staging (auto, ff to ${MAIN_SHORT})"
              cat > "$BODY_FILE" <<EOFBODY
          Automated fast-forward of \`staging\` to \`origin/main\` (\`${MAIN_SHORT}\`). Staging has no in-flight commits that diverge from main. Merge queue lands this; no human action needed.
          This PR is auto-generated by \`.github/workflows/auto-sync-main-to-staging.yml\` on every push to \`main\`. It exists because this repo's \`staging\` branch has a \`merge_queue\` ruleset that blocks direct pushes — even from the GitHub Actions integration.
          EOFBODY
            else
              TITLE="chore: sync main → staging (auto, merge ${MAIN_SHORT})"
              cat > "$BODY_FILE" <<EOFBODY
          Automated merge of \`origin/main\` (\`${MAIN_SHORT}\`) into \`staging\`. Staging has commits main doesn't, so this is a non-ff merge that absorbs main's tip. Merge queue lands this.
          This PR is auto-generated by \`.github/workflows/auto-sync-main-to-staging.yml\` on every push to \`main\`.
          EOFBODY
            fi
            # gh pr create prints the URL on stdout; extract the PR number.
            # The $-anchored pattern yields at most one match; `tail -1`
            # is defensive. Under `set -o pipefail` a non-matching grep
            # fails the step — acceptable, since a URL without a trailing
            # number means pr-create itself misbehaved.
            PR_URL=$(gh pr create \
              --base staging \
              --head "$BRANCH" \
              --title "$TITLE" \
              --body-file "$BODY_FILE")
            PR_NUM=$(echo "$PR_URL" | grep -oE '[0-9]+$' | tail -1)
            rm -f "$BODY_FILE"
            echo "::notice::Opened PR #${PR_NUM}"
          else
            echo "::notice::Re-using existing PR #${PR_NUM} for ${BRANCH}"
          fi
          # Enable auto-merge — the merge queue picks it up once
          # required gates are green. Use --merge for merge commits
          # (matches the rest of this repo's PR convention).
          # `2>&1` folds gh's stderr into the step log so the failure
          # context appears next to the warning below.
          if ! gh pr merge "$PR_NUM" --auto --merge 2>&1; then
            echo "::warning::Failed to enable auto-merge on PR #${PR_NUM} — operator may need to merge manually."
          fi
          {
            echo "## ✅ Auto-sync PR opened"
            echo
            echo "- Branch: \`$BRANCH\`"
            echo "- PR: #$PR_NUM"
            echo "- Strategy: $([ "$DID_FF" = "true" ] && echo "ff" || echo "merge commit")"
            echo
            echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail."
          } >> "$GITHUB_STEP_SUMMARY"

View File

@ -57,42 +57,17 @@ jobs:
id: bump
if: steps.skip.outputs.skip != 'true'
env:
# Gitea-shape token (act_runner forwards GITHUB_TOKEN as a
# short-lived per-run secret with read access to this repo).
# We hit `/api/v1/repos/.../pulls?state=closed` directly
# because `gh pr list` calls Gitea's GraphQL endpoint, which
# returns HTTP 405 (issue #75 / post-#66 sweep).
GITEA_TOKEN: ${{ github.token }}
REPO: ${{ github.repository }}
GITEA_API_URL: ${{ github.server_url }}/api/v1
PUSH_SHA: ${{ github.sha }}
GH_TOKEN: ${{ github.token }}
run: |
# Find the merged PR whose merge_commit_sha matches this push.
# Gitea's `/repos/{owner}/{repo}/pulls?state=closed` returns
# PRs sorted newest-first; we paginate up to 50 and jq-filter
# on `merge_commit_sha == PUSH_SHA`. Bounded — auto-tag fires
# per push to main, so the matching PR is always among the
# most recent closures. 50 is comfortably more than the
# ~10-20 staging→main promotes that close in any reasonable
# window.
set -euo pipefail
PRS_JSON=$(curl --fail-with-body -sS \
-H "Authorization: token ${GITEA_TOKEN}" \
-H "Accept: application/json" \
"${GITEA_API_URL}/repos/${REPO}/pulls?state=closed&sort=newest&limit=50" \
2>/dev/null || echo "[]")
PR=$(printf '%s' "$PRS_JSON" \
| jq -c --arg sha "$PUSH_SHA" \
'[.[] | select(.merged_at != null and .merge_commit_sha == $sha)] | .[0] // empty')
# The merged PR for this push commit. `gh pr list --search` finds
# closed PRs whose merge commit matches; we take the first.
PR=$(gh pr list --state merged --search "${{ github.sha }}" --json number,labels --jq '.[0]' 2>/dev/null || echo "")
if [ -z "$PR" ] || [ "$PR" = "null" ]; then
echo "No merged PR found for ${PUSH_SHA} — defaulting to patch bump."
echo "No merged PR found for ${{ github.sha }} — defaulting to patch bump."
echo "kind=patch" >> "$GITHUB_OUTPUT"
exit 0
fi
# Gitea returns labels under `.labels[].name`, same shape as
# GitHub's REST. The previous `gh pr list --json number,labels`
# output was identical; jq filter unchanged.
LABELS=$(printf '%s' "$PR" | jq -r '.labels[]?.name // empty')
LABELS=$(echo "$PR" | jq -r '.labels[].name')
if echo "$LABELS" | grep -qx 'release:major'; then
echo "kind=major" >> "$GITHUB_OUTPUT"
elif echo "$LABELS" | grep -qx 'release:minor'; then

View File

@ -1,7 +1,7 @@
name: Block internal-flavored paths
# Hard CI gate. Internal content (positioning, competitive briefs, sales
# playbooks, PMM/press drip, draft campaigns) lives in molecule-ai/internal —
# playbooks, PMM/press drip, draft campaigns) lives in Molecule-AI/internal —
# this public monorepo must never re-acquire those paths. CEO directive
# 2026-04-23 after a fleet-wide audit found 79 internal files leaked here.
#
@ -135,7 +135,7 @@ jobs:
echo "::error::Forbidden internal-flavored paths detected:"
printf "$OFFENDING"
echo ""
echo "These paths belong in molecule-ai/internal, not this public repo."
echo "These paths belong in Molecule-AI/internal, not this public repo."
echo "See docs/internal-content-policy.md for canonical locations."
echo ""
echo "If your file is genuinely public-facing (e.g. a blog post"

View File

@ -19,7 +19,6 @@ on:
branches: [staging, main]
paths:
- 'tools/branch-protection/**'
- '.github/workflows/**'
- '.github/workflows/branch-protection-drift.yml'
permissions:
@ -80,32 +79,3 @@ jobs:
# Repo-admin scope, needed for /branches/:b/protection.
GH_TOKEN: ${{ secrets.GH_TOKEN_FOR_ADMIN_API }}
run: bash tools/branch-protection/drift_check.sh
# Self-test the parity script before running it on the real
# workflows — pins the script's classification logic against
# synthetic safe/unsafe/missing/unsafe-mix/matrix fixtures so a
# regression in the script can't false-pass on the production
# workflow audit. Cheap (~0.5s); always runs.
- name: Self-test check-name parity script
run: bash tools/branch-protection/test_check_name_parity.sh
# Check-name parity gate (#144 / saved memory
# feedback_branch_protection_check_name_parity).
#
# drift_check.sh asserts the live branch protection matches what
# apply.sh would set; check_name_parity.sh closes the orthogonal
# gap: it asserts every required check name in apply.sh maps to a
# workflow job whose "always emits this status" shape is intact.
#
# The two checks fail in different scenarios:
#
# - drift_check fails → live state was rewritten out-of-band
# (UI click, manual PATCH).
# - check_name_parity fails → an apply.sh required name has no
# emitter, OR the emitting workflow has a top-level paths:
# filter without per-step if-gates (the silent-block shape).
#
# Cheap (~1s); runs without the admin token because it only reads
# apply.sh + .github/workflows/ from the checkout.
- name: Run check-name parity gate
run: bash tools/branch-protection/check_name_parity.sh

View File

@ -20,19 +20,6 @@ on:
# a few minutes under load — that's fine for a canary.
- cron: '*/30 * * * *'
workflow_dispatch:
inputs:
keep_on_failure:
description: >-
Skip teardown when the canary fails (debugging only). The
tenant org + EC2 + CF tunnel + DNS stay alive so an operator
can SSM into the workspace EC2 and capture docker logs of the
failing claude-code container. REMEMBER to manually delete
via DELETE /cp/admin/tenants/<slug> when done so the org
doesn't accumulate cost. Only honored on workflow_dispatch;
cron runs always tear down (we don't want unattended cron
to leak resources).
type: boolean
default: false
# Serialise with the full-SaaS workflow so they don't contend for the
# same org-create quota on staging. Different group key from
@ -93,14 +80,6 @@ jobs:
# is "Token Plan only" but cheap-per-token and fast.
E2E_MODEL_SLUG: MiniMax-M2.7-highspeed
E2E_RUN_ID: "canary-${{ github.run_id }}"
# Debug-only: when an operator dispatches with keep_on_failure=true,
# the canary script's E2E_KEEP_ORG=1 path skips teardown so the
# tenant org + EC2 stay alive for SSM-based log capture. Cron runs
# never set this (the input only exists on workflow_dispatch) so
# unattended cron always tears down. See molecule-core#129
# failure mode #1 — capturing the actual exception requires
# docker logs from the live container.
E2E_KEEP_ORG: ${{ github.event.inputs.keep_on_failure == 'true' && '1' || '0' }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@ -158,28 +137,27 @@ jobs:
id: canary
run: bash tests/e2e/test_staging_full_saas.sh
# Alerting: open a sticky issue on the FIRST failure; comment on
# subsequent failures; auto-close on next green. Comment-on-existing
# de-duplicates so a single open issue accumulates the streak —
# ops sees one issue with N comments rather than N issues.
# Alerting: open an issue only after THREE consecutive failures so
# transient flakes (Cloudflare DNS hiccup, AWS API blip) don't spam
# the issue list. If an issue is already open, we still comment on
# every failure so ops sees the streak. Auto-close on next green.
#
# Why no consecutive-failures threshold (e.g., wait 3 runs before
# filing): the prior threshold check used
# `github.rest.actions.listWorkflowRuns()` which Gitea 1.22.6 does
# not expose (returns 404). On Gitea Actions the threshold call
# ALWAYS failed, breaking the entire alerting step and going days
# silent on real regressions (38h+ chronic red on 2026-05-07/08
# before this fix; tracked in molecule-core#129). Filing on first
# failure is also better UX — we want to know about the first red,
# not wait 90 min for it to "count." Real flakes get one issue +
# a quick close-on-green; persistent reds accumulate comments.
# Threshold rationale: canary fires every 30 min, so 3 failures =
# ~90 min of consecutive red — well past any single-run flake but
# still tight enough that a real outage gets surfaced before the
# next deploy window.
- name: Open issue on failure
if: failure()
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
env:
# Inject the workflow path explicitly — context.workflow is
# the *name*, not the file path the actions API needs.
WORKFLOW_PATH: '.github/workflows/canary-staging.yml'
CONSECUTIVE_THRESHOLD: '3'
with:
script: |
const title = '🔴 Canary failing: staging SaaS smoke';
const runURL = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
const runURL = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
// Find an existing open canary issue (stable title match).
// If one exists, this isn't a "first failure" — comment and exit.
@ -199,12 +177,32 @@ jobs:
return;
}
// No open issue yet — file one on this first failure. The
// comment-on-existing branch above means subsequent failures
// accumulate as comments on this same issue, so we don't
// spam new issues per run.
// No open issue yet — check the last N-1 runs' conclusions.
// We open the issue only if the last (THRESHOLD-1) runs ALSO
// failed (so this is the 3rd consecutive red).
const threshold = parseInt(process.env.CONSECUTIVE_THRESHOLD, 10);
const { data: runs } = await github.rest.actions.listWorkflowRuns({
owner: context.repo.owner, repo: context.repo.repo,
workflow_id: process.env.WORKFLOW_PATH,
status: 'completed',
per_page: threshold,
// Skip the current in-progress run; it isn't 'completed' yet.
});
// listWorkflowRuns returns recent first. We need (threshold-1)
// prior failures (current run is the threshold-th).
const priorFailures = (runs.workflow_runs || [])
.slice(0, threshold - 1)
.filter(r => r.id !== context.runId)
.filter(r => r.conclusion === 'failure')
.length;
if (priorFailures < threshold - 1) {
core.info(`Below threshold: ${priorFailures + 1}/${threshold} consecutive failures — not filing yet`);
return;
}
const body =
`Canary run failed at ${new Date().toISOString()}.\n\n` +
`Canary run failed at ${new Date().toISOString()}, ` +
`${threshold} consecutive runs red.\n\n` +
`Run: ${runURL}\n\n` +
`This issue auto-closes on the next green canary run. ` +
`Consecutive failures add a comment here rather than a new issue.`;
@ -213,7 +211,7 @@ jobs:
title, body,
labels: ['canary-staging', 'bug'],
});
core.info('Opened canary failure issue (first red)');
core.info(`Opened canary failure issue (${threshold} consecutive reds)`);
- name: Auto-close canary issue on success
if: success()

View File

@ -87,7 +87,7 @@ jobs:
run: go mod download
- if: needs.changes.outputs.platform == 'true'
run: go build ./cmd/server
# CLI (molecli) moved to standalone repo: github.com/molecule-ai/molecule-cli
# CLI (molecli) moved to standalone repo: git.moleculesai.app/molecule-ai/molecule-cli
- if: needs.changes.outputs.platform == 'true'
run: go vet ./... || true
- if: needs.changes.outputs.platform == 'true'
@ -165,7 +165,7 @@ jobs:
# Strip the package-import prefix so we can match .coverage-allowlist.txt
# entries written as paths relative to workspace-server/.
# Handle both module paths: platform/workspace-server/... and platform/...
rel=$(echo "$file" | sed 's|^github.com/molecule-ai/molecule-monorepo/platform/workspace-server/||; s|^github.com/molecule-ai/molecule-monorepo/platform/||')
rel=$(echo "$file" | sed 's|^github.com/Molecule-AI/molecule-monorepo/platform/workspace-server/||; s|^github.com/Molecule-AI/molecule-monorepo/platform/||')
if echo "$ALLOWLIST" | grep -qxF "$rel"; then
echo "::warning file=workspace-server/$rel::Critical file at ${pct}% coverage (allowlisted, #1823) — fix before expiry."
@ -235,13 +235,7 @@ jobs:
run: npx vitest run --coverage
- name: Upload coverage summary as artifact
if: needs.changes.outputs.canvas == 'true' && always()
# Pinned to v3 for Gitea act_runner v0.6 compatibility — v4+ uses
# the GHES 3.10+ artifact protocol that Gitea 1.22.x does NOT
# implement, surfacing as `GHESNotSupportedError: @actions/artifact
# v2.0.0+, upload-artifact@v4+ and download-artifact@v4+ are not
# currently supported on GHES`. Drop this pin when Gitea ships
# the v4 protocol (tracked: post-Gitea-1.23 followup).
uses: actions/upload-artifact@c6a366c94c3e0affe28c06c8df20a878f24da3cf # v3.2.2
uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
with:
name: canvas-coverage-${{ github.run_id }}
path: canvas/coverage/
@ -249,8 +243,8 @@ jobs:
if-no-files-found: warn
# MCP Server + SDK removed from CI — now in standalone repos:
# - github.com/molecule-ai/molecule-mcp-server (npm CI)
# - github.com/molecule-ai/molecule-sdk-python (PyPI CI)
# - git.moleculesai.app/molecule-ai/molecule-mcp-server (npm CI)
# - git.moleculesai.app/molecule-ai/molecule-sdk-python (PyPI CI)
# e2e-api job moved to .github/workflows/e2e-api.yml (issue #458).
# It now has workflow-level concurrency (cancel-in-progress: false) so
@ -440,5 +434,5 @@ jobs:
fi
# SDK + plugin validation moved to standalone repo:
# github.com/molecule-ai/molecule-sdk-python
# git.moleculesai.app/molecule-ai/molecule-sdk-python

View File

@ -1,92 +1,36 @@
name: CodeQL
# Stub workflow — CodeQL Action is structurally incompatible with Gitea
# Actions (post-2026-05-06 SCM migration off GitHub).
# Controls CodeQL scan triggers for this repo.
#
# Why this is a stub, not a real CodeQL run:
# GitHub's "Code quality" default setup (the UI-configured one) is
# hardcoded to only scan the default branch — on this repo that's
# `staging`, so PRs promoting staging→main would otherwise never be
# scanned. This workflow fills that gap by explicitly scanning both
# branches on push and PR.
#
# 1. github/codeql-action/init@v4 hits api.github.com endpoints
# (CodeQL CLI bundle download + query-pack registry + telemetry)
# that Gitea 1.22.x does NOT proxy. The act_runner has
# GITHUB_SERVER_URL=https://git.moleculesai.app correctly set
# (per saved memory feedback_act_runner_github_server_url and
# /config.yaml on the operator host), but the Gitea API surface
# simply does not implement the codeql-action bundle endpoints.
# Observed in run 1d/3101 (2026-05-07): "::error::404 page not
# found" inside the Initialize CodeQL step, before any analysis.
#
# 2. PR #35 attempted to mark `continue-on-error: true` at the JOB
# level (correct YAML structure). Gitea 1.22.6 does NOT propagate
# job-level continue-on-error to the commit-status API — every
# matrix leg still posts `failure` to the status surface, which
# keeps OVERALL=failure on every push to main + staging and
# blocks visual auto-promote signals (#156).
#
# 3. Hongming policy decision (2026-05-07, task #156): CodeQL is
# ADVISORY, not blocking, on Gitea Actions. We do not block PR
# merge or staging→main promotion on CodeQL findings until we
# have a Gitea-compatible static-analysis pipeline.
#
# What this stub preserves:
#
# - Workflow name `CodeQL` (referenced by auto-promote-staging.yml
# line 67 as a workflow_run gate — must stay stable).
# - Job name template `Analyze (${{ matrix.language }})` and the
# 3-leg matrix (go, javascript-typescript, python). Branch
# protection / required-check parity (#144) keys on these
# exact context names.
# - merge_group + push + pull_request + schedule triggers, so the
# merge-queue check name still resolves (per saved memory
# feedback_branch_protection_check_name_parity).
#
# Re-enabling real analysis (future work):
#
# - Option A: self-hosted Semgrep / OpenGrep via a custom action
# that doesn't hit api.github.com. Tracked behind #156 follow-up.
# - Option B: Sonatype Nexus IQ or similar, called from a step
# that uses the Gitea-issued token only.
# - Option C: re-host this workflow on a small GitHub mirror used
# ONLY for SAST (push-mirrored from Gitea). Acceptable trade-off
# if/when payment is restored on a non-suspended GitHub org —
# but per saved memory feedback_no_single_source_of_truth, we
# should design for multi-vendor backup, not GitHub-only SAST.
#
# Until one of those lands, this stub keeps commit-status green so
# the auto-promote chain isn't permanently red on a tool we cannot
# actually run.
#
# Security policy: ADVISORY. We accept the residual risk of un-scanned
# pushes during this window. Compensating controls in place:
# - secret-scan.yml runs on every push (active, blocks on hits)
# - block-internal-paths.yml blocks forbidden file paths
# - lint-curl-status-capture.yml catches one specific class of bug
# - branch-protection-drift.yml + the merge_group required-checks
# parity keep the gate surface stable
# These are not equivalent to CodeQL coverage. Status of the
# replacement plan is tracked in #156.
# Runs on ubuntu-latest (GHA-hosted — public repo, free). GHAS is NOT
# enabled on this repo, so results are not uploaded to the Security
# tab — the scan fails the PR check on findings, and the SARIF is
# kept as a workflow artifact for triage.
on:
push:
branches: [main, staging]
pull_request:
branches: [main, staging]
# Required so the matrix legs emit a real result on the queued
# commit instead of a false-green when merge queue is enabled.
# Per saved memory feedback_branch_protection_check_name_parity:
# path-filtered / matrix workflows MUST emit the protected name
# via a job that always runs.
# GitHub merge queue fires `merge_group` for the queue's pre-merge CI run.
# Required so CodeQL Analyze checks get a real result on the queued
# commit instead of a false-green. Event only fires once merge queue is
# enabled on the target branch — safe to add unconditionally.
merge_group:
types: [checks_requested]
schedule:
# Weekly heartbeat. Cheap on a stub (the no-op job is ~5s) but
# keeps the workflow visible in Gitea's Actions UI so the next
# operator notices it's a stub instead of a missing surface.
# Weekly run picks up findings in code that hasn't been touched.
- cron: '30 1 * * 0'
# Workflow-level concurrency: only one stub run per branch/PR at a
# time. cancel-in-progress: false because a quick follow-up push
# shouldn't kill an in-flight run — even though the stub is fast,
# the contract should match a real CodeQL run for when we re-enable.
# Workflow-level concurrency: only one CodeQL run per branch/PR at a time.
# `cancel-in-progress: false` queues new runs so a quick follow-up push
# doesn't nuke a 45-min analysis mid-flight.
concurrency:
group: codeql-${{ github.ref }}
cancel-in-progress: false
@ -94,17 +38,13 @@ concurrency:
permissions:
actions: read
contents: read
# No security-events: write — we don't call the upload API anyway,
# GHAS isn't on Gitea.
# No security-events: write — we don't call the upload API.
jobs:
analyze:
# Job NAME shape is load-bearing — auto-promote-staging.yml +
# branch protection both key on `Analyze (${{ matrix.language }})`.
# Do NOT rename without coordinating both surfaces.
name: Analyze (${{ matrix.language }})
runs-on: ubuntu-latest
timeout-minutes: 5
timeout-minutes: 45
strategy:
fail-fast: false
@ -112,25 +52,68 @@ jobs:
language: [go, javascript-typescript, python]
steps:
# Single-step stub: log the policy decision + emit success.
# Exit 0 explicitly so the commit-status API records `success`
# for each of the three matrix legs.
- name: CodeQL stub (advisory, non-blocking on Gitea)
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
# github-app-auth sibling-checkout removed 2026-05-07 (#157):
# plugin was dropped + the Dockerfile no longer needs it.
# jq is pre-installed on ubuntu-latest — no setup step needed.
- name: Initialize CodeQL
uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
with:
languages: ${{ matrix.language }}
# security-extended widens past the default to include the
# full security-query set for a public SaaS surface.
queries: security-extended
- name: Autobuild
uses: github/codeql-action/autobuild@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
- name: Perform CodeQL Analysis
id: analyze
uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
with:
category: "/language:${{ matrix.language }}"
# upload: never — GHAS isn't enabled on this repo, so the
# upload API 403s. Write SARIF locally instead.
upload: never
output: sarif-results/${{ matrix.language }}
- name: Parse SARIF + fail on findings
# The analyze step writes <database>.sarif into the output
# directory — database name is the short CodeQL lang id, not
# the matrix value (e.g. "javascript-typescript" →
# javascript.sarif), so glob rather than hardcode.
# Filter to error/warning severity: security-extended emits
# "note" rows for informational findings we don't want to fail
# the build over.
shell: bash
run: |
set -euo pipefail
cat <<EOF
CodeQL is currently ADVISORY on Gitea Actions (post-2026-05-06).
Language matrix leg: ${{ matrix.language }}
Reason: github/codeql-action/init@v4 calls api.github.com
bundle endpoints that Gitea 1.22.x does not implement.
Observed: "::error::404 page not found" in the Init
CodeQL step on every prior run.
Policy: per Hongming decision 2026-05-07 (#156), CodeQL is
non-blocking until a Gitea-compatible SAST pipeline
lands. See workflow file header for replacement
options + compensating controls.
Status: emitting success so auto-promote isn't permanently
red on a tool we cannot actually run today.
EOF
echo "::notice::CodeQL ${{ matrix.language }} — advisory stub, success."
dir="sarif-results/${{ matrix.language }}"
sarif=$(ls "$dir"/*.sarif 2>/dev/null | head -1 || true)
if [ -z "$sarif" ] || [ ! -f "$sarif" ]; then
echo "::error::No SARIF file found under $dir"
ls -la "$dir" 2>/dev/null || true
exit 1
fi
echo "Parsing $sarif"
count=$(jq '[.runs[].results[] | select(.level == "error" or .level == "warning")] | length' "$sarif")
echo "CodeQL findings (error+warning) for ${{ matrix.language }}: $count"
if [ "$count" -gt 0 ]; then
echo "::error::CodeQL found $count issues. Details below; full SARIF in the artifact."
jq -r '.runs[].results[] | select(.level == "error" or .level == "warning") | " - [\(.level)] \(.ruleId // "?"): \(.message.text // "(no message)") @ \(.locations[0].physicalLocation.artifactLocation.uri // "?"):\(.locations[0].physicalLocation.region.startLine // "?")"' "$sarif"
exit 1
fi
- name: Upload SARIF artifact
# Keep SARIF around on success + failure so triagers can diff.
# 14-day retention — longer than default 3, short enough not
# to bloat quota.
if: always()
uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
with:
name: codeql-sarif-${{ matrix.language }}
path: sarif-results/${{ matrix.language }}/
retention-days: 14

View File

@ -12,59 +12,6 @@ name: E2E API Smoke Test
# spending CI cycles. See the in-job comment on the `e2e-api` job for
# why this is one job (not two-jobs-sharing-name) and the 2026-04-29
# PR #2264 incident that drove the consolidation.
#
# Parallel-safety (Class B Hongming-owned CICD red sweep, 2026-05-08)
# -------------------------------------------------------------------
# Same substrate hazard as PR #98 (handlers-postgres-integration). Our
# Gitea act_runner runs with `container.network: host` (operator host
# `/opt/molecule/runners/config.yaml`), which means:
#
# * Two concurrent runs both try to bind their `-p 15432:5432` /
# `-p 16379:6379` host ports — the second postgres/redis FATALs
# with `Address in use` and `docker run` returns exit 125 with
# `Conflict. The container name "/molecule-ci-postgres" is already
# in use by container ...`. Verified in run a7/2727 on 2026-05-07.
# * The fixed container names `molecule-ci-postgres` / `-redis` (the
# pre-fix shape) collide on name AS WELL AS port. The cleanup-with-
# `docker rm -f` at the start of the second job KILLS the first
# job's still-running postgres/redis.
#
# Fix shape (mirrors PR #98's bridge-net pattern, adapted because
# platform-server is a Go binary on the host, not a containerised
# step):
#
# 1. Unique container names per run:
# pg-e2e-api-${RUN_ID}-${RUN_ATTEMPT}
# redis-e2e-api-${RUN_ID}-${RUN_ATTEMPT}
# `${RUN_ID}-${RUN_ATTEMPT}` is unique even across reruns of the
# same run_id.
# 2. Ephemeral host port per run (`-p 0:5432`), then read the actual
# bound port via `docker port` and export DATABASE_URL/REDIS_URL
# pointing at it. No fixed host-port → no port collision.
# 3. `127.0.0.1` (NOT `localhost`) in URLs — IPv6 first-resolve was
# the original flake fixed in #92 and the script's still IPv6-
# enabled.
# 4. `if: always()` cleanup so containers don't leak when test steps
# fail.
#
# Issue #94 items #2 + #3 (also fixed here):
# * Pre-pull `alpine:latest` so the platform-server's provisioner
# (`internal/handlers/container_files.go`) can stand up its
# ephemeral token-write helper without a daemon.io round-trip.
# * Create `molecule-monorepo-net` bridge network if missing so the
# provisioner's container.HostConfig {NetworkMode: ...} attach
# succeeds.
# Item #1 (timeouts) — evidence on recent runs (77/3191, ae/4270, 0e/
# 2318) shows Postgres ready in 3s, Redis in 1s, Platform in 1s when
# they DO come up. Timeouts are not the bottleneck; not bumped.
#
# Item explicitly NOT fixed here: failing test `Status back online`
# fails because the platform's langgraph workspace template image
# (ghcr.io/molecule-ai/workspace-template-langgraph:latest) returns
# 403 Forbidden post-2026-05-06 GitHub org suspension. That is a
# template-registry resolution issue (ADR-002 / local-build mode) and
# belongs in a separate change that touches workspace-server, not
# this workflow file.
on:
push:
@ -131,14 +78,11 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 15
env:
# Unique per-run container names so concurrent runs on the host-
# network act_runner don't collide on name OR port.
# `${RUN_ID}-${RUN_ATTEMPT}` stays unique across reruns of the
# same run_id. PORT is set later (after docker port lookup) since
# we let Docker assign an ephemeral host port.
PG_CONTAINER: pg-e2e-api-${{ github.run_id }}-${{ github.run_attempt }}
REDIS_CONTAINER: redis-e2e-api-${{ github.run_id }}-${{ github.run_attempt }}
DATABASE_URL: postgres://dev:dev@localhost:15432/molecule?sslmode=disable
REDIS_URL: redis://localhost:16379
PORT: "8080"
PG_CONTAINER: molecule-ci-postgres
REDIS_CONTAINER: molecule-ci-redis
steps:
- name: No-op pass (paths filter excluded this commit)
if: needs.detect-changes.outputs.api != 'true'
@ -153,53 +97,11 @@ jobs:
go-version: 'stable'
cache: true
cache-dependency-path: workspace-server/go.sum
- name: Pre-pull alpine + ensure provisioner network (Issue #94 items #2 + #3)
if: needs.detect-changes.outputs.api == 'true'
run: |
# Provisioner uses alpine:latest for ephemeral token-write
# containers (workspace-server/internal/handlers/container_files.go).
# Pre-pull so the first provision in test_api.sh doesn't race
# the daemon's pull cache. Idempotent — `docker pull` is a no-op
# when the image is already present.
docker pull alpine:latest >/dev/null
# Provisioner attaches workspace containers to
# molecule-monorepo-net (workspace-server/internal/provisioner/
# provisioner.go::DefaultNetwork). The bridge already exists on
# the operator host's docker daemon — `network create` is
# idempotent via `|| true`.
docker network create molecule-monorepo-net >/dev/null 2>&1 || true
echo "alpine:latest pre-pulled; molecule-monorepo-net ensured."
- name: Start Postgres (docker)
if: needs.detect-changes.outputs.api == 'true'
run: |
# Defensive cleanup — only matches THIS run's container name,
# so it cannot kill a sibling run's postgres. (Pre-fix the
# name was static and this rm hit other runs' containers.)
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
# `-p 0:5432` requests an ephemeral host port; we read it back
# below and export DATABASE_URL.
docker run -d --name "$PG_CONTAINER" \
-e POSTGRES_USER=dev -e POSTGRES_PASSWORD=dev -e POSTGRES_DB=molecule \
-p 0:5432 postgres:16 >/dev/null
# Resolve the host-side port assignment. `docker port` prints
# `0.0.0.0:NNNN` (and on host-net runners may also print an
# IPv6 line — take the first IPv4 line).
PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
if [ -z "$PG_PORT" ]; then
# Fallback: any first line. Some Docker versions print only
# one line.
PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | head -1 | awk -F: '{print $NF}')
fi
if [ -z "$PG_PORT" ]; then
echo "::error::Could not resolve host port for $PG_CONTAINER"
docker port "$PG_CONTAINER" 5432/tcp || true
docker logs "$PG_CONTAINER" || true
exit 1
fi
# 127.0.0.1 (NOT localhost) — IPv6 first-resolve flake (#92).
echo "PG_PORT=${PG_PORT}" >> "$GITHUB_ENV"
echo "DATABASE_URL=postgres://dev:dev@127.0.0.1:${PG_PORT}/molecule?sslmode=disable" >> "$GITHUB_ENV"
echo "Postgres host port: ${PG_PORT}"
docker run -d --name "$PG_CONTAINER" -e POSTGRES_USER=dev -e POSTGRES_PASSWORD=dev -e POSTGRES_DB=molecule -p 15432:5432 postgres:16
for i in $(seq 1 30); do
if docker exec "$PG_CONTAINER" pg_isready -U dev >/dev/null 2>&1; then
echo "Postgres ready after ${i}s"
@ -214,20 +116,7 @@ jobs:
if: needs.detect-changes.outputs.api == 'true'
run: |
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
docker run -d --name "$REDIS_CONTAINER" -p 0:6379 redis:7 >/dev/null
REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
if [ -z "$REDIS_PORT" ]; then
REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | head -1 | awk -F: '{print $NF}')
fi
if [ -z "$REDIS_PORT" ]; then
echo "::error::Could not resolve host port for $REDIS_CONTAINER"
docker port "$REDIS_CONTAINER" 6379/tcp || true
docker logs "$REDIS_CONTAINER" || true
exit 1
fi
echo "REDIS_PORT=${REDIS_PORT}" >> "$GITHUB_ENV"
echo "REDIS_URL=redis://127.0.0.1:${REDIS_PORT}" >> "$GITHUB_ENV"
echo "Redis host port: ${REDIS_PORT}"
docker run -d --name "$REDIS_CONTAINER" -p 16379:6379 redis:7
for i in $(seq 1 15); do
if docker exec "$REDIS_CONTAINER" redis-cli ping 2>/dev/null | grep -q PONG; then
echo "Redis ready after ${i}s"
@ -246,15 +135,13 @@ jobs:
if: needs.detect-changes.outputs.api == 'true'
working-directory: workspace-server
run: |
# DATABASE_URL + REDIS_URL exported by the start-postgres /
# start-redis steps point at this run's per-run host ports.
./platform-server > platform.log 2>&1 &
echo $! > platform.pid
- name: Wait for /health
if: needs.detect-changes.outputs.api == 'true'
run: |
for i in $(seq 1 30); do
if curl -sf http://127.0.0.1:8080/health > /dev/null; then
if curl -sf http://localhost:8080/health > /dev/null; then
echo "Platform up after ${i}s"
exit 0
fi
@ -298,9 +185,6 @@ jobs:
kill "$(cat workspace-server/platform.pid)" 2>/dev/null || true
fi
- name: Stop service containers
# always() so containers don't leak when test steps fail. The
# cleanup is best-effort: if the container is already gone
# (e.g. concurrent rerun race), don't fail the job.
if: always() && needs.detect-changes.outputs.api == 'true'
run: |
docker rm -f "$PG_CONTAINER" 2>/dev/null || true

View File

@ -22,9 +22,9 @@ on:
# spending CI cycles. See e2e-api.yml for the rationale on why this
# is a single job rather than two-jobs-sharing-name.
push:
branches: [main]
branches: [main, staging]
pull_request:
branches: [main]
branches: [main, staging]
workflow_dispatch:
schedule:
# Weekly on Sunday 08:00 UTC — catches Chrome / Playwright / Next.js
@ -139,11 +139,7 @@ jobs:
- name: Upload Playwright report on failure
if: failure() && needs.detect-changes.outputs.canvas == 'true'
# Pinned to v3 for Gitea act_runner v0.6 compatibility — v4+ uses
# the GHES 3.10+ artifact protocol that Gitea 1.22.x does NOT
# implement (see ci.yml upload step for the canonical error
# cite). Drop this pin when Gitea ships the v4 protocol.
uses: actions/upload-artifact@c6a366c94c3e0affe28c06c8df20a878f24da3cf # v3.2.2
uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
with:
name: playwright-report-staging
path: canvas/playwright-report-staging/
@ -151,8 +147,7 @@ jobs:
- name: Upload screenshots on failure
if: failure() && needs.detect-changes.outputs.canvas == 'true'
# Pinned to v3 for Gitea act_runner v0.6 compatibility (see above).
uses: actions/upload-artifact@c6a366c94c3e0affe28c06c8df20a878f24da3cf # v3.2.2
uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
with:
name: playwright-screenshots
path: canvas/test-results/

View File

@ -32,7 +32,7 @@ name: E2E Staging External Runtime
on:
push:
branches: [main]
branches: [staging, main]
paths:
- 'workspace-server/internal/handlers/workspace.go'
- 'workspace-server/internal/handlers/registry.go'
@ -44,7 +44,7 @@ on:
- 'tests/e2e/test_staging_external_runtime.sh'
- '.github/workflows/e2e-staging-external.yml'
pull_request:
branches: [main]
branches: [staging, main]
paths:
- 'workspace-server/internal/handlers/workspace.go'
- 'workspace-server/internal/handlers/registry.go'

View File

@ -20,12 +20,13 @@ name: E2E Staging SaaS (full lifecycle)
# via the same paths watcher that e2e-api.yml uses)
on:
# Trunk-based (Phase 3 of internal#81): main is the only branch.
# Previously this fired on staging push too because staging was a
# superset of main and ran the gate ahead of auto-promote; with no
# staging branch, main is where E2E gates the deploy.
# Fire on staging push too — previously this only ran on main, which
# meant the most thorough end-to-end test caught regressions AFTER
# they shipped to staging (and then to the auto-promote PR). Running
# on staging push catches them BEFORE the staging→main promotion
# opens, so a green canary into auto-promote is more meaningful.
push:
branches: [main]
branches: [staging, main]
paths:
- 'workspace-server/internal/handlers/registry.go'
- 'workspace-server/internal/handlers/workspace_provision.go'
@ -35,7 +36,7 @@ on:
- 'tests/e2e/test_staging_full_saas.sh'
- '.github/workflows/e2e-staging-saas.yml'
pull_request:
branches: [main]
branches: [staging, main]
paths:
- 'workspace-server/internal/handlers/registry.go'
- 'workspace-server/internal/handlers/workspace_provision.go'

View File

@ -14,42 +14,12 @@ name: Handlers Postgres Integration
# self-review caught it took 2 minutes to set up and would have caught
# the bug at PR-time.
#
# Why this workflow does NOT use `services: postgres:` (Class B fix)
# ------------------------------------------------------------------
# Our act_runner config has `container.network: host` (operator host
# /opt/molecule/runners/config.yaml), which act_runner applies to BOTH
# the job container AND every service container. With host-net, two
# concurrent runs of this workflow both try to bind 0.0.0.0:5432 — the
# second postgres FATALs with `could not create any TCP/IP sockets:
# Address in use`, and Docker auto-removes it (act_runner sets
# AutoRemove:true on service containers). By the time the migrations
# step runs `psql`, the postgres container is gone, hence
# `Connection refused` then `failed to remove container: No such
# container` at cleanup time.
# This job spins a Postgres service container, applies the migration,
# and runs `go test -tags=integration` against a live DB. Required
# check on staging branch protection — backend handler PRs cannot
# merge without a real-DB regression gate.
#
# Per-job `container.network` override is silently ignored by
# act_runner — `--network and --net in the options will be ignored.`
# appears in the runner log. Documented constraint.
#
# So we sidestep `services:` entirely. The job container still uses
# host-net (inherited from runner config; required for cache server
# discovery on the bridge IP 172.18.0.17:42631). We launch a sibling
# postgres on the existing `molecule-monorepo-net` bridge with a
# UNIQUE name per run — `pg-handlers-${RUN_ID}-${RUN_ATTEMPT}` — and
# read its bridge IP via `docker inspect`. A host-net job container
# can reach a bridge-net container directly via the bridge IP (verified
# manually on operator host 2026-05-08).
#
# Trade-offs vs. the original `services:` shape:
# + No host-port collision; N parallel runs share the bridge cleanly
# + `if: always()` cleanup runs even on test-step failure
# - One more step in the workflow (+~3 lines)
# - Requires `molecule-monorepo-net` to exist on the operator host
# (it does; declared in docker-compose.yml + docker-compose.infra.yml)
#
# Class B Hongming-owned CICD red sweep, 2026-05-08.
#
# Cost: ~30s job (postgres pull from cache + go build + 4 tests).
# Cost: ~30s job (postgres pull from GH cache + go build + 4 tests).
on:
push:
@ -89,14 +59,20 @@ jobs:
name: Handlers Postgres Integration
needs: detect-changes
runs-on: ubuntu-latest
env:
# Unique name per run so concurrent jobs don't collide on the
# bridge network. ${RUN_ID}-${RUN_ATTEMPT} is unique even across
# workflow_dispatch reruns of the same run_id.
PG_NAME: pg-handlers-${{ github.run_id }}-${{ github.run_attempt }}
# Bridge network already exists on the operator host (declared
# in docker-compose.yml + docker-compose.infra.yml).
PG_NETWORK: molecule-monorepo-net
services:
postgres:
image: postgres:15-alpine
env:
POSTGRES_PASSWORD: test
POSTGRES_DB: molecule
ports:
- 5432:5432
# GHA spins this with --health-cmd built in for postgres images.
options: >-
--health-cmd pg_isready
--health-interval 5s
--health-timeout 5s
--health-retries 10
defaults:
run:
working-directory: workspace-server
@ -113,57 +89,16 @@ jobs:
with:
go-version: 'stable'
- if: needs.detect-changes.outputs.handlers == 'true'
name: Start sibling Postgres on bridge network
working-directory: .
run: |
# Sanity: the bridge network must exist on the operator host.
# Hard-fail loud if it doesn't — easier to spot than a silent
# auto-create that diverges from the rest of the stack.
if ! docker network inspect "${PG_NETWORK}" >/dev/null 2>&1; then
echo "::error::Bridge network '${PG_NETWORK}' missing on operator host. Re-run docker-compose.infra.yml or check ops handbook."
exit 1
fi
# If a stale container with the same name exists (rerun on
# the same run_id), wipe it first.
docker rm -f "${PG_NAME}" >/dev/null 2>&1 || true
docker run -d \
--name "${PG_NAME}" \
--network "${PG_NETWORK}" \
--health-cmd "pg_isready -U postgres" \
--health-interval 5s \
--health-timeout 5s \
--health-retries 10 \
-e POSTGRES_PASSWORD=test \
-e POSTGRES_DB=molecule \
postgres:15-alpine >/dev/null
# Read back the bridge IP. Always present immediately after
# `docker run -d` for bridge networks.
PG_HOST=$(docker inspect "${PG_NAME}" \
--format "{{(index .NetworkSettings.Networks \"${PG_NETWORK}\").IPAddress}}")
if [ -z "${PG_HOST}" ]; then
echo "::error::Could not resolve PG_HOST for ${PG_NAME} on ${PG_NETWORK}"
docker logs "${PG_NAME}" || true
exit 1
fi
echo "PG_HOST=${PG_HOST}" >> "$GITHUB_ENV"
echo "INTEGRATION_DB_URL=postgres://postgres:test@${PG_HOST}:5432/molecule?sslmode=disable" >> "$GITHUB_ENV"
echo "Started ${PG_NAME} at ${PG_HOST}:5432"
- if: needs.detect-changes.outputs.handlers == 'true'
name: Apply migrations to Postgres service
env:
PGPASSWORD: test
run: |
# Wait for postgres to actually accept connections. Docker's
# health-cmd handles container-side readiness, but the wire
# to the bridge IP is best-tested with pg_isready directly.
# Wait for postgres to actually accept connections (the
# GHA --health-cmd is best-effort but psql can still race).
for i in {1..15}; do
if pg_isready -h "${PG_HOST}" -p 5432 -U postgres -q; then break; fi
echo "waiting for postgres at ${PG_HOST}:5432..."; sleep 2
if pg_isready -h localhost -p 5432 -U postgres -q; then break; fi
echo "waiting for postgres..."; sleep 2
done
# Apply every .up.sql in lexicographic order with
@ -196,7 +131,7 @@ jobs:
# not fine once a cross-table atomicity test came in.
set +e
for migration in $(ls migrations/*.sql 2>/dev/null | grep -v '\.down\.sql$' | sort); do
if psql -h "${PG_HOST}" -U postgres -d molecule -v ON_ERROR_STOP=1 \
if psql -h localhost -U postgres -d molecule -v ON_ERROR_STOP=1 \
-f "$migration" >/dev/null 2>&1; then
echo "✓ $(basename "$migration")"
else
@ -210,7 +145,7 @@ jobs:
# fail if any didn't land — that would be a real regression we
# want loud.
for tbl in delegations workspaces activity_logs pending_uploads; do
if ! psql -h "${PG_HOST}" -U postgres -d molecule -tA \
if ! psql -h localhost -U postgres -d molecule -tA \
-c "SELECT 1 FROM information_schema.tables WHERE table_name = '$tbl'" \
| grep -q 1; then
echo "::error::$tbl table missing after migration replay — handler integration tests would be meaningless"
@ -221,32 +156,16 @@ jobs:
- if: needs.detect-changes.outputs.handlers == 'true'
name: Run integration tests
env:
INTEGRATION_DB_URL: postgres://postgres:test@localhost:5432/molecule?sslmode=disable
run: |
# INTEGRATION_DB_URL is exported by the start-postgres step;
# points at the per-run bridge IP, not 127.0.0.1, so concurrent
# workflow runs don't fight over a host-net 5432 port.
go test -tags=integration -timeout 5m -v ./internal/handlers/ -run "^TestIntegration_"
- if: failure() && needs.detect-changes.outputs.handlers == 'true'
- if: needs.detect-changes.outputs.handlers == 'true' && failure()
name: Diagnostic dump on failure
env:
PGPASSWORD: test
run: |
echo "::group::postgres container status"
docker ps -a --filter "name=${PG_NAME}" --format '{{.Status}} {{.Names}}' || true
docker logs "${PG_NAME}" 2>&1 | tail -50 || true
echo "::endgroup::"
echo "::group::delegations table state"
psql -h "${PG_HOST}" -U postgres -d molecule -c "SELECT * FROM delegations LIMIT 50;" || true
psql -h localhost -U postgres -d molecule -c "SELECT * FROM delegations LIMIT 50;" || true
echo "::endgroup::"
- if: always() && needs.detect-changes.outputs.handlers == 'true'
name: Stop sibling Postgres
working-directory: .
run: |
# always() so containers don't leak when migrations or tests
# fail. The cleanup is best-effort: if the container is
# already gone (e.g. concurrent rerun race), don't fail the job.
docker rm -f "${PG_NAME}" >/dev/null 2>&1 || true
echo "Cleaned up ${PG_NAME}"

View File

@ -98,66 +98,6 @@ jobs:
# github-app-auth sibling-checkout removed 2026-05-07 (#157):
# the plugin was dropped + Dockerfile.tenant no longer COPYs it.
# Pre-clone manifest deps before docker compose builds the tenant
# image (Task #173 followup — same pattern as
# publish-workspace-server-image.yml's "Pre-clone manifest deps"
# step).
#
# Why pre-clone here too: tests/harness/compose.yml builds tenant-alpha
# and tenant-beta from workspace-server/Dockerfile.tenant with
# context=../.. (repo root). That Dockerfile expects
# .tenant-bundle-deps/{workspace-configs-templates,org-templates,plugins}
# to be present at build context root (post-#173 it COPYs from there
# instead of running an in-image clone — the in-image clone failed
# with "could not read Username for https://git.moleculesai.app"
# because there's no auth path inside the build sandbox).
#
# Without this step harness-replays fails before any replay runs,
# with `failed to calculate checksum of ref ...
# "/.tenant-bundle-deps/plugins": not found`. Caught by run #892
# (main, 2026-05-07T20:28:53Z) and run #964 (staging — same
# symptom, different root cause: staging still has the in-image
# clone path, hits the auth error directly).
#
# 2026-05-08 sub-finding (#192): the clone step ALSO fails when
# any referenced workspace-template repo is private and the
# AUTO_SYNC_TOKEN bearer (devops-engineer persona) lacks read
# access. Root cause: 5 of 9 workspace-template repos
# (openclaw, codex, crewai, deepagents, gemini-cli) had been
# marked private with no team grant. Resolution: flipped them
# to public per `feedback_oss_first_repo_visibility_default`
# (the OSS surface should be public). Layer-3 (customer-private +
# marketplace third-party repos) tracked separately in
# internal#102.
#
# Token shape matches publish-workspace-server-image.yml: AUTO_SYNC_TOKEN
# is the devops-engineer persona PAT, NOT the founder PAT (per
# `feedback_per_agent_gitea_identity_default`). clone-manifest.sh
# embeds it as basic-auth for the duration of the clones and strips
# .git directories — the token never enters the resulting image.
- name: Pre-clone manifest deps
if: needs.detect-changes.outputs.run == 'true'
env:
MOLECULE_GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
run: |
set -euo pipefail
if [ -z "${MOLECULE_GITEA_TOKEN}" ]; then
echo "::error::AUTO_SYNC_TOKEN secret is empty — register the devops-engineer persona PAT in repo Actions secrets"
exit 1
fi
mkdir -p .tenant-bundle-deps
bash scripts/clone-manifest.sh \
manifest.json \
.tenant-bundle-deps/workspace-configs-templates \
.tenant-bundle-deps/org-templates \
.tenant-bundle-deps/plugins
# Sanity-check counts so a silent partial clone fails fast
# instead of producing a half-empty image.
ws_count=$(find .tenant-bundle-deps/workspace-configs-templates -mindepth 1 -maxdepth 1 -type d | wc -l)
org_count=$(find .tenant-bundle-deps/org-templates -mindepth 1 -maxdepth 1 -type d | wc -l)
plugins_count=$(find .tenant-bundle-deps/plugins -mindepth 1 -maxdepth 1 -type d | wc -l)
echo "Cloned: ws=$ws_count org=$org_count plugins=$plugins_count"
- name: Install Python deps for replays
# peer-discovery-404 (and future replays) eval Python against the
# running tenant — importing workspace/a2a_client.py pulls in

View File

@ -1,25 +1,14 @@
name: pr-guards
# PR-time guards. Today the only guard is "disable auto-merge when a
# new commit is pushed after auto-merge was enabled" — added 2026-04-27
# after PR #2174 auto-merged with only its first commit because the
# second commit was pushed after the merge queue had locked the PR's
# SHA.
# Thin caller that delegates to the molecule-ci reusable guard. Today
# the guard is just "disable auto-merge when a new commit is pushed
# after auto-merge was enabled" — added 2026-04-27 after PR #2174
# auto-merged with only its first commit because the second commit
# was pushed after the merge queue had locked the PR's SHA.
#
# Why this is inlined (not delegated to molecule-ci's reusable
# workflow): the reusable workflow uses `gh pr merge --disable-auto`,
# which calls GitHub's GraphQL API. Gitea has no GraphQL endpoint and
# returns HTTP 405 on /api/graphql, so the job failed on every Gitea
# PR push since the 2026-05-06 migration. Gitea also has no `--auto`
# merge primitive that this job could be acting on, so the right
# behaviour on Gitea is "no-op + green status" — not a 405.
#
# Inlining (vs. an `if:` on the `uses:` line) keeps the job ALWAYS
# running, which matters for branch protection: required-check names
# need a job that emits SUCCESS terminal state, not SKIPPED. See
# `feedback_branch_protection_check_name_parity` and `feedback_pr_merge_safety_guards`.
#
# Issue #88 item 1.
# When more PR-time guards land in molecule-ci, add them here as
# additional jobs that share the same pull_request:synchronize
# trigger.
on:
pull_request:
@ -30,34 +19,4 @@ permissions:
jobs:
disable-auto-merge-on-push:
runs-on: ubuntu-latest
steps:
# Detect Gitea Actions. act_runner sets GITEA_ACTIONS=true in the
# step env on every job. Belt-and-suspenders: also check the repo
# url's host, which is independent of any runner-side env config
# (covers a future Gitea host where the env var is forgotten).
- name: Detect runner host
id: host
run: |
if [[ "${GITEA_ACTIONS:-}" == "true" ]] || [[ "${{ github.server_url }}" == *moleculesai.app* ]] || [[ "${{ github.event.repository.html_url }}" == *moleculesai.app* ]]; then
echo "is_gitea=true" >> "$GITHUB_OUTPUT"
echo "::notice::Gitea Actions detected — auto-merge gating is not applicable here (Gitea has no --auto merge primitive). Job will no-op."
else
echo "is_gitea=false" >> "$GITHUB_OUTPUT"
fi
- name: Disable auto-merge (GitHub only)
if: steps.host.outputs.is_gitea != 'true'
env:
GH_TOKEN: ${{ github.token }}
PR: ${{ github.event.pull_request.number }}
REPO: ${{ github.repository }}
NEW_SHA: ${{ github.sha }}
run: |
set -eu
gh pr merge "$PR" --disable-auto -R "$REPO" || true
gh pr comment "$PR" -R "$REPO" --body "🔒 Auto-merge disabled — new commit (\`${NEW_SHA:0:7}\`) pushed after auto-merge was enabled. The merge queue locks SHAs at entry, so subsequent pushes can race. Verify the new commit and re-enable with \`gh pr merge --auto\`."
- name: Gitea no-op
if: steps.host.outputs.is_gitea == 'true'
run: echo "Gitea Actions — auto-merge gating not applicable; no-op (job intentionally green so branch protection's required-check name lands SUCCESS)."
uses: Molecule-AI/molecule-ci/.github/workflows/disable-auto-merge-on-push.yml@main

View File

@ -25,7 +25,7 @@ name: publish-runtime
# 3. Publishes to PyPI via the PyPA Trusted Publisher action (OIDC).
# No static API token is stored — PyPI verifies the workflow's
# OIDC claim against the trusted-publisher config registered for
# molecule-ai-workspace-runtime (molecule-ai/molecule-core,
# molecule-ai-workspace-runtime (Molecule-AI/molecule-core,
# publish-runtime.yml, environment pypi-publish).
#
# After publish: the 8 template repos pick up the new version on their
@ -166,7 +166,7 @@ jobs:
- name: Publish to PyPI (Trusted Publisher / OIDC)
# PyPI side is configured: project molecule-ai-workspace-runtime →
# publisher molecule-ai/molecule-core, workflow publish-runtime.yml,
# publisher Molecule-AI/molecule-core, workflow publish-runtime.yml,
# environment pypi-publish. The action mints a short-lived OIDC
# token and exchanges it for a PyPI upload credential — no static
# API token in this repo's secrets.

View File

@ -37,7 +37,6 @@ on:
- 'workspace-server/**'
- 'canvas/**'
- 'manifest.json'
- 'scripts/**'
- '.github/workflows/publish-workspace-server-image.yml'
workflow_dispatch:
@ -75,87 +74,33 @@ jobs:
# plugin was dropped + workspace-server/Dockerfile no longer
# COPYs it.
# ECR auth + buildx setup are now inline in each build step
# below (Task #173, 2026-05-07).
#
# Why moved inline: aws-actions/configure-aws-credentials@v4 +
# aws-actions/amazon-ecr-login@v2 + docker/setup-buildx-action
# all left auth state in places that the actual `docker push`
# couldn't see on Gitea Actions:
# - The actions wrote to a step-scoped DOCKER_CONFIG path
# that didn't survive into subsequent shell steps.
# - Buildx couldn't bridge the runner container ↔
# operator-host docker daemon auth gap (401 on the
# docker-container driver, "no basic auth credentials"
# with the action-driven login).
#
# Doing AWS+ECR auth inline (`aws ecr get-login-password |
# docker login`) in the same shell step as `docker build` +
# `docker push` is the operator-host manual approach, mapped
# 1:1 into CI. Auth state is guaranteed to live in the env that
# `docker push` actually runs from.
#
# Post-suspension target is the operator's ECR org
# (153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/*),
# which already hosts platform-tenant + workspace-template-* +
# runner-base images. AWS creds come from the
# AWS_ACCESS_KEY_ID/SECRET secrets bound to the molecule-cp
# IAM user. Closes #161.
- name: Configure AWS credentials for ECR
# GHCR was the pre-suspension target; the molecule-ai org on
# GitHub got swept 2026-05-06 and ghcr.io/molecule-ai/* is no
# longer reachable. Post-suspension target is the operator's
# ECR org (153263036946.dkr.ecr.us-east-2.amazonaws.com/
# molecule-ai/*), which already hosts platform-tenant +
# workspace-template-* + runner-base images. AWS creds come
# from the AWS_ACCESS_KEY_ID/SECRET secrets bound to the
# molecule-cp IAM user. Closes #161.
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-2
- name: Log in to ECR
id: ecr-login
uses: aws-actions/amazon-ecr-login@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
- name: Compute tags
id: tags
run: |
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
# Pre-clone manifest deps before docker build (Task #173 fix).
#
# Why pre-clone: post-2026-05-06, every workspace-template-* repo on
# Gitea (codex, crewai, deepagents, gemini-cli, langgraph) plus all
# 7 org-template-* repos are private. The pre-fix Dockerfile.tenant
# ran `git clone` inside an in-image stage, which had no auth path
# — every CI build failed with "fatal: could not read Username for
# https://git.moleculesai.app". For weeks, every workspace-server
# rebuild required a manual operator-host push. Now we clone in the
# trusted CI context (where AUTO_SYNC_TOKEN is naturally available)
# and Dockerfile.tenant just COPYs from .tenant-bundle-deps/.
#
# Token shape: AUTO_SYNC_TOKEN is the devops-engineer persona PAT
# (see /etc/molecule-bootstrap/agent-secrets.env). Per saved memory
# `feedback_per_agent_gitea_identity_default`, every CI surface uses
# a per-persona token, never the founder PAT. clone-manifest.sh
# embeds it as basic-auth (oauth2:<token>) for the duration of the
# clones, then strips .git directories — the token never enters
# the resulting image.
#
# Idempotent: if a re-run finds populated dirs, clone-manifest.sh
# skips them; safe to retrigger via path-filter or workflow_dispatch.
- name: Pre-clone manifest deps
env:
MOLECULE_GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
run: |
set -euo pipefail
if [ -z "${MOLECULE_GITEA_TOKEN}" ]; then
echo "::error::AUTO_SYNC_TOKEN secret is empty — register the devops-engineer persona PAT in repo Actions secrets"
exit 1
fi
mkdir -p .tenant-bundle-deps
bash scripts/clone-manifest.sh \
manifest.json \
.tenant-bundle-deps/workspace-configs-templates \
.tenant-bundle-deps/org-templates \
.tenant-bundle-deps/plugins
# Sanity-check counts so a silent partial clone fails fast
# instead of producing a half-empty image.
ws_count=$(find .tenant-bundle-deps/workspace-configs-templates -mindepth 1 -maxdepth 1 -type d | wc -l)
org_count=$(find .tenant-bundle-deps/org-templates -mindepth 1 -maxdepth 1 -type d | wc -l)
plugins_count=$(find .tenant-bundle-deps/plugins -mindepth 1 -maxdepth 1 -type d | wc -l)
echo "Cloned: ws=$ws_count org=$org_count plugins=$plugins_count"
# Counts are derived from manifest.json (9 ws / 7 org / 21
# plugins as of 2026-05-07). If manifest.json grows but the
# clone step regresses silently, the find above caps at the
# actual disk state — but clone-manifest.sh's own EXPECTED vs
# CLONED check (line ~95) is the authoritative fail-fast.
# Canary-gated release flow:
# - This step always publishes :staging-<sha> + :staging-latest.
# - On staging push, staging-CP picks up :staging-latest immediately
@ -181,82 +126,58 @@ jobs:
# were running pre-RFC code. Adding the staging trigger above closes
# that gap. Earlier 2026-04-24 incident: a static :staging-<sha> pin
# drifted 10 days behind staging — same class of bug, different
# mechanism. ECR repo molecule-ai/platform created 2026-05-07.
# Build + push platform image with plain `docker` (no buildx).
# GIT_SHA bakes into the Go binary via -ldflags so /buildinfo
# returns it at runtime — see Dockerfile + buildinfo/buildinfo.go.
# The OCI revision label below carries the same value for registry
# tooling; the duplication is intentional.
- name: Build & push platform image to ECR (staging-<sha> + staging-latest)
env:
IMAGE_NAME: ${{ env.IMAGE_NAME }}
TAG_SHA: staging-${{ steps.tags.outputs.sha }}
TAG_LATEST: staging-latest
GIT_SHA: ${{ github.sha }}
REPO: ${{ github.repository }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-2
run: |
set -euo pipefail
# ECR auth in-step so config.json is populated in the same
# shell env that runs `docker push`. ECR get-login-password
# tokens last 12h, plenty for a single-step build+push.
ECR_REGISTRY="${IMAGE_NAME%%/*}"
aws ecr get-login-password --region us-east-2 | \
docker login --username AWS --password-stdin "${ECR_REGISTRY}"
docker build \
--file ./workspace-server/Dockerfile \
--build-arg GIT_SHA="${GIT_SHA}" \
--label "org.opencontainers.image.source=https://github.com/${REPO}" \
--label "org.opencontainers.image.revision=${GIT_SHA}" \
--label "org.opencontainers.image.description=Molecule AI platform (Go API server) — pending canary verify" \
--tag "${IMAGE_NAME}:${TAG_SHA}" \
--tag "${IMAGE_NAME}:${TAG_LATEST}" \
.
docker push "${IMAGE_NAME}:${TAG_SHA}"
docker push "${IMAGE_NAME}:${TAG_LATEST}"
# Canvas uses same-origin fetches. The tenant Go platform
# reverse-proxies /cp/* to the SaaS CP via its CP_UPSTREAM_URL
# env; the tenant's /canvas/viewport, /approvals/pending,
# /org/templates etc. live on the tenant platform itself.
# Both legs share one origin (the tenant subdomain) so
# PLATFORM_URL="" forces canvas to fetch paths as relative,
# which land same-origin.
#
# Self-hosted / private-label deployments override this at
# build time with a specific backend (e.g. local dev:
# NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080).
- name: Build & push tenant image to ECR (staging-<sha> + staging-latest)
env:
TENANT_IMAGE_NAME: ${{ env.TENANT_IMAGE_NAME }}
TAG_SHA: staging-${{ steps.tags.outputs.sha }}
TAG_LATEST: staging-latest
GIT_SHA: ${{ github.sha }}
REPO: ${{ github.repository }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-2
run: |
set -euo pipefail
# Re-login: the platform-image step's docker login wrote to
# the same config.json, so this is technically redundant — but
# making each push step self-contained keeps the workflow
# robust to step reordering / future extraction.
ECR_REGISTRY="${TENANT_IMAGE_NAME%%/*}"
aws ecr get-login-password --region us-east-2 | \
docker login --username AWS --password-stdin "${ECR_REGISTRY}"
docker build \
--file ./workspace-server/Dockerfile.tenant \
--build-arg NEXT_PUBLIC_PLATFORM_URL= \
--build-arg GIT_SHA="${GIT_SHA}" \
--label "org.opencontainers.image.source=https://github.com/${REPO}" \
--label "org.opencontainers.image.revision=${GIT_SHA}" \
--label "org.opencontainers.image.description=Molecule AI tenant platform + canvas — pending canary verify" \
--tag "${TENANT_IMAGE_NAME}:${TAG_SHA}" \
--tag "${TENANT_IMAGE_NAME}:${TAG_LATEST}" \
.
docker push "${TENANT_IMAGE_NAME}:${TAG_SHA}"
docker push "${TENANT_IMAGE_NAME}:${TAG_LATEST}"
# mechanism.
- name: Build & push platform image to GHCR (staging-<sha> + staging-latest)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: ./workspace-server/Dockerfile
platforms: linux/amd64
push: true
tags: |
${{ env.IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }}
${{ env.IMAGE_NAME }}:staging-latest
cache-from: type=gha
cache-to: type=gha,mode=max
# GIT_SHA bakes into the Go binary via -ldflags so /buildinfo
# returns it at runtime — see Dockerfile + buildinfo/buildinfo.go.
# This is the same value as the OCI revision label below; passing
# it twice is intentional, the OCI label is for registry tooling
# while /buildinfo is for the redeploy verification step.
build-args: |
GIT_SHA=${{ github.sha }}
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.description=Molecule AI platform (Go API server) — pending canary verify
- name: Build & push tenant image to GHCR (staging-<sha> + staging-latest)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: ./workspace-server/Dockerfile.tenant
platforms: linux/amd64
push: true
tags: |
${{ env.TENANT_IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }}
${{ env.TENANT_IMAGE_NAME }}:staging-latest
cache-from: type=gha
cache-to: type=gha,mode=max
# Canvas uses same-origin fetches. The tenant Go platform
# reverse-proxies /cp/* to the SaaS CP via its CP_UPSTREAM_URL
# env; the tenant's /canvas/viewport, /approvals/pending,
# /org/templates etc. live on the tenant platform itself.
# Both legs share one origin (the tenant subdomain) so
# PLATFORM_URL="" forces canvas to fetch paths as relative,
# which land same-origin.
#
# Self-hosted / private-label deployments override this at
# build time with a specific backend (e.g. local dev:
# NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080).
build-args: |
NEXT_PUBLIC_PLATFORM_URL=
GIT_SHA=${{ github.sha }}
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.description=Molecule AI tenant platform + canvas — pending canary verify

View File

@ -9,7 +9,7 @@ name: redeploy-tenants-on-main
#
# This workflow closes the gap by calling the control-plane admin
# endpoint that performs a canary-first, batched, health-gated rolling
# redeploy across every live tenant. Implemented in molecule-ai/
# redeploy across every live tenant. Implemented in Molecule-AI/
# molecule-controlplane as POST /cp/admin/tenants/redeploy-fleet
# (feat/tenant-auto-redeploy, landing alongside this workflow).
#
@ -146,7 +146,7 @@ jobs:
- name: Call CP redeploy-fleet
# CP_ADMIN_API_TOKEN must be set as a repo/org secret on
# molecule-ai/molecule-core, matching the staging/prod CP's
# Molecule-AI/molecule-core, matching the staging/prod CP's
# CP_ADMIN_API_TOKEN env. Stored in Railway, mirrored to this
# repo's secrets for CI.
env:

View File

@ -36,7 +36,7 @@ on:
workflow_run:
workflows: ['publish-workspace-server-image']
types: [completed]
branches: [main]
branches: [staging]
workflow_dispatch:
inputs:
target_tag:
@ -97,7 +97,7 @@ jobs:
- name: Call staging-CP redeploy-fleet
# CP_STAGING_ADMIN_API_TOKEN must be set as a repo/org secret
# on molecule-ai/molecule-core, matching staging-CP's
# on Molecule-AI/molecule-core, matching staging-CP's
# CP_ADMIN_API_TOKEN env var (visible in Railway controlplane
# / staging environment). Stored separately from the prod
# CP_ADMIN_API_TOKEN so a leak of one doesn't auth the other.

View File

@ -0,0 +1,105 @@
name: Retarget main PRs to staging
# Mechanical enforcement of SHARED_RULES rule 8 ("Staging-first workflow, no
# exceptions"). When a bot opens a PR against main, retarget it to staging
# automatically and leave an explanatory comment. Human CEO-authored PRs (the
# staging→main promotion PR, etc.) are left alone — they're the authorised
# exception to the rule.
#
# Why an Action instead of only a prompt rule: prompt rules depend on every
# role's system-prompt.md staying in sync. Today 5 of 8 engineer roles
# (core-be, core-fe, app-fe, app-qa, devops-engineer) don't have the
# staging-first section — the bot keeps opening PRs to main. An Action
# enforces the invariant regardless of prompt drift.

# pull_request_target (not pull_request) so the workflow definition is
# loaded from the base branch, and the token has write access to the PR.
on:
  pull_request_target:
    types: [opened, reopened]
    branches: [main]

permissions:
  pull-requests: write

jobs:
  retarget:
    name: Retarget to staging
    runs-on: ubuntu-latest
    # Only fire for bot-authored PRs. Human CEO PRs (staging→main promotion)
    # are intentional and pass through.
    #
    # Head-ref guard: never retarget a PR whose head IS `staging` — those
    # are the auto-promote staging→main PRs (opened by molecule-ai[bot]
    # since #2586 switched to an App token, which now passes the bot
    # filter below). Retargeting head=staging onto base=staging fails
    # with HTTP 422 "no new commits between base 'staging' and head
    # 'staging'", which used to surface as a noisy red workflow run on
    # every auto-promote (caught 2026-05-03 on PR #2588).
    if: >-
      github.event.pull_request.head.ref != 'staging'
      && (
        github.event.pull_request.user.type == 'Bot'
        || endsWith(github.event.pull_request.user.login, '[bot]')
        || github.event.pull_request.user.login == 'app/molecule-ai'
        || github.event.pull_request.user.login == 'molecule-ai[bot]'
      )
    steps:
      - name: Retarget PR base to staging
        id: retarget
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
        # Issue #1884: when the bot opens a PR against main and there's
        # already another PR on the same head branch targeting staging,
        # GitHub's PATCH /pulls returns 422 with
        # "A pull request already exists for base branch 'staging' …".
        # The retarget can't proceed — but the right response is to
        # close the now-redundant main-PR, not to fail the workflow
        # noisily. Detect that specific 422 and close instead.
        run: |
          # set +e so the PATCH's non-zero exit can be inspected instead
          # of killing the step; set -e is restored right after capture.
          set +e
          echo "Retargeting PR #${PR_NUMBER} (author: ${PR_AUTHOR}) from main → staging"
          PATCH_OUTPUT=$(gh api -X PATCH \
            "repos/${{ github.repository }}/pulls/${PR_NUMBER}" \
            -f base=staging \
            --jq '.base.ref' 2>&1)
          PATCH_EXIT=$?
          set -e
          if [ "$PATCH_EXIT" -eq 0 ]; then
            echo "::notice::Retargeted PR #${PR_NUMBER} → staging"
            echo "outcome=retargeted" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          # Specifically match the 422 duplicate-base/head error so
          # any OTHER PATCH failure (auth, deleted PR, etc.) still
          # surfaces as a real workflow failure.
          if echo "$PATCH_OUTPUT" | grep -q "pull request already exists for base branch 'staging'"; then
            echo "::notice::PR #${PR_NUMBER}: duplicate target-staging PR exists on same head — closing this main-PR as redundant."
            gh pr close "$PR_NUMBER" \
              --repo "${{ github.repository }}" \
              --comment "[retarget-bot] Closing — another PR on the same head branch already targets \`staging\`. This PR is redundant. See issue #1884 for the rationale."
            echo "outcome=closed-as-duplicate" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          echo "::error::Retarget PATCH failed and was NOT a duplicate-base error:"
          echo "$PATCH_OUTPUT" >&2
          exit 1
      - name: Post explainer comment
        # Only after a successful retarget — not when the PR was closed
        # as a duplicate (the close already carries its own comment).
        if: steps.retarget.outputs.outcome == 'retargeted'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          # Quoted 'BODY' delimiter: heredoc content is literal, so the
          # backticks and ${...}-looking text inside are NOT expanded.
          gh pr comment "$PR_NUMBER" \
            --repo "${{ github.repository }}" \
            --body "$(cat <<'BODY'
          [retarget-bot] This PR was opened against `main` and has been retargeted to `staging` automatically.

          **Why:** per [SHARED_RULES rule 8](https://git.moleculesai.app/molecule-ai/molecule-ai-org-template-molecule-dev/blob/main/SHARED_RULES.md), all feature work targets `staging` first; the CEO promotes `staging → main` separately.

          **What changed:** just the base branch — no code change. CI will re-run against `staging`. If you get merge conflicts, rebase on `staging`.

          **If this PR is the CEO's staging→main promotion:** the Action skipped you (only bot-authored PRs are retargeted). If you see this comment on your CEO PR, that's a bug — please tag @HongmingWang-Rabbit.
          BODY
          )"

View File

@ -12,7 +12,7 @@ name: Secret scan
#
# jobs:
# secret-scan:
# uses: molecule-ai/molecule-core/.github/workflows/secret-scan.yml@staging
# uses: Molecule-AI/molecule-core/.github/workflows/secret-scan.yml@staging
#
# Pin to @staging not @main — staging is the active default branch,
# main lags via the staging-promotion workflow. Updates ride along

7
.gitignore vendored
View File

@ -131,13 +131,6 @@ backups/
# Cloned by publish-workspace-server-image.yml so the Dockerfile's
# replace-directive path resolves. Lives in its own repo.
/molecule-ai-plugin-github-app-auth/
# Tenant-image build context — populated by the workflow's
# "Pre-clone manifest deps" step. Mirrors the public manifest, holds the
# same content as the three /<>/ dirs above but namespaced under one
# parent so the Docker build context is a single COPY-friendly tree.
# Each entry is a transient working-dir, never source-of-truth, never
# committed.
/.tenant-bundle-deps/
# Internal-flavored content lives in Molecule-AI/internal — NEVER in this
# public monorepo. Migrated 2026-04-23 (CEO directive). The CI workflow

View File

@ -1,28 +0,0 @@
# Top-level Makefile — convenience wrappers around docker compose.
#
# Most molecule-core dev work happens via these shortcuts. CI doesn't
# use this Makefile; CI calls docker compose / go test directly so the
# Makefile can evolve without breaking the build.
#
# NOTE: recipe lines are tab-indented (required by make). The trailing
# `## text` on each target line is data, not decoration — the `help`
# target greps for it to build the command listing below.

.PHONY: help dev up down logs build test

help: ## Show this help.
	@grep -E '^[a-zA-Z_-]+:.*?## ' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-12s\033[0m %s\n", $$1, $$2}'

dev: ## Start the full stack with air hot-reload for the platform service.
	docker compose -f docker-compose.yml -f docker-compose.dev.yml up

up: ## Start the full stack in production-shape mode (no air, normal Dockerfile).
	docker compose up

down: ## Stop the stack and remove containers (volumes preserved).
	docker compose down

logs: ## Tail logs from all services (Ctrl-C to detach).
	docker compose logs -f

build: ## Force a fresh build of the platform image (no cache).
	docker compose build --no-cache platform

test: ## Run Go unit tests in workspace-server/.
	cd workspace-server && go test -race ./...

View File

@ -1,10 +0,0 @@
# Excluded from `docker build` context. Without this, the COPY . . step in
# canvas/Dockerfile clobbers the freshly-installed node_modules with the
# host's (potentially broken / wrong-arch) copy — the @tailwindcss/oxide
# native binary disagreed and broke `next build`.
node_modules
.next
.git
*.log
.env*
!.env.example

View File

@ -1,11 +1,7 @@
FROM node:22-alpine AS builder
WORKDIR /app
COPY package.json package-lock.json* ./
# `npm ci` (not `install`) for lockfile-exact reproducibility.
# `--include=optional` ensures the platform-specific @tailwindcss/oxide
# native binary lands — without it, postcss fails with "Cannot read
# properties of undefined (reading 'All')" at build time.
RUN npm ci --include=optional
RUN npm install
COPY . .
ARG NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080
ARG NEXT_PUBLIC_WS_URL=ws://localhost:8080/ws

View File

@ -17,24 +17,6 @@ import { dirname, join } from "node:path";
// update one heuristic. Production is unaffected: `output: "standalone"`
// bakes resolved env into the build, and the marker file isn't shipped.
loadMonorepoEnv();
// Boot-time matched-pair guard for ADMIN_TOKEN / NEXT_PUBLIC_ADMIN_TOKEN.
// When ADMIN_TOKEN is set on the workspace-server (server-side bearer
// gate, wsauth_middleware.go ~L245), the canvas MUST send the matching
// NEXT_PUBLIC_ADMIN_TOKEN as `Authorization: Bearer ...` on every API
// call. If only one is set, every workspace API call 401s silently —
// the canvas hydrates with empty data and the user sees a broken page
// with no console hint about the auth-config mismatch.
//
// Pre-fix the matched-pair contract was descriptive only (a comment in
// .env): future devs/agents could re-misconfigure with one of the two
// unset and silently 401. Closes the post-PR-#174 self-review gap.
//
// Warn-only (not exit) — production canvas Docker images bake these
// vars into the build at image-build time, and a missed pair there
// would still emit the warning at runtime via the standalone server's
// startup. Killing the process on misconfiguration would turn a
// recoverable auth issue into a hard crashloop.
checkAdminTokenPair();
const nextConfig: NextConfig = {
output: "standalone",
@ -75,43 +57,6 @@ function loadMonorepoEnv() {
);
}
// Boot-time matched-pair guard. Runs after .env has been loaded so the
// check sees the post-load state. The two env vars must be set or
// unset together; one-without-the-other is the silent-401 footgun.
//
// Treats empty string ("") as unset. An explicitly-empty `KEY=` in
// .env counts as set-to-empty in `process.env`, but for auth purposes
// an empty bearer token is equivalent to no token — so both
// `ADMIN_TOKEN=` and an unset ADMIN_TOKEN are equivalent relative to
// the matched-pair invariant.
//
// Returns void; side effect is the console.error warning. Kept as a
// separate function (exported) so a future test can reset env, call
// this, and assert on captured stderr.
export function checkAdminTokenPair(): void {
  // Empty string counts as unset: an empty bearer token is equivalent
  // to no token for auth purposes, so `Boolean("")` → false is correct.
  const hasServerToken = Boolean(process.env.ADMIN_TOKEN);
  const hasClientToken = Boolean(process.env.NEXT_PUBLIC_ADMIN_TOKEN);
  // Matched pair (both set or both unset) — nothing to warn about.
  if (hasServerToken === hasClientToken) {
    return;
  }
  // Mismatched: the message names the half that IS currently set so
  // the operator can tell which side to fix without grepping. The fix
  // is symmetric either way (set both, or unset both).
  const warning = hasServerToken
    ? "[next.config] ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not — " +
      "canvas will 401 against workspace-server because the bearer header " +
      "is never attached. Set both to the same value, or unset both."
    : "[next.config] NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not — " +
      "workspace-server will reject the bearer because no AdminAuth gate " +
      "is configured. Set both to the same value, or unset both.";
  // eslint-disable-next-line no-console
  console.error(warning);
}
function findMonorepoRoot(start: string): string | null {
let dir = start;
for (let i = 0; i < 6; i++) {

View File

@ -3,7 +3,6 @@ import { cookies, headers } from "next/headers";
import "./globals.css";
import { AuthGate } from "@/components/AuthGate";
import { CookieConsent } from "@/components/CookieConsent";
import { PurchaseSuccessModal } from "@/components/PurchaseSuccessModal";
import { ThemeProvider } from "@/lib/theme-provider";
import {
THEME_COOKIE,
@ -87,12 +86,6 @@ export default async function RootLayout({
vercel preview URL, apex) pass through unchanged. */}
<AuthGate>{children}</AuthGate>
<CookieConsent />
{/* Demo Mock #1: post-purchase success toast. Mounted at the
layout level so it persists across page state transitions
(loading → hydrated → error) without being unmounted and
losing its open-state. Reads ?purchase_success=1 from the
URL on first paint, then strips the param. */}
<PurchaseSuccessModal />
</ThemeProvider>
</body>
</html>

View File

@ -1,10 +1,9 @@
'use client';
import { useEffect, useMemo, useCallback, useRef } from "react";
import { useEffect, useMemo, useCallback } from "react";
import { type Edge, MarkerType } from "@xyflow/react";
import { api } from "@/lib/api";
import { useCanvasStore } from "@/store/canvas";
import { useSocketEvent } from "@/hooks/useSocketEvent";
import type { ActivityEntry } from "@/types/activity";
// ── Constants ─────────────────────────────────────────────────────────────────
@ -12,6 +11,9 @@ import type { ActivityEntry } from "@/types/activity";
/** 60-minute look-back window for delegation activity */
export const A2A_WINDOW_MS = 60 * 60 * 1000;
/** Polling interval — refresh edges every 60 seconds */
export const A2A_POLL_MS = 60 * 1_000;
/** Threshold for "hot" edges: < 5 minutes → animated + violet stroke */
export const A2A_HOT_MS = 5 * 60 * 1_000;
@ -129,20 +131,6 @@ export function buildA2AEdges(
* `a2aEdges`. Canvas.tsx merges these with topology edges and passes the
* combined list to ReactFlow.
*
* Update shape (issue #61 Stage 2, replaces the 60s polling loop):
* - On mount (when showA2AEdges): one HTTP fan-out per visible workspace
* (delegation rows, 60-min window). Bootstraps the local row buffer.
* - Steady state: subscribes to ACTIVITY_LOGGED via useSocketEvent.
* Each delegation event from a visible workspace is appended to the
* buffer; edges are re-derived via the existing buildA2AEdges helper.
* - showA2AEdges toggle off: clears edges + buffer.
* - Visible-ID-set change: re-bootstraps so a freshly-shown workspace
* backfills its 60-min history (existing visibleIdsKey selector
behaviour preserved — that's the 2026-05-04 render-loop fix).
*
* No interval poll. The singleton ReconnectingSocket already owns
* reconnect / backoff / health-check; useSocketEvent inherits those.
*
* Mount this inside CanvasInner (no ReactFlow hook dependency).
*/
export function A2ATopologyOverlay() {
@ -169,9 +157,7 @@ export function A2ATopologyOverlay() {
// the symptom of this re-render storm.
//
// The fix is purely the dependency-stability change here; the fetch
// logic is unchanged. Post-#61 the polling-driven fetch is gone, but
// the visibleIdsKey gate is still required so a peer-discovery write
// doesn't trigger a wasteful re-bootstrap.
// logic is unchanged.
const visibleIdsKey = useCanvasStore((s) =>
s.nodes
.filter((n) => !n.hidden)
@ -185,42 +171,16 @@ export function A2ATopologyOverlay() {
[visibleIdsKey]
);
// Local rolling buffer of delegation rows. Pruned by A2A_WINDOW_MS on
// each rebuild so a long-lived session doesn't accumulate unbounded
// history. The buffer's high-water mark is approximately:
// visibleIds.length × bootstrap-fetch-limit (500) + WS arrivals
// Real-world ceiling: ~3000 entries at the 60-min boundary, all of
// which buildA2AEdges aggregates into at most N² edges.
const bufferRef = useRef<ActivityEntry[]>([]);
// visibleIdsRef gives the WS handler the latest visible-ID set without
// re-subscribing on every render. The bus listener is registered
// exactly once per mount; subscriber-side filtering reads from this ref.
const visibleIdsRef = useRef(visibleIds);
visibleIdsRef.current = visibleIds;
// Re-derive overlay edges from the current buffer + push to store.
// Prunes by A2A_WINDOW_MS first so memory stays bounded across long
// sessions and the aggregation cost stays O(window-size).
const recomputeAndPush = useCallback(() => {
const cutoff = Date.now() - A2A_WINDOW_MS;
bufferRef.current = bufferRef.current.filter(
(r) => new Date(r.created_at).getTime() > cutoff
);
setA2AEdges(buildA2AEdges(bufferRef.current));
}, [setA2AEdges]);
// Bootstrap fan-out — one HTTP per visible workspace. Replaces the
// 60s polling loop entirely. Race-aware: any WS arrivals that landed
// in the buffer DURING the fetch (between the await and resume) are
// preserved by id-dedup-with-fetched-first ordering.
const bootstrap = useCallback(async () => {
// Fetch delegation activity for all visible workspaces and rebuild overlay edges.
const fetchAndUpdate = useCallback(async () => {
if (visibleIds.length === 0) {
bufferRef.current = [];
setA2AEdges([]);
return;
}
try {
const fetchedRows = (
// Fan-out — one request per visible workspace.
// Per-request failures are swallowed so one broken workspace doesn't blank the overlay.
const allRows = (
await Promise.all(
visibleIds.map((id) =>
api
@ -232,76 +192,24 @@ export function A2ATopologyOverlay() {
)
).flat();
// Merge: fetched rows first, then any in-flight WS arrivals that
// accumulated during the await. Dedup by id so rows that appear
// in both paths are not double-counted in the aggregation.
const merged = [...fetchedRows, ...bufferRef.current];
const seen = new Set<string>();
bufferRef.current = merged.filter((r) => {
if (seen.has(r.id)) return false;
seen.add(r.id);
return true;
});
recomputeAndPush();
setA2AEdges(buildA2AEdges(allRows));
} catch {
// Overlay failure is non-critical — canvas remains functional
}
}, [visibleIds, setA2AEdges, recomputeAndPush]);
}, [visibleIds, setA2AEdges]);
useEffect(() => {
if (!showA2AEdges) {
// Clear edges + buffer immediately when toggled off
bufferRef.current = [];
// Clear edges immediately when toggled off
setA2AEdges([]);
return;
}
void bootstrap();
}, [showA2AEdges, bootstrap, setA2AEdges]);
// Live-update path. Filters server-side ACTIVITY_LOGGED events down
// to delegation initiations from visible workspaces and appends each
// into the rolling buffer, re-deriving edges via buildA2AEdges.
//
// Only `method === "delegate"` rows count — the same filter
// buildA2AEdges applies — so delegate_result rows arriving over the
// wire don't double-count.
useSocketEvent((msg) => {
if (!showA2AEdges) return;
if (msg.event !== "ACTIVITY_LOGGED") return;
const p = (msg.payload || {}) as Record<string, unknown>;
if (p.activity_type !== "delegation") return;
if (p.method !== "delegate") return;
const wsId = msg.workspace_id;
if (!visibleIdsRef.current.includes(wsId)) return;
// Synthesise an ActivityEntry from the WS payload so buildA2AEdges
// (which the bootstrap path also feeds) handles it identically.
const entry: ActivityEntry = {
id:
(p.id as string) ||
`ws-push-${msg.timestamp || Date.now()}-${wsId}`,
workspace_id: wsId,
activity_type: "delegation",
source_id: (p.source_id as string | null) ?? null,
target_id: (p.target_id as string | null) ?? null,
method: "delegate",
summary: (p.summary as string | null) ?? null,
request_body: null,
response_body: null,
duration_ms: (p.duration_ms as number | null) ?? null,
status: (p.status as string) || "ok",
error_detail: null,
created_at:
(p.created_at as string) ||
msg.timestamp ||
new Date().toISOString(),
};
bufferRef.current = [...bufferRef.current, entry];
recomputeAndPush();
});
// Initial fetch, then poll every 60 s
void fetchAndUpdate();
const timer = setInterval(() => void fetchAndUpdate(), A2A_POLL_MS);
return () => clearInterval(timer);
}, [showA2AEdges, fetchAndUpdate, setA2AEdges]);
// Pure side-effect — renders nothing
return null;

View File

@ -3,7 +3,6 @@
import { useState, useEffect, useCallback, useRef } from "react";
import { useCanvasStore } from "@/store/canvas";
import { api } from "@/lib/api";
import { useSocketEvent } from "@/hooks/useSocketEvent";
import { COMM_TYPE_LABELS } from "@/lib/design-tokens";
interface Communication {
@ -19,71 +18,32 @@ interface Communication {
durationMs: number | null;
}
/** Workspace-server `ACTIVITY_LOGGED` payload shape. Pulled out so the
* WS handler below has a typed view of the same fields the HTTP
* bootstrap consumes — drift between the two paths is a class of bug
* AgentCommsPanel hit historically. */
interface ActivityLoggedPayload {
id?: string;
activity_type?: string;
source_id?: string | null;
target_id?: string | null;
workspace_id?: string;
summary?: string | null;
status?: string;
duration_ms?: number | null;
created_at?: string;
}
/** Fan-out cap for the bootstrap HTTP fetch on mount / on visibility
* re-open. Kept at 3 (carried over from the 2026-05-04 fix) so a
* freshly-mounted overlay on a 15-workspace tenant only spends 3
* round-trips bootstrapping. Live updates after that arrive via the
* WS subscription below — no polling, no fan-out to maintain. */
const BOOTSTRAP_FAN_OUT_CAP = 3;
/** Cap on the rendered list. Bootstrap + every WS push prepends, the
* list is sliced to this size after each update. Mirrors the prior
* polling-loop behaviour. */
const COMMS_RENDER_CAP = 20;
/**
* Overlay showing recent A2A communications between workspaces.
*
* Update shape (issue #61 Stage 1, replaces the 30s polling loop):
* - On mount (when visible): one HTTP bootstrap per online workspace,
* capped at BOOTSTRAP_FAN_OUT_CAP. Yields the initial recent-comms
* window without waiting for live events.
* - Steady state: subscribes to ACTIVITY_LOGGED via useSocketEvent.
* Each event with a matching activity_type from a visible online
* workspace gets synthesised into a Communication and prepended.
* - Visibility re-open: re-bootstraps so the user sees the freshest
* window even if WS was idle while collapsed.
*
* No interval poll. The singleton ReconnectingSocket in `store/socket.ts`
* already owns reconnect/backoff/health-check, and `useSocketEvent`
* inherits those guarantees. If WS is genuinely unhealthy, the overlay
* shows the bootstrap snapshot until the next visibility re-open or
* the next WS reconnect (which fires its own rehydrate burst).
* Renders as a floating log panel that auto-updates.
*/
export function CommunicationOverlay() {
const [comms, setComms] = useState<Communication[]>([]);
const [visible, setVisible] = useState(true);
const selectedNodeId = useCanvasStore((s) => s.selectedNodeId);
const nodes = useCanvasStore((s) => s.nodes);
// nodesRef gives the WS handler current node-name resolution without
// re-subscribing on every node-list change. The bus listener is
// registered exactly once per mount; subscriber-side filtering reads
// the latest value via this ref.
const nodesRef = useRef(nodes);
nodesRef.current = nodes;
const bootstrapComms = useCallback(async () => {
const fetchComms = useCallback(async () => {
try {
// Fan-out cap: each polled workspace = 1 round-trip. The platform
// rate limits at 600 req/min/IP; combined with heartbeats + other
// canvas polling, every workspace polled here costs ~6 req/min
// (1 every 30s × 1 per workspace). Capping at 3 keeps this
// overlay's footprint at 18 req/min worst case — well under
// budget even with 8+ workspaces visible. Caught 2026-05-04 when
// a user with 8+ workspaces (Design Director + 6 sub-agents +
// 3 standalones) saw sustained 429s in canvas console.
const onlineNodes = nodesRef.current.filter((n) => n.data.status === "online");
const allComms: Communication[] = [];
for (const node of onlineNodes.slice(0, BOOTSTRAP_FAN_OUT_CAP)) {
for (const node of onlineNodes.slice(0, 3)) {
try {
const activities = await api.get<Array<{
id: string;
@ -99,8 +59,8 @@ export function CommunicationOverlay() {
for (const a of activities) {
if (a.activity_type === "a2a_send" || a.activity_type === "a2a_receive") {
const sourceNode = nodesRef.current.find((n) => n.id === (a.source_id || a.workspace_id));
const targetNode = nodesRef.current.find((n) => n.id === (a.target_id || ""));
const sourceNode = nodes.find((n) => n.id === (a.source_id || a.workspace_id));
const targetNode = nodes.find((n) => n.id === (a.target_id || ""));
allComms.push({
id: a.id,
sourceId: a.source_id || a.workspace_id,
@ -116,12 +76,11 @@ export function CommunicationOverlay() {
}
}
} catch {
// Per-workspace failures must not blank the panel — the same
// robustness the polling version had.
// Skip workspaces that fail
}
}
// Newest-first with id-dedup, capped at COMMS_RENDER_CAP.
// Sort by timestamp, newest first, dedupe
const seen = new Set<string>();
const sorted = allComms
.sort((a, b) => b.timestamp.localeCompare(a.timestamp))
@ -130,78 +89,29 @@ export function CommunicationOverlay() {
seen.add(c.id);
return true;
})
.slice(0, COMMS_RENDER_CAP);
.slice(0, 20);
setComms(sorted);
} catch {
// Bootstrap failure is non-blocking — the WS subscription below
// will populate the panel as live events arrive.
// Silently handle API errors
}
}, []);
// Bootstrap once on mount + every time the user re-opens after a
// collapse. Closed-panel state intentionally drops live updates so
// the panel doesn't churn invisible state — the next open reloads.
useEffect(() => {
// Gate polling on visibility — when the user collapses the overlay
// the data isn't being read, so the per-workspace fan-out becomes
// pure rate-limit overhead. Pre-fix this overlay polled regardless
// of whether the panel was shown, costing ~36 req/min from a
// hidden surface.
if (!visible) return;
bootstrapComms();
}, [bootstrapComms, visible]);
// Live-update path. Filters server-side ACTIVITY_LOGGED events down
// to the comm-overlay-relevant subset and prepends each into the
// rendered list with the same dedup the bootstrap path uses.
//
// Scope guard: ignore events for workspaces not in the visible online
// set, so a user collapsing one workspace doesn't see its comms
// continue to scroll in. Same shape the bootstrap path applies.
useSocketEvent((msg) => {
if (!visible) return;
if (msg.event !== "ACTIVITY_LOGGED") return;
const p = (msg.payload || {}) as ActivityLoggedPayload;
const type = p.activity_type;
if (type !== "a2a_send" && type !== "a2a_receive" && type !== "task_update") return;
const wsId = msg.workspace_id;
const onlineSet = new Set(
nodesRef.current.filter((n) => n.data.status === "online").map((n) => n.id),
);
if (!onlineSet.has(wsId)) return;
const sourceId = p.source_id || wsId;
const targetId = p.target_id || "";
const sourceNode = nodesRef.current.find((n) => n.id === sourceId);
const targetNode = nodesRef.current.find((n) => n.id === targetId);
const incoming: Communication = {
id: p.id || `${msg.timestamp || Date.now()}:${sourceId}:${targetId}`,
sourceId,
targetId,
sourceName: sourceNode?.data.name || "Unknown",
targetName: targetNode?.data.name || "Unknown",
type: type as Communication["type"],
summary: p.summary || "",
status: p.status || "ok",
timestamp: p.created_at || msg.timestamp || new Date().toISOString(),
durationMs: p.duration_ms ?? null,
};
setComms((prev) => {
// Prepend, dedup by id, re-cap. Functional setState is necessary
// because two ACTIVITY_LOGGED events arriving in the same React
// batch would otherwise read a stale `comms` from the closure.
const seen = new Set<string>();
const merged = [incoming, ...prev]
.sort((a, b) => b.timestamp.localeCompare(a.timestamp))
.filter((c) => {
if (seen.has(c.id)) return false;
seen.add(c.id);
return true;
})
.slice(0, COMMS_RENDER_CAP);
return merged;
});
});
fetchComms();
// 30s cadence (was 10s). At 3-workspace fan-out that's 6 req/min
// worst case from this overlay. Combined with heartbeats (~30/min)
// and other canvas polling, leaves ample headroom under the 600/
// min/IP server-side rate limit even at 8+ workspace tenants.
const interval = setInterval(fetchComms, 30000);
return () => clearInterval(interval);
}, [fetchComms, visible]);
if (!visible || comms.length === 0) {
return (

View File

@ -1,175 +0,0 @@
"use client";
/**
 * PurchaseSuccessModal — demo-only post-purchase confirmation.
*
* Mounted on the canvas root (`app/page.tsx`). On first paint it inspects
* `?purchase_success=1[&item=<name>]` on the current URL. If present, it
* renders a centred modal styled after `ConfirmDialog`, schedules a 5s
* auto-dismiss, and rewrites the URL via `history.replaceState` to drop
* the params so a refresh after dismiss does NOT re-show the modal.
*
 * Mock for the funding demo — there is no real billing surface behind
* this. The marketplace "Purchase" button on the landing page redirects
* here with the params; this modal is the only thing the user sees of
* the "transaction".
*
* Styling matches the warm-paper @theme tokens (surface-sunken / line /
* ink / good) so it tracks light + dark without per-mode overrides.
*/
import { useEffect, useRef, useState } from "react";
import { createPortal } from "react-dom";
const AUTO_DISMISS_MS = 5000;
function readPurchaseParams(): { open: boolean; item: string | null } {
  // Closed-state result, shared by the SSR guard and the no-flag path.
  const closed = { open: false, item: null as string | null };
  // SSR guard: without a window there is no URL to inspect.
  if (typeof window === "undefined") return closed;
  const params = new URLSearchParams(window.location.search);
  // Accept both "1" and "true" spellings of the success flag.
  switch (params.get("purchase_success")) {
    case "1":
    case "true":
      return { open: true, item: params.get("item") };
    default:
      return closed;
  }
}
function stripPurchaseParams() {
  // SSR guard — nothing to rewrite without a window.
  if (typeof window === "undefined") return;
  const cleaned = new URL(window.location.href);
  for (const key of ["purchase_success", "item"]) {
    cleaned.searchParams.delete(key);
  }
  // replaceState (not pushState) so back-button doesn't return to the
  // pre-strip URL and re-trigger the modal.
  window.history.replaceState({}, "", cleaned.toString());
}
export function PurchaseSuccessModal() {
  // One-shot modal state: `open` drives rendering, `item` holds the
  // (already percent-decoded) item name pulled from the redirect URL.
  const [open, setOpen] = useState(false);
  const [item, setItem] = useState<string | null>(null);
  // Portal guard: document.body is unavailable during SSR, so nothing
  // renders until the first client-side mount has happened.
  const [mounted, setMounted] = useState(false);
  const dialogRef = useRef<HTMLDivElement>(null);
  // Read the URL params once on mount. We don't subscribe to navigation —
  // this modal is a one-shot for the demo redirect, not a persistent
  // listener.
  useEffect(() => {
    setMounted(true);
    const { open: shouldOpen, item: itemName } = readPurchaseParams();
    if (shouldOpen) {
      setOpen(true);
      setItem(itemName);
      // Clean the URL immediately so a refresh after the modal is closed
      // (or even while it's still open) does NOT re-trigger it.
      stripPurchaseParams();
    }
  }, []);
  // Auto-dismiss timer + Escape handler, armed only while open.
  useEffect(() => {
    if (!open) return;
    const t = window.setTimeout(() => setOpen(false), AUTO_DISMISS_MS);
    const onKey = (e: KeyboardEvent) => {
      if (e.key === "Escape") setOpen(false);
    };
    window.addEventListener("keydown", onKey);
    // Focus the close button so keyboard users land on it after redirect.
    // querySelector("button") matches the first button INSIDE dialogRef;
    // the backdrop is a sibling node, so this is the Close button.
    const raf = requestAnimationFrame(() => {
      dialogRef.current?.querySelector<HTMLButtonElement>("button")?.focus();
    });
    return () => {
      window.clearTimeout(t);
      window.removeEventListener("keydown", onKey);
      cancelAnimationFrame(raf);
    };
  }, [open]);
  if (!open || !mounted) return null;
  // BUG FIX: `item` comes from URLSearchParams.get() (inside
  // readPurchaseParams), which already percent-decodes the value.
  // The previous decodeURIComponent(item) therefore double-decoded —
  // and threw a URIError, crashing the render, for any item name
  // containing a literal "%" (e.g. "50% faster agent").
  const itemLabel = item || "Your new agent";
  return createPortal(
    <div
      className="fixed inset-0 z-[9999] flex items-center justify-center"
      data-testid="purchase-success-modal"
    >
      {/* Backdrop — click closes, matches ConfirmDialog backdrop. */}
      <div
        className="absolute inset-0 bg-black/60 backdrop-blur-sm"
        onClick={() => setOpen(false)}
        aria-hidden="true"
      />
      <div
        ref={dialogRef}
        role="dialog"
        aria-modal="true"
        aria-labelledby="purchase-success-title"
        className="relative bg-surface-sunken border border-line rounded-xl shadow-2xl shadow-black/50 max-w-[420px] w-full mx-4 overflow-hidden"
      >
        <div className="px-6 pt-6 pb-4">
          <div className="flex items-start gap-4">
            {/* Success glyph uses --color-good so it tracks the theme.
                Inline SVG over an emoji so it stays readable + on-brand
                in both light and dark. */}
            <div
              className="flex h-10 w-10 flex-shrink-0 items-center justify-center rounded-full"
              style={{
                background:
                  "color-mix(in srgb, var(--color-good) 15%, transparent)",
                color: "var(--color-good)",
              }}
            >
              <svg
                width="22"
                height="22"
                viewBox="0 0 24 24"
                fill="none"
                aria-hidden="true"
              >
                <circle
                  cx="12"
                  cy="12"
                  r="10"
                  stroke="currentColor"
                  strokeWidth="1.5"
                />
                <path
                  d="M7.5 12.5L10.5 15.5L16.5 9.5"
                  stroke="currentColor"
                  strokeWidth="1.8"
                  strokeLinecap="round"
                  strokeLinejoin="round"
                />
              </svg>
            </div>
            <div className="flex-1">
              <h3
                id="purchase-success-title"
                className="text-base font-semibold text-ink"
              >
                Purchase successful
              </h3>
              <p className="mt-1.5 text-[13px] leading-relaxed text-ink-mid">
                <span className="font-medium text-ink">{itemLabel}</span> has
                been added to your workspace. Provisioning starts in the
                background you can keep working while it spins up.
              </p>
            </div>
          </div>
        </div>
        <div className="flex items-center justify-between gap-3 px-6 py-3 border-t border-line bg-surface/50">
          <span className="font-mono text-[10.5px] uppercase tracking-[0.12em] text-ink-soft">
            auto-dismiss · {AUTO_DISMISS_MS / 1000}s
          </span>
          <button
            type="button"
            onClick={() => setOpen(false)}
            className="px-3.5 py-1.5 text-[13px] rounded-lg bg-accent hover:bg-accent-strong text-white transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-offset-2 focus-visible:ring-offset-surface-sunken focus-visible:ring-accent/60"
          >
            Close
          </button>
        </div>
      </div>
    </div>,
    document.body,
  );
}

View File

@ -41,10 +41,6 @@ vi.mock("@/store/canvas", () => ({
// ── Imports (after mocks) ─────────────────────────────────────────────────────
import { api } from "@/lib/api";
import {
emitSocketEvent,
_resetSocketEventListenersForTests,
} from "@/store/socket-events";
import {
buildA2AEdges,
formatA2ARelativeTime,
@ -346,151 +342,6 @@ describe("A2ATopologyOverlay component", () => {
expect(mockGet.mock.calls.length).toBe(callsAfterMount);
});
// ── #61 Stage 2: ACTIVITY_LOGGED subscription tests ────────────────────────
//
// Pin the post-#61 behaviour: WS push for delegation contributes to
// the overlay's edge buffer with NO additional HTTP fetch. Same shape
// as Stage 1 (CommunicationOverlay).
describe("#61 stage 2 — ACTIVITY_LOGGED subscription", () => {
beforeEach(() => {
_resetSocketEventListenersForTests();
});
afterEach(() => {
_resetSocketEventListenersForTests();
});
function emitDelegation(overrides: {
workspaceId?: string;
sourceId?: string;
targetId?: string;
method?: string;
activityType?: string;
} = {}) {
// Use Date.now() (real time, fake-timer-frozen) rather than the
// hardcoded NOW constant — buildA2AEdges prunes by Date.now() -
// A2A_WINDOW_MS, so a row dated against the wrong epoch silently
// falls outside the window and the test fails for a confusing
// reason ("edges array empty" vs "filter dropped my row").
const realNow = Date.now();
emitSocketEvent({
event: "ACTIVITY_LOGGED",
workspace_id: overrides.workspaceId ?? "ws-a",
timestamp: new Date(realNow).toISOString(),
payload: {
id: `act-${Math.random().toString(36).slice(2)}`,
activity_type: overrides.activityType ?? "delegation",
method: overrides.method ?? "delegate",
source_id: overrides.sourceId ?? "ws-a",
target_id: overrides.targetId ?? "ws-b",
status: "ok",
created_at: new Date(realNow - 30_000).toISOString(),
},
});
}
it("does NOT poll on a 60s interval after bootstrap (post-#61)", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue([] as any);
render(<A2ATopologyOverlay />);
await act(async () => { await Promise.resolve(); });
const callsAfterBootstrap = mockGet.mock.calls.length;
expect(callsAfterBootstrap).toBe(2); // ws-a + ws-b
// Pre-#61: a 60s clock tick would fire a fresh fan-out (2 more
// calls). Post-#61: no interval, no extra calls.
await act(async () => {
vi.advanceTimersByTime(120_000);
});
expect(mockGet.mock.calls.length).toBe(callsAfterBootstrap);
});
it("WS push for a delegation event from a visible workspace updates edges with NO HTTP call", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue([] as any);
render(<A2ATopologyOverlay />);
await act(async () => { await Promise.resolve(); await Promise.resolve(); });
mockGet.mockClear();
mockStoreState.setA2AEdges.mockClear();
await act(async () => {
emitDelegation({ sourceId: "ws-a", targetId: "ws-b" });
});
// Edges-set called with at least one a2a edge for the new push.
const calls = mockStoreState.setA2AEdges.mock.calls;
expect(calls.length).toBeGreaterThanOrEqual(1);
const lastCall = calls[calls.length - 1][0] as Array<{ id: string }>;
expect(lastCall.some((e) => e.id === "a2a-ws-a-ws-b")).toBe(true);
// Critical: no HTTP fetch fired during the WS path.
expect(mockGet).not.toHaveBeenCalled();
});
it("WS push for a non-delegation activity_type is ignored", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue([] as any);
render(<A2ATopologyOverlay />);
await act(async () => { await Promise.resolve(); });
mockStoreState.setA2AEdges.mockClear();
await act(async () => {
emitDelegation({ activityType: "a2a_send" });
});
// setA2AEdges must not be called by the WS handler — the only
// setA2AEdges calls in this test came from the initial bootstrap.
expect(mockStoreState.setA2AEdges).not.toHaveBeenCalled();
});
it("WS push for a delegate_result row is ignored (mirrors buildA2AEdges filter)", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue([] as any);
render(<A2ATopologyOverlay />);
await act(async () => { await Promise.resolve(); });
mockStoreState.setA2AEdges.mockClear();
await act(async () => {
emitDelegation({ method: "delegate_result" });
});
// delegate_result rows do not contribute to the edge count — they
// are completion signals, not initiations.
expect(mockStoreState.setA2AEdges).not.toHaveBeenCalled();
});
it("WS push from a hidden workspace is ignored", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue([] as any);
render(<A2ATopologyOverlay />);
await act(async () => { await Promise.resolve(); });
mockStoreState.setA2AEdges.mockClear();
await act(async () => {
emitDelegation({ workspaceId: "ws-hidden" });
});
expect(mockStoreState.setA2AEdges).not.toHaveBeenCalled();
});
it("WS push while showA2AEdges is false is ignored", async () => {
mockStoreState.showA2AEdges = false;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue([] as any);
render(<A2ATopologyOverlay />);
// The mount path with showA2AEdges=false calls setA2AEdges([])
// once — clear that to isolate the WS path.
mockStoreState.setA2AEdges.mockClear();
await act(async () => {
emitDelegation();
});
expect(mockStoreState.setA2AEdges).not.toHaveBeenCalled();
expect(mockGet).not.toHaveBeenCalled();
});
});
it("re-fetches when the visible ID set actually changes", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValue([] as any);

View File

@ -36,10 +36,6 @@ vi.mock("@/hooks/useWorkspaceName", () => ({
useWorkspaceName: () => () => "Test WS",
}));
import {
emitSocketEvent,
_resetSocketEventListenersForTests,
} from "@/store/socket-events";
import { ActivityTab } from "../tabs/ActivityTab";
// ── Fixtures ──────────────────────────────────────────────────────────────────
@ -362,191 +358,6 @@ describe("ActivityTab — refresh button", () => {
});
});
// ── Suite 6.5: ACTIVITY_LOGGED subscription (#61 stage 3) ─────────────────────
//
// Pin the post-#61 behaviour: WS push extends the rendered list with NO
// additional HTTP fetch. The 5s polling loop is gone; live updates
// arrive over the WebSocket bus.
describe("ActivityTab — #61 stage 3: ACTIVITY_LOGGED subscription", () => {
beforeEach(() => {
vi.clearAllMocks();
mockGet.mockResolvedValue([]);
_resetSocketEventListenersForTests();
});
afterEach(() => {
cleanup();
_resetSocketEventListenersForTests();
});
function emitActivity(overrides: {
workspaceId?: string;
activityType?: string;
summary?: string;
id?: string;
} = {}) {
const realNow = Date.now();
emitSocketEvent({
event: "ACTIVITY_LOGGED",
workspace_id: overrides.workspaceId ?? "ws-1",
timestamp: new Date(realNow).toISOString(),
payload: {
id: overrides.id ?? `act-${Math.random().toString(36).slice(2)}`,
activity_type: overrides.activityType ?? "agent_log",
source_id: null,
target_id: null,
method: null,
summary: overrides.summary ?? "live-pushed",
status: "ok",
created_at: new Date(realNow - 5_000).toISOString(),
},
});
}
it("WS push for matching workspace prepends to the list with NO HTTP call", async () => {
render(<ActivityTab workspaceId="ws-1" />);
await waitFor(() => {
expect(screen.getByText(/0 activities|no activity/i)).toBeTruthy();
});
mockGet.mockClear();
await act(async () => {
emitActivity({ summary: "live-row-from-bus" });
});
await waitFor(() => {
expect(screen.getByText(/live-row-from-bus/)).toBeTruthy();
});
expect(mockGet).not.toHaveBeenCalled();
});
it("WS push for a different workspace is ignored", async () => {
render(<ActivityTab workspaceId="ws-1" />);
await waitFor(() => screen.getByText(/no activity/i));
await act(async () => {
emitActivity({
workspaceId: "ws-other",
summary: "should-not-render-other-ws",
});
});
expect(screen.queryByText(/should-not-render-other-ws/)).toBeNull();
});
it("WS push respects the active filter — non-matching activity_type is ignored", async () => {
render(<ActivityTab workspaceId="ws-1" />);
await waitFor(() => screen.getByText(/no activity/i));
// Apply "Tasks" filter.
clickButton(/tasks/i);
await waitFor(() => {
expect(
screen.getByRole("button", { name: /tasks/i }).getAttribute("aria-pressed"),
).toBe("true");
});
// Push an a2a_send (does NOT match task_update filter).
await act(async () => {
emitActivity({
activityType: "a2a_send",
summary: "should-not-render-filter-mismatch",
});
});
expect(
screen.queryByText(/should-not-render-filter-mismatch/),
).toBeNull();
});
it("WS push respects the active filter — matching activity_type is rendered", async () => {
render(<ActivityTab workspaceId="ws-1" />);
await waitFor(() => screen.getByText(/no activity/i));
clickButton(/tasks/i);
await waitFor(() => {
expect(
screen.getByRole("button", { name: /tasks/i }).getAttribute("aria-pressed"),
).toBe("true");
});
await act(async () => {
emitActivity({
activityType: "task_update",
summary: "task-filter-match",
});
});
await waitFor(() => {
expect(screen.getByText(/task-filter-match/)).toBeTruthy();
});
});
it("WS push while autoRefresh is paused is ignored", async () => {
render(<ActivityTab workspaceId="ws-1" />);
await waitFor(() => screen.getByText(/no activity/i));
// Toggle Live → Paused.
clickButton(/live/i);
await waitFor(() => {
expect(screen.getByText(/Paused/)).toBeTruthy();
});
await act(async () => {
emitActivity({ summary: "should-not-render-paused" });
});
expect(screen.queryByText(/should-not-render-paused/)).toBeNull();
});
it("WS push for a row already in the list is deduped (no double-render)", async () => {
// Bootstrap with one row — same id as the WS push to trigger dedup.
mockGet.mockResolvedValueOnce([
makeEntry({ id: "shared-id", summary: "bootstrap-summary" }),
]);
render(<ActivityTab workspaceId="ws-1" />);
await waitFor(() => {
expect(screen.getByText(/bootstrap-summary/)).toBeTruthy();
});
mockGet.mockClear();
// Push a row with the SAME id but a different summary — must not
// render the new summary; original row stays.
await act(async () => {
emitActivity({
id: "shared-id",
summary: "should-not-replace-existing",
});
});
expect(screen.queryByText(/should-not-replace-existing/)).toBeNull();
// Also verify count didn't grow.
expect(screen.getByText(/1 activities/)).toBeTruthy();
});
it("does NOT poll on a 5s interval after mount (post-#61)", async () => {
vi.useFakeTimers();
try {
render(<ActivityTab workspaceId="ws-1" />);
// Drain the mount-time bootstrap promise.
await act(async () => {
await Promise.resolve();
await Promise.resolve();
});
const callsAfterBootstrap = mockGet.mock.calls.length;
expect(callsAfterBootstrap).toBeGreaterThanOrEqual(1);
// Pre-#61: a 30s clock advance fires 6 more polls. Post-#61: 0.
await act(async () => {
vi.advanceTimersByTime(30_000);
});
expect(mockGet.mock.calls.length).toBe(callsAfterBootstrap);
} finally {
vi.useRealTimers();
}
});
});
// ── Suite 7: Activity count ───────────────────────────────────────────────────
describe("ActivityTab — activity count", () => {

View File

@ -1,28 +1,18 @@
// @vitest-environment jsdom
/**
* CommunicationOverlay tests pin both the 2026-05-04 fan-out cap fix
* AND the 2026-05-07 polling ACTIVITY_LOGGED-subscriber refactor
* (issue #61 stage 1).
* CommunicationOverlay tests pin the rate-limit fix shipped 2026-05-04.
*
* The overlay used to poll /workspaces/:id/activity?limit=5 on a 30s
* interval per online workspace (capped at 3). Post-#61: it bootstraps
* once on mount via the same HTTP path (cap of 3 retained), then
* subscribes to ACTIVITY_LOGGED via the global socket bus for live
* updates. No interval poll.
* The overlay polls /workspaces/:id/activity?limit=5 for each online
* workspace. Pre-fix it (a) polled regardless of visibility and (b)
* fanned out to 6 workspaces every 10s. With 8+ workspaces a user
* triggered sustained 429s (server-side rate limit is 600 req/min/IP).
*
* These tests pin:
* 1. Bootstrap fan-out cap of 3 even with 6 online nodes, only 3
* HTTP fetches on mount.
* 2. Visibility gate when collapsed, no HTTP fetches; re-open
* re-bootstraps.
* 3. NO interval polling advancing the clock past 30s does not fire
* additional HTTP calls.
* 4. WS push extends the rendered list without firing any HTTP call.
* 5. WS push for an offline workspace is ignored.
* 6. WS push for a non-comm activity_type is ignored.
* 1. Fan-out cap of 3 even with 6 online nodes, only 3 fetches
* 2. Visibility gate when collapsed, no polling
*
* If a future refactor regresses any of these, CI fails before the
* regression hits a paying tenant.
* If a future refactor pushes either dial back up, CI fails before
* the regression hits a paying tenant.
*/
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, cleanup, act, fireEvent } from "@testing-library/react";
@ -33,7 +23,7 @@ vi.mock("@/lib/api", () => ({
api: { get: vi.fn() },
}));
// Six online nodes — enough to verify the bootstrap cap of 3.
// Six online nodes — enough to verify the cap of 3.
const mockStoreState = {
selectedNodeId: null as string | null,
nodes: [
@ -66,10 +56,6 @@ vi.mock("@/lib/design-tokens", () => ({
// ── Imports (after mocks) ─────────────────────────────────────────────────────
import { api } from "@/lib/api";
import {
emitSocketEvent,
_resetSocketEventListenersForTests,
} from "@/store/socket-events";
import { CommunicationOverlay } from "../CommunicationOverlay";
const mockGet = vi.mocked(api.get);
@ -80,34 +66,30 @@ beforeEach(() => {
vi.useFakeTimers();
mockGet.mockReset();
mockGet.mockResolvedValue([]);
// Drop any subscribers the previous test left on the singleton bus —
// each render adds one via useSocketEvent.
_resetSocketEventListenersForTests();
});
afterEach(() => {
cleanup();
vi.useRealTimers();
_resetSocketEventListenersForTests();
});
// ── Tests ─────────────────────────────────────────────────────────────────────
describe("CommunicationOverlay — bootstrap fan-out cap", () => {
it("bootstraps at most 3 of 6 online workspaces (rate-limit floor preserved post-#61)", async () => {
describe("CommunicationOverlay — fan-out cap", () => {
it("polls at most 3 of 6 online workspaces (rate-limit floor)", async () => {
await act(async () => {
render(<CommunicationOverlay />);
});
// Mount fires the bootstrap synchronously — pre-#61 this was the
// first poll cycle; post-#61 it's the only HTTP fetch (live updates
// arrive via WS push). 6 nodes → 3 fetches.
// Mount fires the first poll synchronously (no interval tick yet).
// Pre-fix: 6 calls. Post-fix: 3.
expect(mockGet).toHaveBeenCalledTimes(3);
// Verify the calls are for the FIRST 3 online nodes (slice order).
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-1/activity?limit=5");
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-2/activity?limit=5");
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-3/activity?limit=5");
});
it("never bootstraps offline workspaces", async () => {
it("never polls offline workspaces", async () => {
await act(async () => {
render(<CommunicationOverlay />);
});
@ -117,39 +99,40 @@ describe("CommunicationOverlay — bootstrap fan-out cap", () => {
});
});
describe("CommunicationOverlay — no interval polling (post-#61)", () => {
// The pre-#61 implementation re-fetched every 30s per workspace.
// Post-#61 the only HTTP path is the bootstrap on mount + on
// visibility-toggle. This test pins the absence of any interval
// poll: a 60s clock advance must not produce a second round of
// fetches.
it("does NOT poll on a 30s interval after bootstrap", async () => {
describe("CommunicationOverlay — cadence", () => {
it("uses 30s interval cadence (was 10s pre-fix)", async () => {
await act(async () => {
render(<CommunicationOverlay />);
});
expect(mockGet).toHaveBeenCalledTimes(3); // initial bootstrap
mockGet.mockClear();
expect(mockGet).toHaveBeenCalledTimes(3); // initial mount poll
// Advance 60s — well past any plausible cadence the prior version
// could have used.
// Advance 10s — pre-fix this would fire another poll. Post-fix: silent.
await act(async () => {
vi.advanceTimersByTime(60_000);
vi.advanceTimersByTime(10_000);
});
expect(mockGet).not.toHaveBeenCalled();
expect(mockGet).toHaveBeenCalledTimes(3);
// Advance to 30s — interval fires.
await act(async () => {
vi.advanceTimersByTime(20_000);
});
expect(mockGet).toHaveBeenCalledTimes(6); // +3 from second tick
});
});
describe("CommunicationOverlay — visibility gate", () => {
// The visibility gate now does two things post-#61:
// - while closed, the WS handler short-circuits (no setComms churn)
// - re-opening triggers a fresh bootstrap so the list reflects
// anything that happened while the panel was collapsed
// The visibility gate is the dial that drops collapsed-panel polling
// to ZERO. The cadence test above can't catch its removal — if a
// refactor dropped `if (!visible) return`, the cadence test would
// still pass because the effect would still fire every 30s.
//
// Direct probe: render with comms-returning mock so the panel
// actually renders (close button only exists in the expanded panel,
// not the collapsed button-state). Click close, advance the clock,
// assert no further fetches.
it("stops fetching while collapsed and re-bootstraps on re-open", async () => {
it("stops polling after the user collapses the panel", async () => {
// Mock returns one a2a_send so comms.length > 0 → panel renders →
// close button accessible.
mockGet.mockResolvedValue([
{
id: "act-1",
@ -167,202 +150,29 @@ describe("CommunicationOverlay — visibility gate", () => {
const { getByLabelText } = await act(async () => {
return render(<CommunicationOverlay />);
});
// Drain pending microtasks (resolves the await in bootstrap) so
// setComms lands and the panel renders. Don't advance time — it's
// not load-bearing for the gate test, but matches the pattern used
// pre-#61 for stability.
// Drain pending microtasks (resolves the await in fetchComms) so
// setComms lands and the panel renders. Don't advance time — that
// would fire the next interval tick and pollute the assertion.
await act(async () => {
await Promise.resolve();
await Promise.resolve();
await Promise.resolve();
});
expect(mockGet).toHaveBeenCalledTimes(3); // initial bootstrap
// Initial mount polled 3 workspaces.
expect(mockGet).toHaveBeenCalledTimes(3);
mockGet.mockClear();
// Click close. While closed, no fetches and no WS-driven updates.
// Click the close button. Synchronous getByLabelText avoids
// findBy's internal setTimeout (deadlocks under useFakeTimers).
const closeBtn = getByLabelText("Close communications panel");
await act(async () => {
fireEvent.click(closeBtn);
});
// Advance well past the 30s cadence — gate should suppress the tick.
await act(async () => {
vi.advanceTimersByTime(60_000);
});
expect(mockGet).not.toHaveBeenCalled();
// Re-open via the collapsed button. Must trigger a fresh bootstrap.
const openBtn = getByLabelText("Show communications panel");
await act(async () => {
fireEvent.click(openBtn);
});
await act(async () => {
await Promise.resolve();
await Promise.resolve();
});
expect(mockGet).toHaveBeenCalledTimes(3); // re-bootstrap on re-open
});
});
describe("CommunicationOverlay — WS subscription (#61 stage 1 core)", () => {
// The load-bearing post-#61 behaviour. Every test in this block must
// verify (a) the WS push DID update the rendered comms list, and
// (b) NO additional HTTP call was fired — the whole point of the
// refactor is to remove the polling-driven HTTP traffic.
function emitActivityLogged(overrides: Partial<{
workspaceId: string;
payload: Record<string, unknown>;
}> = {}) {
emitSocketEvent({
event: "ACTIVITY_LOGGED",
workspace_id: overrides.workspaceId ?? "ws-1",
timestamp: new Date().toISOString(),
payload: {
id: `act-${Math.random().toString(36).slice(2)}`,
activity_type: "a2a_send",
source_id: "ws-1",
target_id: "ws-2",
summary: "live push",
status: "ok",
duration_ms: 42,
created_at: new Date().toISOString(),
...overrides.payload,
},
});
}
it("WS push for a comm activity_type extends the rendered list with NO additional HTTP call", async () => {
const { container } = await act(async () => {
return render(<CommunicationOverlay />);
});
expect(mockGet).toHaveBeenCalledTimes(3); // bootstrap
mockGet.mockClear();
await act(async () => {
emitActivityLogged({ payload: { summary: "hello" } });
});
await act(async () => {
await Promise.resolve();
});
// Two pins:
// 1. comms list reflects the live push (look for the summary text)
// 2. zero HTTP fetches fired during the WS path
expect(container.textContent).toContain("hello");
expect(mockGet).not.toHaveBeenCalled();
});
it("WS push for an offline workspace is ignored", async () => {
const { container } = await act(async () => {
return render(<CommunicationOverlay />);
});
mockGet.mockClear();
await act(async () => {
emitActivityLogged({
workspaceId: "ws-offline",
payload: { source_id: "ws-offline", summary: "should-not-render" },
});
});
await act(async () => {
await Promise.resolve();
});
expect(container.textContent).not.toContain("should-not-render");
expect(mockGet).not.toHaveBeenCalled();
});
it("WS push for a non-comm activity_type is ignored (e.g. delegation)", async () => {
const { container } = await act(async () => {
return render(<CommunicationOverlay />);
});
mockGet.mockClear();
await act(async () => {
emitActivityLogged({
payload: {
activity_type: "delegation",
summary: "should-not-render-delegation",
},
});
});
await act(async () => {
await Promise.resolve();
});
expect(container.textContent).not.toContain("should-not-render-delegation");
expect(mockGet).not.toHaveBeenCalled();
});
it("WS push while the panel is collapsed is ignored (no churn on hidden state)", async () => {
// Bootstrap with one comm so the panel renders → close button
// accessible. Then collapse, emit a WS push, re-open: the rendered
// list must come from the re-bootstrap, NOT from the WS-push that
// arrived during the closed state. Also: nothing visible while
// closed (the collapsed button shows only the count, not summaries).
mockGet.mockResolvedValue([
{
id: "act-bootstrap",
workspace_id: "ws-1",
activity_type: "a2a_send",
source_id: "ws-1",
target_id: "ws-2",
summary: "bootstrap-summary",
status: "ok",
duration_ms: 1,
created_at: new Date().toISOString(),
},
]);
const { getByLabelText, container } = await act(async () => {
return render(<CommunicationOverlay />);
});
await act(async () => {
await Promise.resolve();
await Promise.resolve();
});
// Collapse.
const closeBtn = getByLabelText("Close communications panel");
await act(async () => {
fireEvent.click(closeBtn);
});
// Bootstrap mock returns nothing on the re-open path so we can
// distinguish "WS push leaked through the gate" from "re-bootstrap
// refilled the list."
mockGet.mockReset();
mockGet.mockResolvedValue([]);
await act(async () => {
emitActivityLogged({
payload: { summary: "leaked-while-closed" },
});
});
await act(async () => {
await Promise.resolve();
});
// Closed state: rendered DOM must not show any push-derived text.
expect(container.textContent).not.toContain("leaked-while-closed");
});
it("non-ACTIVITY_LOGGED events are ignored (e.g. WORKSPACE_OFFLINE)", async () => {
const { container } = await act(async () => {
return render(<CommunicationOverlay />);
});
mockGet.mockClear();
await act(async () => {
emitSocketEvent({
event: "WORKSPACE_OFFLINE",
workspace_id: "ws-1",
timestamp: new Date().toISOString(),
payload: { summary: "should-not-render-event" },
});
});
await act(async () => {
await Promise.resolve();
});
expect(container.textContent).not.toContain("should-not-render-event");
expect(mockGet).not.toHaveBeenCalled();
});
});

View File

@ -1,9 +1,8 @@
"use client";
import { useState, useEffect, useCallback, useRef } from "react";
import { useState, useEffect, useCallback } from "react";
import { api } from "@/lib/api";
import { ConversationTraceModal } from "@/components/ConversationTraceModal";
import { useSocketEvent } from "@/hooks/useSocketEvent";
import { type ActivityEntry } from "@/types/activity";
import { useWorkspaceName } from "@/hooks/useWorkspaceName";
import { inferA2AErrorHint } from "./chat/a2aErrorHint";
@ -49,15 +48,6 @@ export function ActivityTab({ workspaceId }: Props) {
const [traceOpen, setTraceOpen] = useState(false);
const resolveName = useWorkspaceName();
// Refs let the WS handler read the latest filter / autoRefresh
// selection without re-subscribing on every state change. The bus
// listener is registered exactly once per mount via useSocketEvent's
// ref-internal pattern; subscriber-side filtering reads from these.
const filterRef = useRef(filter);
filterRef.current = filter;
const autoRefreshRef = useRef(autoRefresh);
autoRefreshRef.current = autoRefresh;
const loadActivities = useCallback(async () => {
try {
const typeParam = filter !== "all" ? `?type=${filter}` : "";
@ -76,58 +66,11 @@ export function ActivityTab({ workspaceId }: Props) {
loadActivities();
}, [loadActivities]);
// Live-update path (issue #61 stage 3, replaces the 5s setInterval).
// ACTIVITY_LOGGED events from this workspace prepend to the rendered
// list — dedup by id so a server-side update + a poll reply don't
// double-render the same row.
//
// Honours the user's autoRefresh toggle: when paused, live updates
// are dropped until the user re-enables Live (or hits Refresh, which
// re-bootstraps via loadActivities).
//
// Filter awareness: matches the server-side `?type=<filter>`
// semantics so the panel doesn't show rows the user excluded.
useSocketEvent((msg) => {
if (!autoRefreshRef.current) return;
if (msg.event !== "ACTIVITY_LOGGED") return;
if (msg.workspace_id !== workspaceId) return;
const p = (msg.payload || {}) as Record<string, unknown>;
const activityType = (p.activity_type as string) || "";
const f = filterRef.current;
if (f !== "all" && activityType !== f) return;
const entry: ActivityEntry = {
id:
(p.id as string) ||
`ws-push-${msg.timestamp || Date.now()}-${msg.workspace_id}`,
workspace_id: msg.workspace_id,
activity_type: activityType,
source_id: (p.source_id as string | null) ?? null,
target_id: (p.target_id as string | null) ?? null,
method: (p.method as string | null) ?? null,
summary: (p.summary as string | null) ?? null,
request_body: (p.request_body as Record<string, unknown> | null) ?? null,
response_body:
(p.response_body as Record<string, unknown> | null) ?? null,
duration_ms: (p.duration_ms as number | null) ?? null,
status: (p.status as string) || "ok",
error_detail: (p.error_detail as string | null) ?? null,
created_at:
(p.created_at as string) ||
msg.timestamp ||
new Date().toISOString(),
};
setActivities((prev) => {
// Dedup by id — a row that arrived via the bootstrap fetch and
// also fires ACTIVITY_LOGGED from a delayed server-side hook
// must render exactly once.
if (prev.some((e) => e.id === entry.id)) return prev;
return [entry, ...prev];
});
});
useEffect(() => {
if (!autoRefresh) return;
const interval = setInterval(loadActivities, 5000);
return () => clearInterval(interval);
}, [loadActivities, autoRefresh]);
return (
<div className="flex flex-col h-full">

View File

@ -9,7 +9,6 @@
// AttachmentLightbox).
import { useState, useEffect, useRef } from "react";
import { platformAuthHeaders } from "@/lib/api";
import type { ChatAttachment } from "./types";
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
import { AttachmentChip } from "./AttachmentViews";
@ -44,8 +43,13 @@ export function AttachmentAudio({ workspaceId, attachment, onDownload, tone }: P
void (async () => {
try {
const href = resolveAttachmentHref(workspaceId, attachment.uri);
const headers: Record<string, string> = {};
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
const slug = getTenantSlug();
if (slug) headers["X-Molecule-Org-Slug"] = slug;
const res = await fetch(href, {
headers: platformAuthHeaders(),
headers,
credentials: "include",
signal: AbortSignal.timeout(60_000),
});
@ -112,5 +116,9 @@ export function AttachmentAudio({ workspaceId, attachment, onDownload, tone }: P
);
}
// Local getTenantSlug() removed — auth-header construction now goes
// through platformAuthHeaders() from @/lib/api (#178).
// Extract the tenant slug from a <slug>.moleculesai.app hostname, or
// null when not on a tenant subdomain (or during SSR, where there is
// no window at all).
function getTenantSlug(): string | null {
  if (typeof window === "undefined") return null;
  const hit = /^([^.]+)\.moleculesai\.app$/.exec(window.location.hostname);
  return hit === null ? null : hit[1];
}

View File

@ -35,7 +35,6 @@
// downscale via canvas, but defer that to v2.
import { useState, useEffect, useRef } from "react";
import { platformAuthHeaders } from "@/lib/api";
import type { ChatAttachment } from "./types";
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
import { AttachmentLightbox } from "./AttachmentLightbox";
@ -76,14 +75,22 @@ export function AttachmentImage({ workspaceId, attachment, onDownload, tone }: P
}
// Platform-auth path: identical to downloadChatFile but we keep
// the blob (don't trigger a Save-As). Auth headers come from the
// shared `platformAuthHeaders()` helper — one source of truth for
// every authenticated raw fetch in the canvas (#178).
// the blob (don't trigger a Save-As). We can't delegate to
// downloadChatFile — it unconditionally triggers a Save-As — so this
// is a separate fetch that mirrors its header plumbing.
void (async () => {
try {
const href = resolveAttachmentHref(workspaceId, attachment.uri);
const headers: Record<string, string> = {};
// Read the same env var downloadChatFile reads — single source
// of truth would be cleaner; refactor opportunity for PR-2 if
// we add the same path to AttachmentVideo.
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
const slug = getTenantSlug();
if (slug) headers["X-Molecule-Org-Slug"] = slug;
const res = await fetch(href, {
headers: platformAuthHeaders(),
headers,
credentials: "include",
signal: AbortSignal.timeout(30_000),
});
@ -177,7 +184,15 @@ export function AttachmentImage({ workspaceId, attachment, onDownload, tone }: P
);
}
// Local getTenantSlug() removed — auth-header construction now goes
// through platformAuthHeaders() from @/lib/api which uses the canonical
// getTenantSlug() from @/lib/tenant. This eliminates the duplicate
// hostname-regex + the duplicate bearer-token-attach pattern (#178).
// Internal helper — duplicated from uploads.ts (it's not exported
// there). Kept local so this component doesn't reach into private
// surface; if AttachmentVideo / AttachmentPDF in PR-2/PR-3 also need
// it, lift to an exported helper at that point (the third-caller
// rule).
// Derive the tenant slug from the current hostname. Tenant hosts have
// the shape <slug>.moleculesai.app; anything else (localhost, bare
// apex, SSR with no window) yields null.
function getTenantSlug(): string | null {
  if (typeof window === "undefined") return null;
  const match = window.location.hostname.match(/^([^.]+)\.moleculesai\.app$/);
  return match?.[1] ?? null;
}

View File

@ -33,7 +33,6 @@
// timeout, swap to chip. Implemented as a 3-second watchdog.
import { useState, useEffect, useRef } from "react";
import { platformAuthHeaders } from "@/lib/api";
import type { ChatAttachment } from "./types";
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
import { AttachmentLightbox } from "./AttachmentLightbox";
@ -70,8 +69,13 @@ export function AttachmentPDF({ workspaceId, attachment, onDownload, tone }: Pro
void (async () => {
try {
const href = resolveAttachmentHref(workspaceId, attachment.uri);
const headers: Record<string, string> = {};
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
const slug = getTenantSlug();
if (slug) headers["X-Molecule-Org-Slug"] = slug;
const res = await fetch(href, {
headers: platformAuthHeaders(),
headers,
credentials: "include",
signal: AbortSignal.timeout(60_000),
});
@ -185,5 +189,9 @@ function PdfGlyph() {
);
}
// Local getTenantSlug() removed — auth-header construction now goes
// through platformAuthHeaders() from @/lib/api (#178).
// Tenant-slug lookup: returns the <slug> of a <slug>.moleculesai.app
// hostname, null on non-tenant hosts and during server-side render.
function getTenantSlug(): string | null {
  if (typeof window === "undefined") return null;
  const hostname = window.location.hostname;
  const captured = /^([^.]+)\.moleculesai\.app$/.exec(hostname);
  return captured ? captured[1] : null;
}

View File

@ -26,7 +26,6 @@
// to download the full file.
import { useState, useEffect } from "react";
import { platformAuthHeaders } from "@/lib/api";
import type { ChatAttachment } from "./types";
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
import { AttachmentChip } from "./AttachmentViews";
@ -58,13 +57,13 @@ export function AttachmentTextPreview({ workspaceId, attachment, onDownload, ton
void (async () => {
try {
const href = resolveAttachmentHref(workspaceId, attachment.uri);
// Only attach platform auth headers for in-platform URIs —
// off-platform URLs (HTTP/HTTPS attachments) MUST NOT receive
// our bearer token (it would leak the admin token to a third
// party). The branch is preserved with the new shared helper.
const headers: Record<string, string> = isPlatformAttachment(attachment.uri)
? platformAuthHeaders()
: {};
const headers: Record<string, string> = {};
if (isPlatformAttachment(attachment.uri)) {
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
const slug = getTenantSlug();
if (slug) headers["X-Molecule-Org-Slug"] = slug;
}
const res = await fetch(href, {
headers,
credentials: "include",
@ -183,5 +182,9 @@ export function AttachmentTextPreview({ workspaceId, attachment, onDownload, ton
);
}
// Local getTenantSlug() removed — auth-header construction now goes
// through platformAuthHeaders() from @/lib/api (#178).
// Read the tenant slug off the SaaS subdomain (<slug>.moleculesai.app).
// SSR guard first — no window means no hostname to inspect.
function getTenantSlug(): string | null {
  if (typeof window === "undefined") return null;
  const hit = window.location.hostname.match(/^([^.]+)\.moleculesai\.app$/);
  return hit === null ? null : hit[1];
}

View File

@ -25,7 +25,6 @@
// fetch via service worker. v2 if measured-needed.
import { useState, useEffect, useRef } from "react";
import { platformAuthHeaders } from "@/lib/api";
import type { ChatAttachment } from "./types";
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
import { AttachmentChip } from "./AttachmentViews";
@ -62,8 +61,13 @@ export function AttachmentVideo({ workspaceId, attachment, onDownload, tone }: P
void (async () => {
try {
const href = resolveAttachmentHref(workspaceId, attachment.uri);
const headers: Record<string, string> = {};
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
const slug = getTenantSlug();
if (slug) headers["X-Molecule-Org-Slug"] = slug;
const res = await fetch(href, {
headers: platformAuthHeaders(),
headers,
credentials: "include",
// Videos are larger than images on average; give the request
// more headroom. The server's per-request body cap (50MB) is
@ -143,5 +147,11 @@ export function AttachmentVideo({ workspaceId, attachment, onDownload, tone }: P
);
}
// Local getTenantSlug() removed — auth-header construction now goes
// through platformAuthHeaders() from @/lib/api (#178).
// Internal helper — same shape as AttachmentImage's. Lifted to a
// shared util in PR-2.5 if a third caller needs it (PDF, audio).
// Resolve the tenant slug from window.location; null when not running
// on a <slug>.moleculesai.app host or when there is no window (SSR).
function getTenantSlug(): string | null {
  if (typeof window === "undefined") return null;
  const parsed = /^([^.]+)\.moleculesai\.app$/.exec(window.location.hostname);
  return parsed?.[1] ?? null;
}

View File

@ -1,16 +1,12 @@
import { PLATFORM_URL, platformAuthHeaders } from "@/lib/api";
import { PLATFORM_URL } from "@/lib/api";
import { getTenantSlug } from "@/lib/tenant";
import type { ChatAttachment } from "./types";
/** Chat attachments are intentionally uploaded via a direct fetch()
* instead of the `api.post` helper — `api.post` JSON-stringifies the
* body, which would 500 on a Blob. Auth headers (tenant slug, admin
* token, credentials) come from `platformAuthHeaders()` — the same
* helper `request()` uses, so a missing bearer surfaces as a single
* fix site instead of N copies. We deliberately do NOT set
* Content-Type so the browser writes the multipart boundary into the
* header; setting it manually would yield a multipart body the server
* can't parse. See lib/api.ts platformAuthHeaders() for the full
* rationale on why this pair must stay matched. */
* body, which would 500 on a Blob. Mirrors the header plumbing
* (tenant slug, admin token, credentials) so SaaS + self-hosted
* callers work the same way. */
export async function uploadChatFiles(
workspaceId: string,
files: File[],
@ -20,12 +16,18 @@ export async function uploadChatFiles(
const form = new FormData();
for (const f of files) form.append("files", f, f.name);
const headers: Record<string, string> = {};
const slug = getTenantSlug();
if (slug) headers["X-Molecule-Org-Slug"] = slug;
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
// Uploads legitimately take a while on cold cache (tar write +
// docker cp into the container). 60s is comfortable for the 25MB/
// 50MB caps the server enforces.
const res = await fetch(`${PLATFORM_URL}/workspaces/${workspaceId}/chat/uploads`, {
method: "POST",
headers: platformAuthHeaders(),
headers,
body: form,
credentials: "include",
signal: AbortSignal.timeout(60_000),
@ -141,8 +143,14 @@ export async function downloadChatFile(
return;
}
const headers: Record<string, string> = {};
const slug = getTenantSlug();
if (slug) headers["X-Molecule-Org-Slug"] = slug;
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
const res = await fetch(href, {
headers: platformAuthHeaders(),
headers,
credentials: "include",
signal: AbortSignal.timeout(60_000),
});

View File

@ -1,130 +0,0 @@
// @vitest-environment node
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
// Tests for the boot-time matched-pair guard added to next.config.ts.
//
// Why this lives in src/lib/__tests__ even though the function is in
// canvas/next.config.ts:
// - next.config.ts runs as ESM-but-also-CJS depending on which
// consumer loads it (Next.js dev server vs Next.js build); we
// want the test to be a plain ESM module Vitest already handles.
// - Importing from "../../../next.config" pulls in the rest of the
// file (loadMonorepoEnv, the default export, etc.) which has
// side effects on module load (it runs loadMonorepoEnv()
// immediately). To keep the test hermetic we don't import — we
// duplicate the function under test.
//
// Sourcing the function from a shared module would be cleaner, but
// next.config.ts is required to be a single self-contained file by
// Next.js's loader on some host configurations. Pin invariant: the
// duplicated function below MUST stay byte-identical to the one in
// next.config.ts. If you change one, change the other and bump this
// comment.
// Boot-time matched-pair guard: ADMIN_TOKEN (server-side gate) and
// NEXT_PUBLIC_ADMIN_TOKEN (client-side bearer) must be set together or
// unset together. A lone member of the pair produces silent 401s, so
// warn loudly on the console. Empty string counts as unset — `KEY=` in
// a .env file and `unset KEY` yield the same verdict.
function checkAdminTokenPair(): void {
  const serverSet = !!process.env.ADMIN_TOKEN;
  const clientSet = !!process.env.NEXT_PUBLIC_ADMIN_TOKEN;
  // Matched pair (both set, or both unset) — nothing to warn about.
  if (serverSet === clientSet) return;
  const message = serverSet
    ? "[next.config] ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not — " +
      "canvas will 401 against workspace-server because the bearer header " +
      "is never attached. Set both to the same value, or unset both."
    : "[next.config] NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not — " +
      "workspace-server will reject the bearer because no AdminAuth gate " +
      "is configured. Set both to the same value, or unset both.";
  // eslint-disable-next-line no-console
  console.error(message);
}
describe("checkAdminTokenPair", () => {
  // Snapshot env so individual tests can stomp on it without leaking.
  // Rebuild from snapshot in afterEach so the next test sees a known
  // baseline regardless of mutation pattern.
  let originalEnv: Record<string, string | undefined>;
  let errorSpy: ReturnType<typeof vi.spyOn>;
  beforeEach(() => {
    originalEnv = {
      ADMIN_TOKEN: process.env.ADMIN_TOKEN,
      NEXT_PUBLIC_ADMIN_TOKEN: process.env.NEXT_PUBLIC_ADMIN_TOKEN,
    };
    // Known baseline: both unset. Individual tests opt back in per case.
    delete process.env.ADMIN_TOKEN;
    delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
    // Silence the warning in test output while still counting calls.
    errorSpy = vi.spyOn(console, "error").mockImplementation(() => {});
  });
  afterEach(() => {
    // Restore via delete when the snapshot says the var was unset —
    // assigning undefined to process.env would store the string
    // "undefined" (Node coerces env values to strings).
    if (originalEnv.ADMIN_TOKEN === undefined) delete process.env.ADMIN_TOKEN;
    else process.env.ADMIN_TOKEN = originalEnv.ADMIN_TOKEN;
    if (originalEnv.NEXT_PUBLIC_ADMIN_TOKEN === undefined) delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
    else process.env.NEXT_PUBLIC_ADMIN_TOKEN = originalEnv.NEXT_PUBLIC_ADMIN_TOKEN;
    errorSpy.mockRestore();
  });
  it("emits no warning when both are unset", () => {
    checkAdminTokenPair();
    expect(errorSpy).not.toHaveBeenCalled();
  });
  it("emits no warning when both are set (matched pair, the happy path)", () => {
    process.env.ADMIN_TOKEN = "local-dev-admin";
    process.env.NEXT_PUBLIC_ADMIN_TOKEN = "local-dev-admin";
    checkAdminTokenPair();
    expect(errorSpy).not.toHaveBeenCalled();
  });
  it("warns when ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not", () => {
    process.env.ADMIN_TOKEN = "local-dev-admin";
    checkAdminTokenPair();
    expect(errorSpy).toHaveBeenCalledTimes(1);
    // Exact-string assertion — substring would also pass when the
    // function's branch logic is broken (e.g. emits both messages, or
    // emits the wrong one). Pin the exact message that operators will
    // see in their dev console so regressions are visible.
    expect(errorSpy).toHaveBeenCalledWith(
      "[next.config] ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not — " +
        "canvas will 401 against workspace-server because the bearer header " +
        "is never attached. Set both to the same value, or unset both.",
    );
  });
  it("warns when NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not", () => {
    process.env.NEXT_PUBLIC_ADMIN_TOKEN = "local-dev-admin";
    checkAdminTokenPair();
    expect(errorSpy).toHaveBeenCalledTimes(1);
    expect(errorSpy).toHaveBeenCalledWith(
      "[next.config] NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not — " +
        "workspace-server will reject the bearer because no AdminAuth gate " +
        "is configured. Set both to the same value, or unset both.",
    );
  });
  // Empty string in process.env is the JS-side representation of `KEY=`
  // (no value) in a .env file. Treating "" as unset makes the pair
  // invariant symmetric: `KEY=` and `unset KEY` produce the same
  // verdict. Without this branch, an operator who comments out the
  // value but leaves the line would get a false-positive warning.
  it("treats empty string as unset (so KEY= and unset KEY are equivalent)", () => {
    process.env.ADMIN_TOKEN = "";
    process.env.NEXT_PUBLIC_ADMIN_TOKEN = "";
    checkAdminTokenPair();
    expect(errorSpy).not.toHaveBeenCalled();
  });
  it("warns when ADMIN_TOKEN is set and NEXT_PUBLIC_ADMIN_TOKEN is empty string", () => {
    process.env.ADMIN_TOKEN = "local-dev-admin";
    process.env.NEXT_PUBLIC_ADMIN_TOKEN = "";
    checkAdminTokenPair();
    expect(errorSpy).toHaveBeenCalledTimes(1);
    // First branch — server set, client unset.
    expect(errorSpy).toHaveBeenCalledWith(
      expect.stringContaining("ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not"),
    );
  });
});

View File

@ -1,97 +0,0 @@
// @vitest-environment jsdom
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
// Tests for platformAuthHeaders — the shared helper extracted in #178
// to consolidate the bearer-token-attach + tenant-slug-attach pattern
// that was previously duplicated across 7 raw-fetch callsites in the
// canvas (uploads + 5 Attachment* components + the api.ts request()
// function).
//
// What we pin here:
// - Returns a fresh object each call (so callers can mutate without
// leaking into each other).
// - Empty result on a non-tenant host with no admin token (the
// localhost / self-hosted shape).
// - Bearer attached when NEXT_PUBLIC_ADMIN_TOKEN is set.
// - X-Molecule-Org-Slug attached when window.location.hostname is a
// tenant subdomain (<slug>.moleculesai.app).
// - Both attached when both apply (the production SaaS shape).
//
// Why jsdom: getTenantSlug() reads window.location.hostname. Node-only
// environment yields no window and getTenantSlug returns null
// unconditionally — wouldn't exercise the slug branch.
import { platformAuthHeaders } from "../api";
describe("platformAuthHeaders", () => {
  let originalAdminToken: string | undefined;
  // BUG FIX: jsdom environments are created once per test FILE, not per
  // test — an Object.defineProperty override of window.location (see the
  // tenant-subdomain tests below) leaks into every later test in this
  // file. Today only test ordering hides that; snapshot the location
  // object here and restore it in afterEach so ordering can't change
  // verdicts.
  let originalLocation: Location;
  beforeEach(() => {
    originalAdminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
    delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
    originalLocation = window.location;
  });
  afterEach(() => {
    if (originalAdminToken === undefined) delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
    else process.env.NEXT_PUBLIC_ADMIN_TOKEN = originalAdminToken;
    // Undo any per-test hostname override.
    Object.defineProperty(window, "location", {
      value: originalLocation,
      writable: true,
    });
  });
  it("returns an empty object on a non-tenant host with no admin token", () => {
    // jsdom default hostname is "localhost" — not a tenant slug, so
    // getTenantSlug() returns null and no X-Molecule-Org-Slug is added.
    const headers = platformAuthHeaders();
    expect(headers).toEqual({});
  });
  it("attaches Authorization when NEXT_PUBLIC_ADMIN_TOKEN is set", () => {
    process.env.NEXT_PUBLIC_ADMIN_TOKEN = "local-dev-admin";
    const headers = platformAuthHeaders();
    expect(headers).toEqual({ Authorization: "Bearer local-dev-admin" });
  });
  it("does NOT attach Authorization when NEXT_PUBLIC_ADMIN_TOKEN is empty string", () => {
    // Empty-string env is the JS-side shape of `KEY=` in .env.
    // Treating it as unset matches the matched-pair guard in
    // next.config.ts (admin-token-pair.test.ts) — symmetric semantics.
    process.env.NEXT_PUBLIC_ADMIN_TOKEN = "";
    const headers = platformAuthHeaders();
    expect(headers).toEqual({});
  });
  it("attaches X-Molecule-Org-Slug on a tenant subdomain", () => {
    Object.defineProperty(window, "location", {
      value: { hostname: "reno-stars.moleculesai.app" },
      writable: true,
    });
    const headers = platformAuthHeaders();
    expect(headers).toEqual({ "X-Molecule-Org-Slug": "reno-stars" });
  });
  it("attaches both when both apply (production SaaS shape)", () => {
    Object.defineProperty(window, "location", {
      value: { hostname: "reno-stars.moleculesai.app" },
      writable: true,
    });
    process.env.NEXT_PUBLIC_ADMIN_TOKEN = "tenant-bearer";
    const headers = platformAuthHeaders();
    // Pin exact-equality on the full shape — substring/contains
    // assertions would also pass for an extra-header bug.
    expect(headers).toEqual({
      "X-Molecule-Org-Slug": "reno-stars",
      Authorization: "Bearer tenant-bearer",
    });
  });
  it("returns a fresh object each call (callers can mutate safely)", () => {
    process.env.NEXT_PUBLIC_ADMIN_TOKEN = "tok";
    const a = platformAuthHeaders();
    const b = platformAuthHeaders();
    expect(a).not.toBe(b); // distinct refs
    expect(a).toEqual(b); // same content
    a["Content-Type"] = "application/json";
    // Mutation on `a` does not leak into `b`.
    expect(b["Content-Type"]).toBeUndefined();
  });
});

View File

@ -21,45 +21,6 @@ export interface RequestOptions {
timeoutMs?: number;
}
/**
 * Build the platform auth header set used by every authenticated fetch
 * from the canvas (the #178 consolidation — this pattern was previously
 * duplicated across `request()` and 7 raw-fetch callsites: chat
 * uploads/download + 5 Attachment* components).
 *
 * Returns a FRESH object on every call so callers can mutate it — e.g.
 * add `Content-Type: application/json` for JSON bodies. FormData
 * callers must NOT add Content-Type: the browser has to write the
 * multipart boundary itself.
 *
 * Headers attached when applicable (SaaS cross-origin shape):
 * - `X-Molecule-Org-Slug` — tenant slug derived from
 *   `window.location.hostname` by `getTenantSlug()`; the control plane
 *   uses it for fly-replay routing. Omitted on localhost / non-tenant
 *   hosts.
 * - `Authorization: Bearer <NEXT_PUBLIC_ADMIN_TOKEN>` — required by
 *   workspace-server when its ADMIN_TOKEN gate is configured. Omitted
 *   when the env var is unset or empty; the session-cookie path
 *   (`credentials:"include"`) covers that case.
 */
export function platformAuthHeaders(): Record<string, string> {
  const out: Record<string, string> = {};
  const tenant = getTenantSlug();
  if (tenant) out["X-Molecule-Org-Slug"] = tenant;
  const bearer = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
  if (bearer) out["Authorization"] = `Bearer ${bearer}`;
  return out;
}
async function request<T>(
method: string,
path: string,
@ -67,16 +28,17 @@ async function request<T>(
retryCount = 0,
options?: RequestOptions,
): Promise<T> {
// JSON-bodied request — Content-Type is JSON. Auth pair comes from
// the shared helper; see its doc comment for the SaaS-shape rationale.
const headers: Record<string, string> = {
"Content-Type": "application/json",
...platformAuthHeaders(),
};
// Re-read slug locally for the 401 handler below — `headers` already
// has it, but the 401 branch needs the bare value to gate the
// session-probe + redirect logic on tenant context.
// SaaS cross-origin shape:
// - X-Molecule-Org-Slug: derived from window.location.hostname by
// getTenantSlug(). Control plane uses it for fly-replay routing.
// Empty on localhost / non-tenant hosts — safe to omit.
// - credentials:"include": sends the session cookie cross-origin.
// Cookie's Domain=.moleculesai.app attribute + cp's CORS allow this.
const headers: Record<string, string> = { "Content-Type": "application/json" };
const slug = getTenantSlug();
if (slug) headers["X-Molecule-Org-Slug"] = slug;
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
const res = await fetch(`${PLATFORM_URL}${path}`, {
method,

View File

@ -7,32 +7,6 @@ export default defineConfig({
test: {
environment: 'node',
exclude: ['e2e/**', 'node_modules/**', '**/dist/**'],
// CI-conditional test timeout (issue #96).
//
// Vitest's 5000ms default is too tight for the first test in any
// file under our CI shape: `npx vitest run --coverage` on the
// self-hosted Gitea Actions Docker runner. The cold-start cost
// (v8 coverage instrumentation init + JSDOM bootstrap + module-
// graph import for @/components/* and @/lib/* + first React
// render) consistently consumes 5-7 seconds for the first
// synchronous test in heavyweight component files
// (ActivityTab.test.tsx, CreateWorkspaceDialog.test.tsx,
// ConfigTab.provider.test.tsx) — even though every subsequent
// test in the same file completes in 100-1500ms.
//
// Empirically the worst observed first-test was 6453ms in a
// single file (CreateWorkspaceDialog). 30000ms gives ~5x
// headroom over that on CI; we still keep 5000ms locally so
// genuine waitFor races / hung promises stay sensitive in dev.
//
// Same vitest pattern documented at:
// https://vitest.dev/config/testtimeout
// https://vitest.dev/guide/coverage#profiling-test-performance
//
// Per-test duration is still emitted to the CI log; if a test
// ever silently approaches 25-30s under this raised ceiling that
// will surface as a duration regression and we revisit.
testTimeout: process.env.CI ? 30000 : 5000,
// Coverage is instrumented but NOT yet a CI gate — first land
// observability so we can see the baseline, then dial in
// thresholds + a hard gate in a follow-up PR (#1815). Today's

View File

@ -1,43 +0,0 @@
# docker-compose.dev.yml — overlay over docker-compose.yml for local dev
# with air-driven live reload of the platform (workspace-server) service.
#
# Usage:
# docker compose -f docker-compose.yml -f docker-compose.dev.yml up
# (or `make dev` shorthand from repo root)
#
# What this overlay changes vs docker-compose.yml alone:
# - Platform service uses workspace-server/Dockerfile.dev (air on top of
# golang:1.25-alpine) instead of the multi-stage prod Dockerfile.
# - Platform service bind-mounts the host's workspace-server/ source
# into /app/workspace-server so air sees source edits live.
# - Other services (postgres, redis, langfuse, etc.) inherit unchanged
# from docker-compose.yml.
#
# What stays the same:
# - All env vars, volumes, depends_on, healthchecks from docker-compose.yml.
# - Network topology + ports.
# - Postgres/Redis as service containers (no in-process replacements).
services:
platform:
build:
context: .
dockerfile: workspace-server/Dockerfile.dev
# Rebind source: edits under host's workspace-server/ propagate live.
# The named volume on go-build-cache speeds up first build per container.
volumes:
- ./workspace-server:/app/workspace-server
- go-build-cache:/root/.cache/go-build
- go-mod-cache:/go/pkg/mod
# Air signals the running binary on rebuild; ensure shell stops cleanly.
init: true
# Mark the service as dev-mode so the platform can short-circuit any
# behavior that's incompatible with hot-reload (e.g. background
# cron-style watchers that don't survive process restart). No-op
# today; reserved for future flag use.
environment:
MOLECULE_DEV_HOT_RELOAD: "1"
volumes:
go-build-cache:
go-mod-cache:

View File

@ -13,7 +13,6 @@ services:
- pgdata:/var/lib/postgresql/data
networks:
- molecule-monorepo-net
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-dev}"]
interval: 2s
@ -51,7 +50,6 @@ services:
- redisdata:/data
networks:
- molecule-monorepo-net
restart: unless-stopped
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 2s
@ -128,10 +126,6 @@ services:
REDIS_URL: redis://redis:6379
PORT: "${PLATFORM_PORT:-8080}"
PLATFORM_URL: "http://platform:${PLATFORM_PORT:-8080}"
# Container network namespace is already isolated; "all interfaces"
# inside the container = the bridge interface only. The fail-open
# default (127.0.0.1) would block host-to-container access.
BIND_ADDR: "${BIND_ADDR:-0.0.0.0}"
# Default MOLECULE_ENV=development so the WorkspaceAuth / AdminAuth
# middleware fail-open path activates when ADMIN_TOKEN is unset —
# otherwise the canvas (which runs without a bearer in pure local
@ -201,28 +195,12 @@ services:
# App private key — read-only bind-mount. The host-side path is
# gitignored per .gitignore rules (/.secrets/ + *.pem).
- ./.secrets/github-app.pem:/secrets/github-app.pem:ro
# Per-role persona credentials (molecule-core#242 local surface).
# Sourced at workspace creation time by org_import.go::loadPersonaEnvFile
# when a workspace.yaml carries `role: <name>`. The host-side dir is
# populated by the operator-host bootstrap kit (28 dev-tree personas);
# /etc/molecule-bootstrap/personas is the in-container path the
# platform expects (matches the prod tenant-EC2 path so the same code
# works in both modes).
#
# Read-only mount — workspace-server only reads, never writes here.
# If the host dir is empty/missing the platform's loadPersonaEnvFile
# silently no-ops per its existing semantics, so this mount is safe
# even on a fresh machine that hasn't run the bootstrap kit yet.
- ${MOLECULE_PERSONA_ROOT_HOST:-${HOME}/.molecule-ai/personas}:/etc/molecule-bootstrap/personas:ro
ports:
- "${PLATFORM_PUBLISH_PORT:-8080}:${PLATFORM_PORT:-8080}"
networks:
- molecule-monorepo-net
restart: unless-stopped
healthcheck:
# Plain GET — `--spider` would issue HEAD, which returns 404 because
# /health is registered as GET only.
test: ["CMD-SHELL", "wget -qO /dev/null --tries=1 http://localhost:${PLATFORM_PORT:-8080}/health || exit 1"]
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:${PLATFORM_PORT:-8080}/health || exit 1"]
interval: 5s
timeout: 5s
retries: 10
@ -260,7 +238,7 @@ services:
networks:
- molecule-monorepo-net
healthcheck:
test: ["CMD-SHELL", "wget -qO /dev/null --tries=1 http://127.0.0.1:${CANVAS_PORT:-3000} || exit 1"]
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://127.0.0.1:${CANVAS_PORT:-3000} || exit 1"]
interval: 10s
timeout: 5s
retries: 10

View File

@ -1,74 +0,0 @@
# ADR-002: Local-build mode signalled by `MOLECULE_IMAGE_REGISTRY` presence
* Status: Accepted (2026-05-07)
* Issue: #63 (closes Task #194)
* Decision: Hongming (CTO) + Claude Opus 4.7 (implementation)
## Context
Pre-2026-05-06, every Molecule deployment — both production tenants and OSS contributor laptops — pulled workspace-template-* container images from `ghcr.io/molecule-ai/`. Production tenants additionally set `MOLECULE_IMAGE_REGISTRY` to an AWS ECR mirror via Railway env / EC2 user-data, but the OSS default was the upstream GHCR org.
On 2026-05-06 the `Molecule-AI` GitHub org was suspended (saved memory: `feedback_github_botring_fingerprint`). GHCR now returns **403 Forbidden** for every `molecule-ai/workspace-template-*` manifest. OSS contributors who clone `molecule-core` and run `go run ./workspace-server/cmd/server` cannot provision a workspace — every first provision fails with:
```
docker image "ghcr.io/molecule-ai/workspace-template-claude-code:latest" not found after pull attempt
```
Production tenants are unaffected (their `MOLECULE_IMAGE_REGISTRY` points at ECR, which we still control), but OSS onboarding is broken. Workspace template repos are intentionally separate from `molecule-core` (each runtime is OSS-shape and forkable), and they are mirrored to Gitea (`https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-<runtime>`) — but the provisioner has no path that consumes Gitea source directly.
## Decision
When `MOLECULE_IMAGE_REGISTRY` is **unset** (or empty), the provisioner switches to a **local-build mode** that:
1. Looks up the workspace-template repo's HEAD sha on Gitea via a single API call.
2. Checks whether a SHA-pinned local image (`molecule-local/workspace-template-<runtime>:<sha12>`) already exists; if so, reuses it.
3. Otherwise shallow-clones the repo into `~/.cache/molecule/workspace-template-build/<runtime>/<sha12>/` and runs `docker build --platform=linux/amd64 -t <tag> .`.
4. Hands the SHA-pinned tag to Docker for ContainerCreate, bypassing the registry-pull path entirely.
When `MOLECULE_IMAGE_REGISTRY` is **set**, behavior is unchanged: pull the image from that registry. Existing prod tenants and self-hosters who mirror to a private registry are not affected.
## Consequences
### Positive
* **Zero-config OSS onboarding** — `git clone molecule-core && go run ./workspace-server/cmd/server` boots end-to-end without any registry credentials.
* **Production tenants protected** — same env var, same semantics in SaaS-mode. Migration is a no-op.
* **No new env var** — extending an existing var's semantics ("where to pull, OR build locally if absent") rather than introducing `MOLECULE_LOCAL_BUILD=1` keeps the surface small.
* **SHA-pinned cache** — repeat builds are O(API-call); only template-repo HEAD changes invalidate.
* **Production-parity image** — amd64 emulation on Apple Silicon honours `feedback_local_must_mimic_production`. The provisioner's existing `defaultImagePlatform()` already forces amd64 for parity; building amd64 locally lets that decision stay consistent.
### Negative
* **Conflates two concerns** — `MOLECULE_IMAGE_REGISTRY` now signals BOTH "where to pull" AND "build locally if absent." A future operator who unsets it expecting a hard error will instead get a slow first-provision. Documented in the runbook.
* **First-provision is slow on Apple Silicon** — 5–10 min via QEMU emulation on the cold path. Mitigated by SHA-cache (subsequent runs are <1s lookup + 0s build).
* **Coverage gap** — only 4 of 9 runtimes are mirrored to Gitea today (`claude-code`, `hermes`, `langgraph`, `autogen`). The other 5 fail with an actionable "not mirrored" error. Mirroring those repos is a separate task.
* **Implicit trust boundary** — operator running `go run` implicitly trusts `molecule-ai/molecule-ai-workspace-template-*` repos on Gitea. This is the same trust they would extend to the GHCR images today; not a new attack surface.
## Alternatives considered
1. **New env var `MOLECULE_LOCAL_BUILD=1`** — explicit, but requires OSS contributors to know it exists. Violates the zero-config goal.
2. **Push pre-built images to a Gitea container registry, mirror tag from upstream** — operationally cleaner but: (a) Gitea's container-registry add-on isn't deployed on the operator host, (b) defeats the OSS-contributor goal of "hack on the source, see your changes," since they'd still pull a stale image.
3. **Embed Dockerfiles in molecule-core itself, drop the standalone template repos** — would work but breaks the OSS-shape principle; templates are intentionally separable, anyone-can-fork artifacts.
4. **Build native arch on Apple Silicon (arm64) and drop the platform pin in local-mode** — fast, but creates `linux/arm64` images that diverge from the amd64-only prod runtime. Local-vs-prod debug behavior would diverge. Rejected per `feedback_local_must_mimic_production`.
## Security review
* **Gitea repo URL allowlist** — runtime name must be in the `knownRuntimes` allowlist (defence-in-depth against a future code path that lets cfg.Runtime carry untrusted input). Repo prefix is hardcoded to `https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-`; forks can override via `MOLECULE_LOCAL_TEMPLATE_REPO_PREFIX` (opt-in, default off).
* **Token handling** — clones are anonymous over HTTPS by default (templates are public). `MOLECULE_GITEA_TOKEN`, if set, is passed via URL userinfo for the clone and as `Authorization: token` for the API call. The token is **masked in every log line** via `maskTokenInURL` / `maskTokenInString` and never appears in the cache dir path.
* **No silent fallback** — if Gitea is unreachable or the runtime isn't mirrored, we return a clear error mentioning the repo URL and the missing runtime. We **never** fall back to GHCR/ECR (that would be a confusing bug for an OSS contributor who happened to have stale ECR creds in their docker config).
* **Build-arg injection** — `docker build` is invoked with NO `--build-arg` from external input. Dockerfile is consumed as-is.
* **Cache poisoning** — cache key is the Gitea HEAD sha + Dockerfile content; a force-push to the template repo's main branch regenerates the key on next run. Cache dir is per-user (`$HOME/.cache`), so cross-user attacks aren't relevant in single-user dev mode.
## Versioning + back-compat
* Existing prod tenants set `MOLECULE_IMAGE_REGISTRY=<ECR url>` → unchanged behavior.
* Existing local installs that set the var → unchanged behavior.
* Existing local installs that don't set it → switch to local-build path. Migration: none required (additive); first provision will take 5–10 min instead of failing.
* No deprecations.
## References
* Issue #63 — feat(workspace-server): local-dev provisioner builds from Gitea source
* Saved memory `feedback_local_must_mimic_production` — local docker must mimic prod, no bypasses
* Saved memory `reference_post_suspension_pipeline` — full post-2026-05-06 stack shape
* Saved memory `feedback_github_botring_fingerprint` — what got the org suspended

View File

@ -2,7 +2,7 @@
**Status:** living document — update when you ship a feature that touches one backend.
**Owner:** workspace-server + controlplane teams.
**Last audit:** 2026-05-07 (plugin install/uninstall closed for EC2 backend via EIC SSH push to the bind-mounted `/configs/plugins/<name>/`, mirroring the Files API PR #1702 pattern).
**Last audit:** 2026-05-05 (Claude agent — `provisionWorkspaceAuto` / `StopWorkspaceAuto` / `HasProvisioner` SoT pattern landed in PRs #2811 + #2824).
## Why this exists
@ -54,7 +54,7 @@ For "do we have any backend?", use `HasProvisioner()`, never bare `h.provisioner
| **Files API** | | | | |
| List / Read / Write / Replace / Delete | `container_files.go`, `template_import.go` | `docker exec` + tar `CopyToContainer` | SSH via EIC tunnel (PR #1702) | ✅ parity as of 2026-04-22 (previously docker-only) |
| **Plugins** | | | | |
| Install / uninstall / list | `plugins_install.go` + `plugins_install_eic.go` | `deliverToContainer()` → exec+`CopyToContainer` on local container | `instance_id` set → EIC SSH push of the staged tarball into the EC2's bind-mounted `/configs/plugins/<name>/` (per `workspaceFilePathPrefix`), `chown 1000:1000`, restart | ✅ parity |
| Install / uninstall / list | `plugins_install.go` | `deliverToContainer()` + volume rm | **gap — no live plugin delivery** | 🔴 **docker-only** |
| **Terminal (WebSocket)** | | | | |
| Dispatch | `terminal.go:90-105` | `instance_id=""``handleLocalConnect``docker attach` | `instance_id` set → `handleRemoteConnect` → EIC SSH + `docker exec` | ✅ parity (different implementations, same UX) |
| **A2A proxy** | | | | |

View File

@ -1,41 +1,5 @@
# Local Development
## Workspace Template Images: Local-Build Mode (Issue #63)
OSS contributors who run `molecule-core` locally do **not** need to authenticate to GHCR or AWS ECR. When the `MOLECULE_IMAGE_REGISTRY` env var is **unset**, the platform automatically:
1. Looks up the HEAD sha of `https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-<runtime>` (single API call, no clone).
2. If a local image tagged `molecule-local/workspace-template-<runtime>:<sha12>` already exists, reuses it (cache hit).
3. Otherwise, shallow-clones the repo into `~/.cache/molecule/workspace-template-build/<runtime>/<sha12>/` and runs `docker build --platform=linux/amd64 -t <tag> .`.
4. Hands the SHA-pinned tag to Docker for `ContainerCreate`.
**First-provision build time:** 5–10 min on Apple Silicon (amd64 emulation). Subsequent provisions hit the cache and start in seconds. Cache is invalidated automatically when the template repo's HEAD moves.
**Currently mirrored on Gitea:** `claude-code`, `hermes`, `langgraph`, `autogen`. Other runtimes (`crewai`, `deepagents`, `codex`, `gemini-cli`, `openclaw`) fail with an actionable "not mirrored to Gitea" error pointing at the missing repo.
**Production tenants are unaffected** — every prod tenant sets `MOLECULE_IMAGE_REGISTRY` to its private ECR mirror via Railway env / EC2 user-data, so the SaaS pull path stays identical.
### Environment overrides
| Var | Default | Use case |
|-----|---------|----------|
| `MOLECULE_IMAGE_REGISTRY` | (unset) | Set to a real registry URL to switch from local-build to SaaS-pull mode. |
| `MOLECULE_LOCAL_BUILD_CACHE` | `~/.cache/molecule/workspace-template-build` | Override cache directory. |
| `MOLECULE_LOCAL_TEMPLATE_REPO_PREFIX` | `https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-` | Point at a fork. |
| `MOLECULE_GITEA_TOKEN` | (unset) | Required only if your fork has private template repos. |
### Verifying a switch from the GHCR-retag stopgap
Pre-fix, OSS contributors worked around the suspended GHCR org by manually retagging an `:latest` image. After this change, that workaround is **redundant**: simply unset `MOLECULE_IMAGE_REGISTRY` (or leave it unset), boot the platform, and provision a workspace. Logs will show:
```
Provisioner: local-build mode → using locally-built image molecule-local/workspace-template-claude-code:<sha12> for runtime claude-code
local-build: cloning https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-claude-code → ...
local-build: docker build done in <duration>
```
If you still see `ghcr.io/molecule-ai/...` in the boot log, double-check `env | grep MOLECULE_IMAGE_REGISTRY` — a stale shell export from the pre-fix workaround could keep SaaS-mode active.
## Starting the Stack
```bash

View File

@ -1,147 +0,0 @@
# Rate-limit observability runbook
> Companion to issue #64 ("RATE_LIMIT default re-tune analysis"). After
> #60 deployed the per-tenant `keyFor` keying, the right RATE_LIMIT
> default became data-dependent. This runbook documents the metrics +
> queries an operator should run to confirm whether the current 600
> req/min/key default is correct, too tight, or too loose.
## What's already exposed
The workspace-server's existing Prometheus middleware
(`workspace-server/internal/metrics/metrics.go`) tracks every request
on every path:
```
molecule_http_requests_total{method, path, status} counter
molecule_http_request_duration_seconds_total{method,path,status} counter
```
Path is the matched route pattern (`/workspaces/:id/activity` etc), so
high-cardinality workspace UUIDs do not explode the label space.
The rate limiter middleware (#60, `workspace-server/internal/middleware/ratelimit.go`)
also stamps every response with `X-RateLimit-Limit`, `X-RateLimit-Remaining`,
and `X-RateLimit-Reset`. Operators with browser-side or proxy-side
header capture can read per-request bucket state directly.
No new instrumentation is needed for #64's acceptance criteria. The
metric surface is sufficient — this runbook just collects the queries.
## Queries to run after #60 deploys
### 1. Is the bucket actually firing 429s?
```promql
sum(rate(molecule_http_requests_total{status="429"}[5m]))
```
If this is zero on a given tenant, the bucket isn't being hit. If it's
sustained > 1/min, dig in.
### 2. Which routes attract 429s?
```promql
topk(
10,
sum by (path) (
rate(molecule_http_requests_total{status="429"}[5m])
)
)
```
Expected shape post-#60:
- `/workspaces/:id/activity` should be near zero — the canvas no longer
polls it on a 30s/60s/5s cadence (PRs #69 / #71 / #76).
- Probe / health / heartbeat paths should be ~0 (those routes have a
separate IP-fallback bucket).
If `/workspaces/:id/activity` 429s persist post-PRs-69/71/76 deploy, the
canvas isn't running the WS-subscriber path — investigate WS health
on that tenant.
### 3. Per-bucket-key inference (no direct exposure today)
The bucket map itself is in-memory only; we deliberately do **not**
expose `org:<uuid>` ↔ remaining-tokens because that map can include
SHA-256 hashes of bearer tokens. A tenant that wants per-key visibility
should rely on response headers (`X-RateLimit-Remaining` on every
response from a given session is the bucket's view of that session).
If you genuinely need server-side per-bucket counts for triage,
file a follow-up — the proper shape is a `/internal/ratelimit-stats`
endpoint that emits **counts per key prefix only** (e.g. `org:`, `tok:`,
`ip:`), never the key payloads. Don't roll that ad-hoc; it's a security
review surface.
## Decision tree for the re-tune
After 14 days of production traffic on a tenant, look at the queries
above and walk this tree:
```
Q1: Is the 429 rate sustained > 0.1/sec on any tenant?
├─ NO → The 600 default has comfortable headroom. Either keep it,
│ or lower it carefully (300) ONLY if you have a documented
│ reason (e.g. a misbehaving client we want to throttle harder).
│ Default to "no change" — see #64 for the math.
└─ YES → Q2.
Q2: Is the 429 rate concentrated on ONE tenant or spread across many?
├─ ONE tenant → Operator override: set RATE_LIMIT=1200 or 1800 on that
│ tenant's box. Document in the tenant's ops note. The
│ default does not need to change.
└─ MANY tenants → Q3.
Q3: Are the 429s on a route that polls (e.g. /activity / /peers)?
├─ YES → Confirm PRs #69, #71, #76 have actually deployed to those
│ tenants. If they have and 429s persist, the canvas may have
│ a regression — do not raise RATE_LIMIT. File a canvas issue.
└─ NO → 429s on mutating routes mean genuine load. Raise the default
to 1200 in `workspace-server/internal/router/router.go:54`.
Same PR should attach: the metric chart, the time window,
and a paragraph explaining what changed in our traffic shape.
```
## Alert rule template (drop-in for Prometheus)
```yaml
# Sustained 429s — file is the SLO trip-wire. If this fires, walk the
# decision tree above. NB: the issue#64 acceptance criterion is "two
# weeks of metrics"; this alert is the inverse — it tells you something
# changed before the two weeks are up.
groups:
- name: workspace-server-ratelimit
rules:
- alert: WorkspaceServerRateLimit429Sustained
expr: |
sum by (instance) (
rate(molecule_http_requests_total{status="429"}[10m])
) > 0.1
for: 30m
labels:
severity: warning
owner: workspace-server
annotations:
summary: "{{ $labels.instance }} sustained 429s — see ratelimit-observability runbook"
runbook: "https://git.moleculesai.app/molecule-ai/molecule-core/blob/main/docs/engineering/ratelimit-observability.md"
```
Threshold rationale: 0.1 req/s = 6/min sustained over 10min. Below
that, a 429 is almost certainly a transient burst that the canvas's
retry-once handler at `canvas/src/lib/api.ts:55` already absorbs. The
30m `for:` keeps the alert from chattering on a brief blip.
## Companion probe script
For one-off triage when an operator can reproduce the problem in their
own browser, `scripts/edge-429-probe.sh` (#62) reproduces a canvas-
sized burst against a tenant subdomain and dumps each 429's response
shape so the operator can distinguish workspace-server bucket overflow
from CF/Vercel edge rate-limiting without dashboard access.
```sh
./scripts/edge-429-probe.sh hongming.moleculesai.app --burst 80 --out /tmp/edge.txt
```
The script's report header explains how to read the output.

View File

@ -58,11 +58,8 @@ green — proves wire shape end-to-end against a real `hermes gateway run`
subprocess + stub OpenAI-compat LLM. Caught + fixed a real `KeyError`
in upstream `hermes_cli/tools_config.py` (PLATFORMS dict lookup
crashed on plugin platforms) — fix on the patched fork branch
(`molecule-ai/hermes-agent` `feat/platform-adapter-plugins`, commit
`18e4849e`, hosted on Gitea at
`https://git.moleculesai.app/molecule-ai/hermes-agent` — moved from the
suspended `github.com/HongmingWang-Rabbit/hermes-agent`, see
`molecule-ai/internal#72`). Upstream PR #18775 OPEN; CONFLICTING with main.
(`HongmingWang-Rabbit/hermes-agent` `feat/platform-adapter-plugins`,
commit `18e4849e`). Upstream PR #18775 OPEN; CONFLICTING with main.
Not on critical path for our platform — patched fork is what the
workspace image installs.

View File

@ -1,137 +0,0 @@
# Runbook — Handlers Postgres Integration port-collision substrate
**Status:** Resolved 2026-05-08 (PR for class B Hongming-owned CICD red sweep).
## Symptom
`Handlers Postgres Integration` workflow fails on staging push and PRs.
Step `Apply migrations to Postgres service` shows:
```
psql: error: connection to server at "127.0.0.1", port 5432 failed: Connection refused
```
Job-cleanup step further down logs:
```
Cleaning up services for job Handlers Postgres Integration
failed to remove container: Error response from daemon: No such container: <id>
```
…confirming the postgres service container was already gone before
cleanup ran.
## Root cause
Our Gitea act_runner (operator host `5.78.80.188`,
`/opt/molecule/runners/config.yaml`) sets:
```yaml
container:
network: host
```
…which act_runner applies to BOTH the job container AND every
`services:` container in a workflow. Multiple workflow instances
running concurrently across the 16 parallel runners each try to bind
postgres on `0.0.0.0:5432`. The first wins; subsequent instances exit
immediately with:
```
LOG: could not bind IPv4 address "0.0.0.0": Address in use
HINT: Is another postmaster already running on port 5432?
FATAL: could not create any TCP/IP sockets
```
act_runner sets `AutoRemove:true` on service containers, so Docker
garbage-collects them as soon as they exit. By the time the migrations
step runs `pg_isready` / `psql`, the container is gone and connection
refused.
Reproduction (operator host):
```bash
docker run --rm -d --name pg-A --network host \
-e POSTGRES_PASSWORD=test postgres:15-alpine
docker run -d --name pg-B --network host \
-e POSTGRES_PASSWORD=test postgres:15-alpine
docker logs pg-B # FATAL: could not create any TCP/IP sockets
```
## Why per-job override doesn't work
The natural fix — per-job `container.network` override — is silently
ignored by act_runner. The runner log emits:
```
--network and --net in the options will be ignored.
```
This is a documented act_runner constraint: container network is a
runner-wide setting, not per-job. Source: gitea/act_runner config docs
+ vegardit/docker-gitea-act-runner issue #7.
Flipping the global `container.network` to `bridge` would break every
other workflow in the repo (cache server discovery,
`molecule-monorepo-net` peer access during integration tests, etc.) —
unacceptable blast radius for a per-test bug.
## Fix shape
`handlers-postgres-integration.yml` no longer uses `services: postgres:`.
It launches a sibling postgres container manually on the existing
`molecule-monorepo-net` bridge network with a per-run unique name:
```yaml
env:
PG_NAME: pg-handlers-${{ github.run_id }}-${{ github.run_attempt }}
PG_NETWORK: molecule-monorepo-net
steps:
- name: Start sibling Postgres on bridge network
run: |
docker run -d --name "${PG_NAME}" --network "${PG_NETWORK}" \
...
postgres:15-alpine
PG_HOST=$(docker inspect "${PG_NAME}" \
--format "{{(index .NetworkSettings.Networks \"${PG_NETWORK}\").IPAddress}}")
echo "PG_HOST=${PG_HOST}" >> "$GITHUB_ENV"
# … migrations + tests use ${PG_HOST}, not 127.0.0.1 …
  - name: Stop sibling Postgres
    if: always()
    run: docker rm -f "${PG_NAME}" || true
```
The host-net job container can reach a bridge-net container via the
bridge IP directly (verified manually, 2026-05-08). Two parallel runs
use different names + different bridge IPs — no collision.
## Future-proofing
Other workflows that hit the same shape (any `services:` with a
fixed-port image) will exhibit the same failure mode under
host-network runner config. Translate using this same pattern:
1. Drop the `services:` block.
2. Use `${{ github.run_id }}-${{ github.run_attempt }}` for unique
container name.
3. Launch on `molecule-monorepo-net` (already trusted bridge in
`docker-compose.infra.yml`).
4. Read back the bridge IP via `docker inspect` and export as a step env.
5. `if: always()` cleanup step at the end.
If the count of such workflows grows, factor into a composite action
(`./.github/actions/sibling-postgres`) so the substrate logic lives
in one place.
## Related
- Issue #88 (closed by #92): localhost → 127.0.0.1 fix that unmasked
this collision; the IPv6 fix is correct, port collision is the new
layer.
- Issue #94 created `molecule-monorepo-net` + `alpine:latest` as
prereqs.
- Saved memory `feedback_act_runner_github_server_url` documents
another act_runner-vs-GHA divergence (server URL).

View File

@ -1,46 +1,46 @@
{
"_comment": "OSS surface registry — every repo listed here MUST be public on git.moleculesai.app. Layer-3 customer/private templates are NOT registered here; they are handled at provision-time via the per-tenant credential resolver (see internal#102 RFC). 'main' refs are pinned to tags before broad rollout.",
"_comment": "Pin refs to release tags for reproducible builds. 'main' is OK while all repos are internal.",
"version": 1,
"plugins": [
{"name": "browser-automation", "repo": "molecule-ai/molecule-ai-plugin-browser-automation", "ref": "main"},
{"name": "ecc", "repo": "molecule-ai/molecule-ai-plugin-ecc", "ref": "main"},
{"name": "gh-identity", "repo": "molecule-ai/molecule-ai-plugin-gh-identity", "ref": "main"},
{"name": "molecule-audit", "repo": "molecule-ai/molecule-ai-plugin-molecule-audit", "ref": "main"},
{"name": "molecule-audit-trail", "repo": "molecule-ai/molecule-ai-plugin-molecule-audit-trail", "ref": "main"},
{"name": "molecule-careful-bash", "repo": "molecule-ai/molecule-ai-plugin-molecule-careful-bash", "ref": "main"},
{"name": "molecule-compliance", "repo": "molecule-ai/molecule-ai-plugin-molecule-compliance", "ref": "main"},
{"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-plugin-molecule-dev", "ref": "main"},
{"name": "molecule-freeze-scope", "repo": "molecule-ai/molecule-ai-plugin-molecule-freeze-scope", "ref": "main"},
{"name": "molecule-hitl", "repo": "molecule-ai/molecule-ai-plugin-molecule-hitl", "ref": "main"},
{"name": "molecule-prompt-watchdog", "repo": "molecule-ai/molecule-ai-plugin-molecule-prompt-watchdog", "ref": "main"},
{"name": "molecule-security-scan", "repo": "molecule-ai/molecule-ai-plugin-molecule-security-scan", "ref": "main"},
{"name": "molecule-session-context", "repo": "molecule-ai/molecule-ai-plugin-molecule-session-context", "ref": "main"},
{"name": "molecule-skill-code-review", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-code-review", "ref": "main"},
{"name": "molecule-skill-cron-learnings", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-cron-learnings", "ref": "main"},
{"name": "molecule-skill-cross-vendor-review", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-cross-vendor-review", "ref": "main"},
{"name": "molecule-skill-llm-judge", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-llm-judge", "ref": "main"},
{"name": "molecule-skill-update-docs", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-update-docs", "ref": "main"},
{"name": "molecule-workflow-retro", "repo": "molecule-ai/molecule-ai-plugin-molecule-workflow-retro", "ref": "main"},
{"name": "molecule-workflow-triage", "repo": "molecule-ai/molecule-ai-plugin-molecule-workflow-triage", "ref": "main"},
{"name": "superpowers", "repo": "molecule-ai/molecule-ai-plugin-superpowers", "ref": "main"}
{"name": "browser-automation", "repo": "Molecule-AI/molecule-ai-plugin-browser-automation", "ref": "main"},
{"name": "ecc", "repo": "Molecule-AI/molecule-ai-plugin-ecc", "ref": "main"},
{"name": "gh-identity", "repo": "Molecule-AI/molecule-ai-plugin-gh-identity", "ref": "main"},
{"name": "molecule-audit", "repo": "Molecule-AI/molecule-ai-plugin-molecule-audit", "ref": "main"},
{"name": "molecule-audit-trail", "repo": "Molecule-AI/molecule-ai-plugin-molecule-audit-trail", "ref": "main"},
{"name": "molecule-careful-bash", "repo": "Molecule-AI/molecule-ai-plugin-molecule-careful-bash", "ref": "main"},
{"name": "molecule-compliance", "repo": "Molecule-AI/molecule-ai-plugin-molecule-compliance", "ref": "main"},
{"name": "molecule-dev", "repo": "Molecule-AI/molecule-ai-plugin-molecule-dev", "ref": "main"},
{"name": "molecule-freeze-scope", "repo": "Molecule-AI/molecule-ai-plugin-molecule-freeze-scope", "ref": "main"},
{"name": "molecule-hitl", "repo": "Molecule-AI/molecule-ai-plugin-molecule-hitl", "ref": "main"},
{"name": "molecule-prompt-watchdog", "repo": "Molecule-AI/molecule-ai-plugin-molecule-prompt-watchdog", "ref": "main"},
{"name": "molecule-security-scan", "repo": "Molecule-AI/molecule-ai-plugin-molecule-security-scan", "ref": "main"},
{"name": "molecule-session-context", "repo": "Molecule-AI/molecule-ai-plugin-molecule-session-context", "ref": "main"},
{"name": "molecule-skill-code-review", "repo": "Molecule-AI/molecule-ai-plugin-molecule-skill-code-review", "ref": "main"},
{"name": "molecule-skill-cron-learnings", "repo": "Molecule-AI/molecule-ai-plugin-molecule-skill-cron-learnings", "ref": "main"},
{"name": "molecule-skill-cross-vendor-review", "repo": "Molecule-AI/molecule-ai-plugin-molecule-skill-cross-vendor-review", "ref": "main"},
{"name": "molecule-skill-llm-judge", "repo": "Molecule-AI/molecule-ai-plugin-molecule-skill-llm-judge", "ref": "main"},
{"name": "molecule-skill-update-docs", "repo": "Molecule-AI/molecule-ai-plugin-molecule-skill-update-docs", "ref": "main"},
{"name": "molecule-workflow-retro", "repo": "Molecule-AI/molecule-ai-plugin-molecule-workflow-retro", "ref": "main"},
{"name": "molecule-workflow-triage", "repo": "Molecule-AI/molecule-ai-plugin-molecule-workflow-triage", "ref": "main"},
{"name": "superpowers", "repo": "Molecule-AI/molecule-ai-plugin-superpowers", "ref": "main"}
],
"workspace_templates": [
{"name": "claude-code-default", "repo": "molecule-ai/molecule-ai-workspace-template-claude-code", "ref": "main"},
{"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "main"},
{"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
{"name": "langgraph", "repo": "molecule-ai/molecule-ai-workspace-template-langgraph", "ref": "main"},
{"name": "crewai", "repo": "molecule-ai/molecule-ai-workspace-template-crewai", "ref": "main"},
{"name": "autogen", "repo": "molecule-ai/molecule-ai-workspace-template-autogen", "ref": "main"},
{"name": "deepagents", "repo": "molecule-ai/molecule-ai-workspace-template-deepagents", "ref": "main"},
{"name": "gemini-cli", "repo": "molecule-ai/molecule-ai-workspace-template-gemini-cli", "ref": "main"}
{"name": "claude-code-default", "repo": "Molecule-AI/molecule-ai-workspace-template-claude-code", "ref": "main"},
{"name": "hermes", "repo": "Molecule-AI/molecule-ai-workspace-template-hermes", "ref": "main"},
{"name": "openclaw", "repo": "Molecule-AI/molecule-ai-workspace-template-openclaw", "ref": "main"},
{"name": "codex", "repo": "Molecule-AI/molecule-ai-workspace-template-codex", "ref": "main"},
{"name": "langgraph", "repo": "Molecule-AI/molecule-ai-workspace-template-langgraph", "ref": "main"},
{"name": "crewai", "repo": "Molecule-AI/molecule-ai-workspace-template-crewai", "ref": "main"},
{"name": "autogen", "repo": "Molecule-AI/molecule-ai-workspace-template-autogen", "ref": "main"},
{"name": "deepagents", "repo": "Molecule-AI/molecule-ai-workspace-template-deepagents", "ref": "main"},
{"name": "gemini-cli", "repo": "Molecule-AI/molecule-ai-workspace-template-gemini-cli", "ref": "main"}
],
"org_templates": [
{"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "main"},
{"name": "free-beats-all", "repo": "molecule-ai/molecule-ai-org-template-free-beats-all", "ref": "main"},
{"name": "medo-smoke", "repo": "molecule-ai/molecule-ai-org-template-medo-smoke", "ref": "main"},
{"name": "molecule-worker-gemini", "repo": "molecule-ai/molecule-ai-org-template-molecule-worker-gemini", "ref": "main"},
{"name": "ux-ab-lab", "repo": "molecule-ai/molecule-ai-org-template-ux-ab-lab", "ref": "main"},
{"name": "mock-bigorg", "repo": "molecule-ai/molecule-ai-org-template-mock-bigorg", "ref": "main"}
{"name": "molecule-dev", "repo": "Molecule-AI/molecule-ai-org-template-molecule-dev", "ref": "main"},
{"name": "free-beats-all", "repo": "Molecule-AI/molecule-ai-org-template-free-beats-all", "ref": "main"},
{"name": "medo-smoke", "repo": "Molecule-AI/molecule-ai-org-template-medo-smoke", "ref": "main"},
{"name": "molecule-worker-gemini", "repo": "Molecule-AI/molecule-ai-org-template-molecule-worker-gemini", "ref": "main"},
{"name": "reno-stars", "repo": "Molecule-AI/molecule-ai-org-template-reno-stars", "ref": "main"},
{"name": "ux-ab-lab", "repo": "Molecule-AI/molecule-ai-org-template-ux-ab-lab", "ref": "main"}
]
}

View File

@ -17,23 +17,12 @@
#
# Used by .github/workflows/auto-promote-stale-alarm.yml. Logic lives
# here (not inline in the workflow YAML) so we can:
# - Unit-test it with a fixture (see test-check-stale-promote-pr.sh)
# - Unit-test it with a stubbed `gh` (see test-check-stale-promote-pr.sh)
# - Run it ad-hoc by an operator: `scripts/check-stale-promote-pr.sh`
# - Reuse the same surface in any sibling workflow that needs the same
# check (SSOT — one detector, many callers).
#
# Requires: `curl`, `jq`. `GITEA_TOKEN` (or `GITHUB_TOKEN` / `GH_TOKEN`
# for back-compat) in the workflow context. Reads `GITHUB_SERVER_URL`
# / `GITEA_API_URL` for the Gitea base, defaulting to
# https://git.moleculesai.app/api/v1.
#
# Post-2026-05-06 (Gitea migration, issue #75): the previous version
# called `gh pr list/view/comment`, all of which hit GitHub.com's
# GraphQL or /api/v3 REST shapes. Gitea exposes /api/v1/ only (no
# GraphQL → 405, no /api/v3 → 404). So this script now talks to the
# Gitea v1 API directly via curl. The fixture-driven unit tests are
# unchanged — they bypass the live fetch via PR_FIXTURE and still pass
# the historical (GitHub-shape) JSON which `detect_stale` consumes.
# Requires: `gh` CLI, `jq`. `GH_TOKEN` env in the workflow context.
set -euo pipefail
@ -47,15 +36,14 @@ set -euo pipefail
# alarming. Override via env for tests + edge ops.
STALE_HOURS="${STALE_HOURS:-4}"
# Repo defaults to GITHUB_REPOSITORY (act_runner sets this in workflow
# context). Tests pass --repo explicitly.
# Repo defaults to the current `gh` context. Tests pass --repo explicitly.
REPO="${GITHUB_REPOSITORY:-}"
# Whether to post a comment to the PR. Off by default to avoid noise on
# manual ad-hoc runs; the cron workflow turns it on.
POST_COMMENT="${POST_COMMENT:-false}"
# Where to read the open-PR JSON from. Empty = call Gitea live. Tests
# Where to read the open-PR JSON from. Empty = call `gh` live. Tests
# point this at a fixture file.
PR_FIXTURE="${PR_FIXTURE:-}"
@ -63,17 +51,6 @@ PR_FIXTURE="${PR_FIXTURE:-}"
# the staleness math is deterministic.
NOW_OVERRIDE="${NOW_OVERRIDE:-}"
# Gitea API base. act_runner forwards github.server_url as
# GITHUB_SERVER_URL; for the molecule-ai fleet that's
# https://git.moleculesai.app. Append /api/v1 to get the REST root.
# Override directly via GITEA_API_URL for tests / non-default hosts.
GITEA_API_URL="${GITEA_API_URL:-${GITHUB_SERVER_URL:-https://git.moleculesai.app}/api/v1}"
# Token. Workflow context sets GITHUB_TOKEN; we accept GITEA_TOKEN as
# the explicit name and GH_TOKEN for back-compat with operator habits
# from the GitHub era. First non-empty wins.
GITEA_TOKEN="${GITEA_TOKEN:-${GITHUB_TOKEN:-${GH_TOKEN:-}}}"
while [ $# -gt 0 ]; do
case "$1" in
--repo) REPO="$2"; shift 2 ;;
@ -106,7 +83,7 @@ now_epoch() {
fi
}
# Parse RFC3339 timestamps the way Gitea / GitHub emit them (e.g.
# Parse RFC3339 timestamps the way GitHub emits them (e.g.
# "2026-05-05T23:15:00Z"). gnu-date uses -d, bsd-date uses -j -f. Cover
# both because the workflow runs on ubuntu-latest (gnu) but operators
# may run this script on macOS (bsd).
@ -129,100 +106,14 @@ to_epoch() {
# Fetch open auto-promote PRs
# -----------------------------------------------------------------------------
# Gitea v1 returns PRs with the canonical Gitea shape (number, title,
# created_at, html_url, mergeable, state). The previous GitHub-CLI
# version returned a derived `mergeStateStatus` / `reviewDecision`
# pair which only GitHub computes — Gitea doesn't expose them
# natively. Rebuild equivalents:
#
# mergeStateStatus = BLOCKED ↔ Gitea: state==open AND mergeable==true
# AND no APPROVED review yet
# (i.e. branch protection is gating
# the auto-merge pending an approval)
# reviewDecision = REVIEW_REQUIRED ↔ Gitea: 0 APPROVED reviews
#
# This mirrors the SAME silent-block failure mode the GitHub version
# detected: auto-merge armed, branch protection requires 1 review,
# nobody's approved yet.
#
# Implementation: pull the open PR list base=main, then for each PR
# pull /pulls/{n}/reviews and synthesize the GitHub-shape JSON the
# rest of the script + the test fixtures consume.
fetch_prs() {
if [ -n "$PR_FIXTURE" ]; then
cat "$PR_FIXTURE"
return 0
fi
if [ -z "$GITEA_TOKEN" ]; then
echo "::error::GITEA_TOKEN / GITHUB_TOKEN unset — cannot fetch PRs from $GITEA_API_URL" >&2
return 1
fi
local prs_json
prs_json="$(curl --fail-with-body -sS \
-H "Authorization: token ${GITEA_TOKEN}" \
-H "Accept: application/json" \
"${GITEA_API_URL}/repos/${REPO}/pulls?state=open&base=main&limit=50" \
2>/dev/null)" || {
echo "::error::Failed to fetch PRs from ${GITEA_API_URL}/repos/${REPO}/pulls" >&2
return 1
}
# Filter to head=staging (the auto-promote shape) and synthesize
# mergeStateStatus + reviewDecision per PR. Approval count via
# /pulls/{n}/reviews. Errors fall through to 0-approvals (treated
# as REVIEW_REQUIRED) preserving the existing "fail-safe — alarm if
# uncertain" semantic.
local synthesized="[]"
while IFS= read -r pr; do
[ -z "$pr" ] && continue
[ "$pr" = "null" ] && continue
local num
num="$(printf '%s' "$pr" | jq -r '.number')"
[ -z "$num" ] && continue
[ "$num" = "null" ] && continue
local approved_count
approved_count="$(curl --fail-with-body -sS \
-H "Authorization: token ${GITEA_TOKEN}" \
-H "Accept: application/json" \
"${GITEA_API_URL}/repos/${REPO}/pulls/${num}/reviews" 2>/dev/null \
| jq '[.[] | select(.state == "APPROVED" and (.dismissed // false) == false)] | length' \
2>/dev/null || echo 0)"
local mergeable
mergeable="$(printf '%s' "$pr" | jq -r '.mergeable')"
local merge_state="UNKNOWN"
local review_decision="REVIEW_REQUIRED"
if [ "$mergeable" = "true" ]; then
if [ "$approved_count" -ge 1 ]; then
merge_state="CLEAN"
review_decision="APPROVED"
else
# mergeable but no approving review — exactly the wedge state
# the alarm targets.
merge_state="BLOCKED"
review_decision="REVIEW_REQUIRED"
fi
else
# not mergeable (conflicts, behind, failed checks) — different
# failure mode, the author owns the fix; the alarm doesn't fire.
merge_state="DIRTY"
review_decision="REVIEW_REQUIRED"
fi
synthesized="$(printf '%s' "$synthesized" \
| jq -c --argjson pr "$pr" \
--arg ms "$merge_state" \
--arg rd "$review_decision" \
'. + [{
number: $pr.number,
title: $pr.title,
createdAt: $pr.created_at,
mergeStateStatus: $ms,
reviewDecision: $rd,
url: $pr.html_url
}]')"
done < <(printf '%s' "$prs_json" \
| jq -c '.[] | select(.head.ref == "staging")' 2>/dev/null)
printf '%s\n' "$synthesized"
gh pr list --repo "$REPO" \
--base main --head staging --state open \
--json number,title,createdAt,mergeStateStatus,reviewDecision,url
}
# -----------------------------------------------------------------------------
@ -280,40 +171,18 @@ post_comment() {
if [ "$POST_COMMENT" != "true" ]; then
return 0
fi
if [ -z "$GITEA_TOKEN" ]; then
echo "::warning::GITEA_TOKEN unset — cannot post stale-alarm comment on PR #$pr_num" >&2
return 0
fi
# Idempotency: only one alarm comment per PR. Look for the marker
# string in existing comments before posting a new one. Gitea's
# /repos/{owner}/{repo}/issues/{n}/comments returns the same shape
# for issues + PRs (PRs are issues internally on Gitea, same as
# GitHub's REST).
# string in existing comments before posting a new one.
local existing
existing="$(curl --fail-with-body -sS \
-H "Authorization: token ${GITEA_TOKEN}" \
-H "Accept: application/json" \
"${GITEA_API_URL}/repos/${REPO}/issues/${pr_num}/comments?limit=50" 2>/dev/null \
| jq -r '.[] | select(.body | test("scripts/check-stale-promote-pr.sh per issue #2975")) | .id' \
existing="$(gh pr view "$pr_num" --repo "$REPO" --json comments \
--jq '.comments[] | select(.body | test("scripts/check-stale-promote-pr.sh per issue #2975")) | .databaseId' \
| head -n1)"
if [ -n "$existing" ]; then
echo "::notice::PR #$pr_num already has a stale-alarm comment ($existing) — not re-posting"
return 0
fi
local body
body="$(comment_body "$age_h")"
if curl --fail-with-body -sS \
-X POST \
-H "Authorization: token ${GITEA_TOKEN}" \
-H "Accept: application/json" \
-H "Content-Type: application/json" \
"${GITEA_API_URL}/repos/${REPO}/issues/${pr_num}/comments" \
-d "$(jq -nc --arg b "$body" '{body: $b}')" \
>/dev/null 2>&1; then
echo "::notice::Posted stale-alarm comment on PR #$pr_num (age=${age_h}h)"
else
echo "::warning::Failed to POST stale-alarm comment on PR #$pr_num" >&2
fi
comment_body "$age_h" | gh pr comment "$pr_num" --repo "$REPO" --body-file -
echo "::notice::Posted stale-alarm comment on PR #$pr_num (age=${age_h}h)"
}
# -----------------------------------------------------------------------------

View File

@ -6,26 +6,6 @@
# ./scripts/clone-manifest.sh <manifest.json> <ws-templates-dir> <org-templates-dir> <plugins-dir>
#
# Requires: git, jq (lighter than python3 — ~2MB vs ~50MB in Alpine)
#
# Auth (optional):
# Post-2026-05-08 (#192): every repo in manifest.json is public on
# git.moleculesai.app. Anonymous clone works for the entire registered
# set. The OSS-surface contract is recorded in manifest.json's _comment
# — Layer-3 customer/private templates (e.g. reno-stars) are NOT in the
# manifest; they are handled at provision-time via the per-tenant
# credential resolver (internal#102 RFC).
#
# MOLECULE_GITEA_TOKEN is therefore optional today. Kept supported for
# two reasons: (a) historical CI configs that still inject
# AUTO_SYNC_TOKEN remain harmless, (b) reserved for the case where a
# private internal-only template is later registered via a ci-readonly
# team grant — review must explicitly sign off on that, since it
# violates the public-OSS-surface contract.
#
# The token (when set) never enters the Docker image: this script runs
# in the trusted CI context BEFORE `docker buildx build`, populates
# .tenant-bundle-deps/, then `Dockerfile.tenant` COPYs from there with
# the .git directories already stripped (see line ~67 below).
set -euo pipefail
@ -65,27 +45,11 @@ clone_category() {
continue
fi
# Build the clone URL. When MOLECULE_GITEA_TOKEN is set (CI path)
# embed it as basic-auth so private repos succeed. The username
# part ("oauth2") is conventional and ignored by Gitea — only the
# token-as-password is verified.
#
# manifest.json was migrated to lowercase org slugs on
# 2026-05-07 (post-suspension reconciliation), so we use $repo
# verbatim — no on-the-fly tolower transform needed.
if [ -n "${MOLECULE_GITEA_TOKEN:-}" ]; then
clone_url="https://oauth2:${MOLECULE_GITEA_TOKEN}@git.moleculesai.app/${repo}.git"
display_url="https://oauth2:***@git.moleculesai.app/${repo}.git"
else
clone_url="https://git.moleculesai.app/${repo}.git"
display_url="$clone_url"
fi
echo " cloning $display_url -> $target_dir/$name (ref=$ref)"
echo " cloning $repo -> $target_dir/$name (ref=$ref)"
if [ "$ref" = "main" ]; then
git clone --depth=1 -q "$clone_url" "$target_dir/$name"
git clone --depth=1 -q "https://github.com/${repo}.git" "$target_dir/$name"
else
git clone --depth=1 -q --branch "$ref" "$clone_url" "$target_dir/$name"
git clone --depth=1 -q --branch "$ref" "https://github.com/${repo}.git" "$target_dir/$name"
fi
CLONED=$((CLONED + 1))
i=$((i + 1))

View File

@ -1,155 +0,0 @@
#!/usr/bin/env bash
# edge-429-probe.sh — capture 429 origin (workspace-server vs CF/Vercel edge)
# during a simulated canvas-burst against a tenant subdomain.
#
# Issue molecule-core#62. The post-#60 verification step asks an
# operator with CF/Vercel dashboard access to confirm whether the
# layout-chunk 429s observed in DevTools were:
# (a) workspace-server bucket overflow (closes once #60 deploys), or
# (b) actual edge-layer rate-limiting (CF or Vercel).
#
# This script doesn't need dashboard access. It reproduces the burst
# pattern locally and dumps every 429's response shape so the operator
# can distinguish (a) from (b) by inspection: workspace-server emits a
# JSON body, CF emits HTML, Vercel emits a different HTML. Headers tell
# the same story (cf-ray vs x-vercel-*).
#
# Usage:
# ./scripts/edge-429-probe.sh <tenant-host> [--burst N] [--waves N] [--pause SECS] [--out file]
#
# Example:
# ./scripts/edge-429-probe.sh hongming.moleculesai.app --burst 80 --out /tmp/edge.txt
#
# The script is read-only against the target — it only issues GETs to
# public-by-design endpoints. No mutating requests, no credential use.
set -euo pipefail

# ── Help / usage handling first, before positional capture ────────────────────
# The file's header comment block doubles as the usage text; `sed` prints
# it from "# edge-429-probe.sh" down to the first blank line, stripping
# the leading "# " markers.
case "${1:-}" in
  -h|--help|"")
    sed -n '/^# edge-429-probe.sh/,/^$/p' "$0" | sed 's/^# \{0,1\}//'
    exit 0
    ;;
esac

HOST="$1"; shift

# Tunables — defaults match the burst shape observed in #62; each is
# overridable via the flags below.
BURST=80        # parallel requests per endpoint per wave
WAVES=3         # number of bursts
WAVE_PAUSE=2    # seconds to sleep between waves
OUT=""          # report file; empty = write report to stdout

# need_arg — fail fast with a clear message when an option that takes a
# value is given none. Previously a bare `shift 2` aborted with bash's
# opaque "shift count out of range" / "unbound variable" instead.
need_arg() {
  [ $# -ge 2 ] || { echo "option $1 requires a value" >&2; exit 2; }
}

# `[ $# -gt 0 ]` (not `[ "${1:-}" != "" ]`): an empty-string argument
# used to silently terminate parsing; now it falls through to the
# unknown-arg error like any other bad input.
while [ $# -gt 0 ]; do
  case "$1" in
    --burst) need_arg "$@"; BURST="$2"; shift 2 ;;
    --waves) need_arg "$@"; WAVES="$2"; shift 2 ;;
    --pause) need_arg "$@"; WAVE_PAUSE="$2"; shift 2 ;;
    --out)   need_arg "$@"; OUT="$2"; shift 2 ;;
    -h|--help)
      sed -n '/^# edge-429-probe.sh/,/^$/p' "$0" | sed 's/^# \{0,1\}//'
      exit 0
      ;;
    *) echo "unknown arg: $1" >&2; exit 2 ;;
  esac
done
# ── Endpoint discovery ────────────────────────────────────────────────────────
# Fetch the canvas root and scrape the first Next.js layout-chunk URL out
# of it. Failure is tolerated (`|| true`): we fall back to a sentinel
# path, and a 404 on the sentinel is an expected, documented outcome.
echo "→ Discovering a layout-chunk URL from canvas root..." >&2
ROOT_BODY=$(curl -fsSL --max-time 10 "https://${HOST}/" 2>/dev/null || true)
# printf (not echo): $ROOT_BODY is arbitrary remote HTML — echo would
# misbehave on a body that starts with "-n"/"-e" or, on some shells,
# contains backslash escapes.
LAYOUT_PATH=$(printf '%s\n' "$ROOT_BODY" \
  | grep -oE '/_next/static/chunks/layout-[A-Za-z0-9_-]+\.js' \
  | head -1 || true)
if [ -z "$LAYOUT_PATH" ]; then
  LAYOUT_PATH="/_next/static/chunks/layout-probe-not-found.js"
  echo " (no layout chunk discovered — using sentinel path; 404 on this is expected)" >&2
else
  echo " layout chunk: $LAYOUT_PATH" >&2
fi

# Probe URL: a generic activity endpoint. The rate-limiter middleware
# runs BEFORE workspace-id validation, so unauth/invalid-id requests
# still hit the bucket.
ACTIVITY_PATH="/workspaces/00000000-0000-0000-0000-000000000000/activity?probe=edge-429"
# ── Fire one curl, write a single-line JSON-ish status record to stdout ──────
# Inlined into xargs as a heredoc-style command rather than a function so
# the function-export pitfalls (some shells lose `export -f` across xargs)
# don't apply. Each output line is a parseable record; failed curls emit
# a curl_err record so request volume is preserved.
# Scratch file collecting one record line per request. run_burst's
# background jobs append to it concurrently; each printf emits a single
# short line. The EXIT trap removes it on every exit path.
TMP_RESULTS="$(mktemp -t edge-429-probe.XXXXXX)"
trap 'rm -f "$TMP_RESULTS"' EXIT
# run_burst — fan out $BURST parallel GETs at https://$HOST<path>,
# append one parseable "label=... status=..." record per request to
# $TMP_RESULTS, then barrier on all of them with `wait`.
# $1 = path; $2 = label; $3 = wave_id
run_burst() {
  local probe_path="$1" tag="$2" wave_id="$3"
  local n
  for ((n = 1; n <= BURST; n++)); do
    {
      # A failed curl still produces a record (status=curl_err) so the
      # report preserves total request volume.
      rec=$(curl -sS --max-time 10 -o /dev/null \
        -w 'status=%{http_code} size=%{size_download} time=%{time_total} server=%{header.server} cf_ray=%{header.cf-ray} x_vercel=%{header.x-vercel-id} retry_after=%{header.retry-after} content_type=%{header.content-type} x_ratelimit_limit=%{header.x-ratelimit-limit} x_ratelimit_remaining=%{header.x-ratelimit-remaining} x_ratelimit_reset=%{header.x-ratelimit-reset}\n' \
        "https://${HOST}${probe_path}" 2>/dev/null) || rec="status=curl_err"
      printf 'label=%s-%s-%s %s\n' "$tag" "$wave_id" "$n" "$rec" >> "$TMP_RESULTS"
    } &
  done
  wait
}
# emit — write one report line: appended to $OUT when a report file was
# requested, otherwise printed to stdout.
emit() {
  local report_line="$*"
  if [ -z "$OUT" ]; then
    printf '%s\n' "$report_line"
  else
    printf '%s\n' "$report_line" >> "$OUT"
  fi
}
# Truncate the report file up front so repeated runs replace rather than
# append.
if [ -n "$OUT" ]; then : > "$OUT"; fi

# Preamble: record the probe parameters so a saved report is
# self-describing.
emit "# edge-429-probe report"
emit "# host=$HOST burst=$BURST waves=$WAVES pause=${WAVE_PAUSE}s"
emit "# layout_path=$LAYOUT_PATH"
emit "# activity_path=$ACTIVITY_PATH"
emit "# generated=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
emit ""

for wave in $(seq 1 "$WAVES"); do
  emit "## wave $wave"
  # Reset the scratch file so each wave's records are read back in
  # isolation (run_burst appends).
  : > "$TMP_RESULTS"
  run_burst "$LAYOUT_PATH" "layout" "$wave"
  run_burst "$ACTIVITY_PATH" "activity" "$wave"
  # Re-emit each record indented under the wave heading; the indent is
  # what the summary greps later key on to separate data lines from
  # commentary.
  while read -r line; do
    emit " $line"
  done < "$TMP_RESULTS"
  # Pause between waves (not after the last) so bucket-refill behavior
  # is visible across waves.
  if [ "$wave" -lt "$WAVES" ]; then
    sleep "$WAVE_PAUSE"
  fi
done
# Trailer: a reading guide so the operator can classify 429 origin by
# inspection without needing this script alongside the report.
emit ""
emit "## summary — how to read the report"
emit "# status=429 + content_type starts with application/json + x_ratelimit_limit set"
emit "# => workspace-server bucket overflow. Closes when #60 deploys."
emit "# status=429 + cf_ray set + content_type=text/html"
emit "# => Cloudflare WAF / rate-limit. Audit dashboard rules per #62."
emit "# status=429 + x_vercel set + content_type=text/html"
emit "# => Vercel edge / Bot Fight Mode. Audit Vercel project per #62."
emit "# status=429 with no server/cf_ray/x_vercel"
emit "# => corporate proxy or VPN. Not actionable in this repo."

# When a report file was requested, also print quick totals to stderr.
if [ -n "$OUT" ]; then
  echo "→ Report written to $OUT" >&2
  # Match only data lines (the indented "label=" records emitted by the
  # wave loop), not the summary's reference text, which also mentions
  # "status=429".
  # grep -c outputs "0" + exits 1 when zero matches; `|| true` masks
  # the exit status so set -e doesn't trip without losing the count.
  total=$(grep -c '^ label=' "$OUT" 2>/dev/null || true)
  total429=$(grep -c '^ label=.*status=429' "$OUT" 2>/dev/null || true)
  # Belt-and-braces: if grep produced no output at all, treat as zero.
  total=${total:-0}
  total429=${total429:-0}
  echo "→ Totals: ${total429} of ${total} requests returned 429" >&2
  if [ "${total429}" -gt 0 ]; then
    # Per-label breakdown: "layout" vs "activity" 429 counts. The sed
    # capture grabs the label prefix up to the first hyphen.
    echo "→ Per-label 429 counts:" >&2
    grep '^ label=.*status=429' "$OUT" \
      | sed -E 's/^ label=([^-]+).*/ \1/' \
      | sort | uniq -c >&2
  fi
fi

View File

@ -19,15 +19,9 @@ Exit codes:
0 no collisions
1 collision detected; output names the conflicting PR(s) for the author
Designed to run from a Gitea Actions PR check. Reads PR metadata via direct
HTTP calls to Gitea's REST API (`/api/v1/`), which on the molecule-ai fleet
lives at https://git.moleculesai.app. Runs in under 10s against a typical PR.
Post-2026-05-06 (Gitea migration, issue #75): the previous version called
the GitHub CLI (``gh pr list``, ``gh pr diff``). On Gitea those calls hit
either the GraphQL endpoint (HTTP 405) or /api/v3 (HTTP 404). This module
now talks to /api/v1 directly via urllib so it works against any Gitea
host without a `gh` install or extra dependencies.
Designed to run from a GitHub Actions PR check. Reads PR metadata via the
GitHub CLI (gh) which is preinstalled on ubuntu-latest runners. Runs in
under 10s against a typical PR.
"""
from __future__ import annotations
@ -37,70 +31,12 @@ import os
import re
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path
MIGRATIONS_DIR = "workspace-server/migrations"
MIGRATION_FILE_RE = re.compile(r"^(\d+)_[^/]+\.(up|down)\.sql$")
def _gitea_api_url() -> str:
"""Resolve the Gitea API base URL.
act_runner forwards github.server_url as GITHUB_SERVER_URL; for the
molecule-ai fleet that's https://git.moleculesai.app. Append /api/v1
to get the REST root. Override directly via GITEA_API_URL for tests
or non-default hosts.
"""
env_override = os.environ.get("GITEA_API_URL", "").rstrip("/")
if env_override:
return env_override
server = os.environ.get("GITHUB_SERVER_URL", "https://git.moleculesai.app").rstrip("/")
return f"{server}/api/v1"
def _gitea_token() -> str:
"""Resolve the Gitea token from env. GITEA_TOKEN wins; falls back
to GITHUB_TOKEN (set by act_runner) and GH_TOKEN (operator habit
from the GitHub era)."""
return (
os.environ.get("GITEA_TOKEN")
or os.environ.get("GITHUB_TOKEN")
or os.environ.get("GH_TOKEN")
or ""
)
def _gitea_get(path: str, params: dict[str, str] | None = None) -> bytes | None:
"""GET against /api/v1; returns response body or None on HTTP error.
Errors return None (not raise) because callers handle missing data
by emitting an actionable workflow message rather than crashing the
PR check on a transient API blip.
"""
base = _gitea_api_url()
qs = ""
if params:
qs = "?" + urllib.parse.urlencode(params)
url = f"{base}/{path.lstrip('/')}{qs}"
req = urllib.request.Request(url)
token = _gitea_token()
if token:
req.add_header("Authorization", f"token {token}")
req.add_header("Accept", "application/json")
try:
with urllib.request.urlopen(req, timeout=20) as resp: # noqa: S310
return resp.read()
except urllib.error.HTTPError as e:
sys.stderr.write(f"Gitea API HTTP {e.code} on {path}: {e.reason}\n")
return None
except (urllib.error.URLError, TimeoutError) as e:
sys.stderr.write(f"Gitea API network error on {path}: {e}\n")
return None
def run(cmd: list[str], check: bool = True) -> str:
"""Run a subprocess and return stdout. Raise on non-zero when check=True."""
result = subprocess.run(cmd, capture_output=True, text=True)
@ -160,49 +96,32 @@ def open_prs_with_migration_prefix(
repo: str, prefix: int, exclude_pr: int
) -> list[dict]:
"""Return open PRs (other than `exclude_pr`) that add a migration with
`prefix`. Walks open PRs via Gitea's `/repos/{owner}/{repo}/pulls` and
pulls each one's changed-file list via `/pulls/{n}/files`. The cost is
bounded by open-PR count, which is small (<100) on this repo. The
return shape mimics the GitHub CLI's `--json number,headRefName`:
``[{"number": int, "headRefName": str}, ...]``.
`prefix`. Uses `gh pr diff` per PR — we only need to walk PRs that are
actually in flight, so the cost is bounded by open-PR count.
"""
body = _gitea_get(
f"repos/{repo}/pulls",
{"state": "open", "limit": "50"},
)
if body is None:
# Best-effort: a transient Gitea blip shouldn't fail the PR
# check (the base-branch collision check runs locally and is
# the more common failure mode).
return []
prs = json.loads(body)
out = run([
"gh", "pr", "list", "--repo", repo, "--state", "open",
"--json", "number,headRefName", "--limit", "100",
])
prs = json.loads(out)
matches: list[dict] = []
for pr in prs:
num = pr["number"]
if num == exclude_pr:
continue
# Gitea returns the head ref under .head.ref (REST shape);
# GitHub CLI's --json headRefName flattens it. Normalize on
# the way out so callers see the historical shape.
head_ref_name = (pr.get("head") or {}).get("ref", "")
files_body = _gitea_get(f"repos/{repo}/pulls/{num}/files", {"limit": "100"})
if files_body is None:
continue
try:
files = json.loads(files_body)
except json.JSONDecodeError:
files = run([
"gh", "pr", "diff", str(num), "--repo", repo, "--name-only",
], check=False)
except Exception: # noqa: BLE001
continue
for f in files:
# Gitea's /pulls/{n}/files returns objects with `.filename`
# (same as GitHub's REST). Older Gitea versions emit
# `.name` instead — handle both.
raw = f.get("filename") or f.get("name") or ""
for raw in files.splitlines():
path = Path(raw.strip())
if not path.name:
continue
m = MIGRATION_FILE_RE.match(path.name)
if m and int(m.group(1)) == prefix:
matches.append({"number": num, "headRefName": head_ref_name})
matches.append(pr)
break
return matches
@ -219,10 +138,7 @@ def main() -> int:
pr_number = int(pr_number_env)
base_ref = os.environ.get("BASE_REF", "origin/staging")
head_ref = os.environ.get("HEAD_REF", "HEAD")
# Default kept lowercase to match the Gitea-canonical org name
# (post-2026-05-06 migration). Tests + workflow context override
# via GITHUB_REPOSITORY which act_runner sets per-run.
repo = os.environ.get("GITHUB_REPOSITORY", "molecule-ai/molecule-core")
repo = os.environ.get("GITHUB_REPOSITORY", "Molecule-AI/molecule-core")
added = migrations_in_diff(base_ref, head_ref)
if not added:

View File

@ -1,7 +1,5 @@
# Production-shape local harness
<!-- Retrigger Harness Replays after Class G #168 + clone-manifest fix (#42). -->
The harness brings up the SaaS tenant topology on localhost using the
same `Dockerfile.tenant` image that ships to production. Tests target
the cf-proxy on `http://localhost:8080` and pass the tenant identity

View File

@ -1,14 +0,0 @@
# cf-proxy harness image — nginx + the harness's tenant-routing config baked
# in at build time.
#
# Why bake (not bind-mount): on Gitea Actions / act_runner, the runner is a
# container talking to the OUTER docker daemon over the host socket; runc
# resolves bind-mount source paths on the outer host filesystem, where the
# repo at `/workspace/.../tests/harness/cf-proxy/nginx.conf` is invisible.
# Compose `configs:` (with `file:`) falls back to bind mounts when swarm is
# not active, so it hits the same gap. A build-time COPY uploads the file
# as part of the docker build context — the daemon receives the tarball
# directly and never bind-mounts. See issue #88 item 2.
FROM nginx:1.27-alpine
COPY nginx.conf /etc/nginx/nginx.conf

View File

@ -167,26 +167,15 @@ services:
# Production shape: same single CF tunnel front-doors every tenant
# subdomain — the Host header carries the tenant identity, not the
# routing destination. Local cf-proxy mirrors this exactly.
#
# nginx.conf delivery: built into a custom image via cf-proxy/Dockerfile
# (a thin nginx:1.27-alpine + COPY). NOT a bind mount and NOT a
# compose `configs:` block, both of which break under Gitea's
# act_runner: the runner talks to the OUTER docker daemon over the
# host socket, and runc resolves bind sources on the outer host
# filesystem, where `/workspace/.../tests/harness/cf-proxy/nginx.conf`
# is invisible. Compose `configs:` falls back to bind mounts without
# swarm, so it hits the same gap. A build context, by contrast, is
# uploaded to the daemon as a tarball at build time — no bind. See
# issue #88 item 2.
cf-proxy:
build:
context: ./cf-proxy
dockerfile: Dockerfile
image: nginx:1.27-alpine
depends_on:
tenant-alpha:
condition: service_healthy
tenant-beta:
condition: service_healthy
volumes:
- ./cf-proxy/nginx.conf:/etc/nginx/nginx.conf:ro
# Bind to 127.0.0.1 only — hardcoded ADMIN_TOKENs make 0.0.0.0
# exposure unsafe even on a local network.
ports:

View File

@ -1,252 +0,0 @@
#!/usr/bin/env bash
# tools/branch-protection/check_name_parity.sh — assert every required-
# check name listed in apply.sh maps to a workflow job whose "always
# emits this status" shape is intact.
#
# Closes #144 / encodes the saved memory
# feedback_branch_protection_check_name_parity:
#
# "Path filters (e.g., detect-changes → conditional skip) silently
# break branch protection because no job emits the protected
# sentinel status when path-filter returns false."
#
# Two safe shapes for a required-check job:
#
# 1. Single-job-with-per-step-if (path-filter case):
# The workflow has NO top-level `paths:` filter; the always-running
# job has steps gated on `if: needs.<gate>.outputs.<flag> == 'true'`
# so the no-op step alone fires when paths exclude the commit.
# Used by ci.yml's Platform/Canvas/Python/Shellcheck and by
# e2e-api.yml / e2e-staging-canvas.yml / runtime-prbuild-compat.yml.
#
# 2. Aggregator-with-needs+always() (matrix-refactor case):
# An aggregator job named after the protected check `needs:` the
# matrix children + uses `if: always()` + checks each child's
# result. (Not currently in this repo but supported.)
#
# Unsafe shape this script catches:
# - Workflow has top-level `paths:` filter AND the protected check
# name is on a single job. When paths-filter excludes a commit, the
# workflow doesn't fire — branch protection waits forever.
#
# Exit codes:
# 0 — every required check name has at least one safe-shape match
# 1 — a required name has no match OR matches an unsafe shape
# 2 — script-internal error (apply.sh missing, awk failure, etc.)
set -euo pipefail

# Resolve paths relative to this script so the tool works from any CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
WORKFLOWS_DIR="$REPO_ROOT/.github/workflows"
APPLY_SH="$SCRIPT_DIR/apply.sh"

# Both inputs are hard requirements. A missing one is a script-setup
# error (exit 2), distinct from a parity failure (exit 1).
[[ -f "$APPLY_SH" ]] || {
  echo "check_name_parity: missing apply.sh at $APPLY_SH" >&2
  exit 2
}
[[ -d "$WORKFLOWS_DIR" ]] || {
  echo "check_name_parity: missing .github/workflows at $WORKFLOWS_DIR" >&2
  exit 2
}
# ─── Extract the union of required check names from apply.sh ──────
# apply.sh has STAGING_CHECKS and MAIN_CHECKS heredocs; union them so
# we audit any name that gates EITHER branch. Filters out blank lines
# and the heredoc end marker. Sorted + uniq so the audit output is stable.
#
# Captures the heredoc end-marker dynamically from the `<<'MARKER'`
# token on the opening line — the token can be `EOF` (production
# apply.sh), `EOF2` (test fixtures with nested heredocs), or any other
# bash-legal identifier. Without dynamic extraction, test fixtures
# with nested heredocs would either skip-capture (wrong end marker)
# or capture the inner end marker as a stray check name.
#
# Two-step approach to keep awk-portable across BSD awk (macOS) and
# gawk (Linux): grep finds the heredoc-opening lines, sed extracts the
# marker, then awk does the capture. Pure-awk attempts hit BSD-vs-GNU
# regex/variable-init differences that regress silently — this shape
# stays in POSIX-portable territory.
# extract_heredoc_block — print the non-blank body lines of every
# heredoc in file $1 whose end marker is the literal identifier $2.
# A line containing "<<MARKER" (optionally with one quote char before
# the marker, e.g. <<'EOF') opens capture; a line that IS exactly the
# marker closes it. Blank/whitespace-only lines are dropped.
extract_heredoc_block() {
  local src="$1" end_marker="$2"
  awk -v m="$end_marker" '
    $0 ~ "<<.?" m { capturing = 1; next }
    capturing && $0 == m { capturing = 0; next }
    capturing && NF > 0 { print }
  ' "$src"
}
# Find every heredoc-end marker used in apply.sh (typically just EOF
# in the production script, but EOF2 / TAG / ABC are all valid in
# fixtures or future expansions). Each marker maps to one or more
# heredoc blocks; we union all of them.
# Collect every heredoc end marker used in apply.sh. The grep keys on
# the `<<'MARKER' ... ||` shape of the check-list declarations
# (`read -r -d '' X <<'EOF' || true`) — the trailing `||` is what keeps
# unrelated heredocs out of the marker set. The sed then strips
# everything but the marker identifier; sort -u dedups.
# NOTE(review): a check-list heredoc written WITHOUT the `|| true` tail
# would be silently skipped here — the exit-2 guard below is the
# backstop for that case.
markers=$(grep -E "<<['\"]?[A-Za-z0-9_]+['\"]?[[:space:]]*\\|\\|" "$APPLY_SH" \
  | sed -E "s/.*<<['\"]?([A-Za-z0-9_]+)['\"]?.*/\\1/" \
  | sort -u)

# Union the bodies of every block each marker closes; newline-join so
# the final sort -u sees one candidate name per line.
required_names=""
while IFS= read -r marker; do
  [[ -z "$marker" ]] && continue
  block=$(extract_heredoc_block "$APPLY_SH" "$marker")
  if [[ -n "$block" ]]; then
    required_names+="$block"$'\n'
  fi
done <<< "$markers"

# Stable, deduped, blank-free list of required check names.
required_names=$(printf '%s' "$required_names" | sort -u | sed '/^$/d')

# Extracting nothing means the heredoc shape changed (or apply.sh is
# broken) — that's a tooling error, not a parity verdict.
if [[ -z "$required_names" ]]; then
  echo "check_name_parity: failed to extract required check names from apply.sh" >&2
  exit 2
fi
# ─── For each required name, find the workflow file that owns it ──
# A workflow "owns" a name if any `name:` line in the file equals the
# required name. We look at job-level names AND the workflow-level
# `name:` (the latter prefixes "Analyze" jobs in codeql.yml).
#
# Then we check whether the owning workflow has a top-level `paths:`
# filter. The unsafe shape is:
# - top-level paths: filter present
# - AND the named job is gated only at the workflow level (no per-
# step `if:` gates)
#
# Distinguishing "no `paths:` filter" from "paths: filter + per-step
# gating" requires parsing the YAML semantics. We do it heuristically:
#
# - "no top-level paths:" → safe by construction (workflow always
# fires)
# - "paths: present" → check that the matching job has at
# least one `if: needs.<x>.outputs`
# step gate. If yes, that's the
# single-job-with-per-step-if shape.
# If no, flag as unsafe.
#
# Heuristic so it stays a portable bash + awk + grep tool — full YAML
# parsing would need yq which isn't a dependency. The known unsafe
# shape (workflow-level paths: AND no per-step if-gates) is what we're
# trying to catch.
# ─── Classify every required name against its owning workflow(s) ──
# Accumulates human-readable findings; `failed` flips on the first
# problem and drives the final exit code.
failed=0
declare -a unsafe_findings=()

while IFS= read -r name; do
  [[ -z "$name" ]] && continue
  # Find every workflow file that contains a job with `name: <name>` or
  # whose top-level workflow `name:` plus matrix substitution would
  # produce <name>. Need to be careful about quoting — YAML allows
  # `name: Foo`, `name: "Foo"`, `name: 'Foo'`. Strip quotes.
  matches=()
  while IFS= read -r f; do
    # Look for an exact `name:` match (anywhere in the file). The
    # workflow-level name line is at column 0; job-level names are
    # indented. Either is acceptable for parity — what matters is
    # whether the EMITTED check-run name is the one we required.
    # Strip surrounding quotes/whitespace before comparing.
    if awk -v want="$name" '
      /^[[:space:]]*name:[[:space:]]*/ {
        line = $0
        sub(/^[[:space:]]*name:[[:space:]]*/, "", line)
        # Strip surrounding " or '\''
        gsub(/^["\047]|["\047]$/, "", line)
        # Strip trailing whitespace + comment
        sub(/[[:space:]]*#.*$/, "", line)
        sub(/[[:space:]]+$/, "", line)
        if (line == want) found = 1
      }
      END { exit !found }
    ' "$f"; then
      matches+=("$f")
    fi
  done < <(find "$WORKFLOWS_DIR" -name '*.yml' -o -name '*.yaml')

  if [[ ${#matches[@]} -eq 0 ]]; then
    # Special case — Analyze (go/javascript-typescript/python) is
    # generated by codeql.yml's matrix expansion of `Analyze (${{
    # matrix.language }})`. Don't flag those as missing if codeql.yml
    # exists with the expected base name.
    case "$name" in
      "Analyze (go)"|"Analyze (javascript-typescript)"|"Analyze (python)")
        # shellcheck disable=SC2016
        # The literal `${{ matrix.language }}` is the GHA template
        # syntax we're searching FOR — not a shell expansion. SC2016
        # would have us add quotes that defeat the search.
        if [[ -f "$WORKFLOWS_DIR/codeql.yml" ]] && \
           grep -q 'name: Analyze (${{[[:space:]]*matrix.language[[:space:]]*}})' "$WORKFLOWS_DIR/codeql.yml"; then
          matches=("$WORKFLOWS_DIR/codeql.yml")
        fi
        ;;
    esac
  fi

  # Still no owner → the required status can never be emitted.
  if [[ ${#matches[@]} -eq 0 ]]; then
    unsafe_findings+=("MISSING: required check name '$name' has no matching workflow job")
    failed=1
    continue
  fi

  # For each owning workflow, classify safe vs unsafe.
  for f in "${matches[@]}"; do
    rel="${f#"$REPO_ROOT"/}"
    # Heuristic: does the workflow have a top-level `paths:` filter?
    # Top-level here means under the `on:` key, not under jobs.<x>.if.
    # Workflow-level paths filters appear at indent depth 4 (under
    # `push:` or `pull_request:`). Job-level `if:` paths-filter doesn't
    # block the workflow from firing.
    has_top_paths=0
    if awk '
      # Track whether we are inside the `on:` block. The `on:` block
      # starts at column 0 (`on:` key) and ends when the next column-0
      # key appears.
      /^on:[[:space:]]*$/ { in_on = 1; next }
      /^[a-zA-Z]/ && in_on { in_on = 0 }
      in_on && /^[[:space:]]+paths:[[:space:]]*$/ { print "yes"; exit }
      in_on && /^[[:space:]]+paths:[[:space:]]*\[/ { print "yes"; exit }
    ' "$f" | grep -q yes; then
      has_top_paths=1
    fi
    if [[ "$has_top_paths" -eq 0 ]]; then
      # Safe: workflow always fires. If there are inner per-step if-
      # gates (single-job-with-per-step-if pattern), the no-op step
      # produces SUCCESS for the protected name — branch-protection-clean.
      continue
    fi
    # Unsafe candidate — has top-level paths: AND we need to verify
    # the per-step if-gate pattern is absent. Look for any `if:`
    # referencing a paths-filter / detect-changes output inside the
    # owning job's body. If at least one is present, classify as the
    # single-job-with-per-step-if pattern (safe).
    #
    # The regex is intentionally anchored loosely — actual workflow
    # YAML writes per-step if-gates as ` - if: needs.X.outputs.Y`
    # (with the `-` step-marker between the leading spaces and the
    # `if`). Anchoring on `^[[:space:]]+if:` would miss those.
    if grep -qE "if:[[:space:]]+needs\.[a-zA-Z_-]+\.outputs\." "$f"; then
      # Per-step if-gates exist. Combined with top-level paths: this
      # would be a buggy mix (the workflow might still skip entirely
      # when paths exclude). Flag as unsafe — the safe pattern omits
      # the top-level paths: filter altogether and gates per-step.
      unsafe_findings+=("UNSAFE-MIX: $rel has top-level paths: AND per-step if-gates — when paths exclude the commit, the workflow doesn't fire and the required check '$name' is silently absent. Drop the top-level paths: filter; keep the per-step if-gates.")
      failed=1
    else
      # Top-level paths: with no per-step if-gates: the canonical
      # check-name parity bug.
      unsafe_findings+=("UNSAFE-PATH-FILTER: $rel has top-level paths: filter and no per-step if-gates. When paths exclude the commit, no job emits the required check '$name' — branch protection waits forever. Either drop the paths: filter and add per-step if-gates against a detect-changes output, or add an aggregator-with-needs+always() job that emits '$name'.")
      failed=1
    fi
  done
done <<< "$required_names"
# ─── Final verdict ── exit 0 only when no finding was recorded.
if [[ "$failed" -eq 0 ]]; then
  echo "check_name_parity: OK — every required check name maps to a safe workflow shape."
  exit 0
fi

issue_count=${#unsafe_findings[@]}
echo "check_name_parity: FOUND ${issue_count} issue(s):" >&2
printf ' - %s\n' "${unsafe_findings[@]}" >&2
exit 1

View File

@ -1,285 +0,0 @@
#!/usr/bin/env bash
# tools/branch-protection/test_check_name_parity.sh — unit tests for
# check_name_parity.sh.
#
# Builds synthetic apply.sh + workflow files in a tmpdir for each case,
# invokes the script with REPO_ROOT pointing at the tmpdir, and asserts
# on exit code + stderr. Per feedback_assert_exact_not_substring we
# pin the EXACT exit code AND a substring of the stderr that names the
# offending workflow + name combo — so a "false-pass that prints the
# wrong message" still fails the test.
#
# Run locally: bash tools/branch-protection/test_check_name_parity.sh
# Run in CI: same — added to ci.yml's shellcheck job's "E2E bash unit
# tests" step alongside test_model_slug.sh.
set -euo pipefail
# Resolve the script-under-test relative to this file so the suite runs
# correctly from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SCRIPT_UNDER_TEST="$SCRIPT_DIR/check_name_parity.sh"
# Exit 2 = harness misconfiguration, deliberately distinct from test
# failures (the suite's final exit code is the FAILED count).
if [[ ! -x "$SCRIPT_UNDER_TEST" ]]; then
echo "test_check_name_parity: script under test missing or not executable: $SCRIPT_UNDER_TEST" >&2
exit 2
fi
# Per-case tallies, incremented by run_case.
PASSED=0
FAILED=0
# Tracks the active tmpdir for the running case so the trap can clean
# up even when assertions abort the case mid-flight.
TMPDIR_FOR_CASE=""
trap '[[ -n "$TMPDIR_FOR_CASE" && -d "$TMPDIR_FOR_CASE" ]] && rm -rf "$TMPDIR_FOR_CASE"' EXIT
# Build a synthetic repo at $1 with apply.sh listing $2 (one name per
# line) as both the staging and main required sets, then write
# whatever .github/workflows/* files the test case adds.
make_fake_repo() {
  local repo_root="$1"
  local check_names="$2"
  local bp_dir="$repo_root/tools/branch-protection"
  mkdir -p "$bp_dir" "$repo_root/.github/workflows"
  # Generate the stub apply.sh. The outer heredoc delimiter is UNQUOTED
  # on purpose: $check_names must expand here, while the inner 'EOF2'
  # heredocs stay literal inside the generated file.
  cat > "$bp_dir/apply.sh" <<EOF
#!/usr/bin/env bash
# Stub apply.sh — only the heredoc-shaped check lists matter for the
# parity script. Other functions intentionally absent.
read -r -d '' STAGING_CHECKS <<'EOF2' || true
$check_names
EOF2
read -r -d '' MAIN_CHECKS <<'EOF2' || true
$check_names
EOF2
EOF
  chmod +x "$bp_dir/apply.sh"
  # Place the script-under-test alongside its sibling apply.sh so the
  # script's REPO_ROOT walk finds the synthetic .github/workflows/.
  cp "$SCRIPT_UNDER_TEST" "$bp_dir/check_name_parity.sh"
}
run_case() {
  local desc="$1"
  local checks="$2"
  local workflow_yaml="$3"   # YAML body dropped into .github/workflows/
  local workflow_filename="$4"
  local expected_exit="$5"
  local expected_stderr_substring="$6"
  TMPDIR_FOR_CASE=$(mktemp -d)
  make_fake_repo "$TMPDIR_FOR_CASE" "$checks"
  printf '%s' "$workflow_yaml" > "$TMPDIR_FOR_CASE/.github/workflows/$workflow_filename"
  # Run the script, discarding stdout and capturing stderr for the
  # substring assertion below.
  local err_capture stderr_content
  local actual_exit=0
  err_capture=$(mktemp)
  bash "$TMPDIR_FOR_CASE/tools/branch-protection/check_name_parity.sh" 2>"$err_capture" >/dev/null || actual_exit=$?
  stderr_content=$(cat "$err_capture")
  rm "$err_capture"
  if [[ "$actual_exit" -ne "$expected_exit" ]]; then
    echo "FAIL: $desc"
    echo "  expected exit: $expected_exit, got: $actual_exit"
    echo "  stderr: $stderr_content"
    FAILED=$((FAILED+1))
    rm -rf "$TMPDIR_FOR_CASE"; TMPDIR_FOR_CASE=""
    return
  fi
  # An empty expected substring skips the stderr assertion entirely
  # (used for passing cases where stderr is uninteresting).
  if [[ -n "$expected_stderr_substring" ]] && ! grep -qF "$expected_stderr_substring" <<< "$stderr_content"; then
    echo "FAIL: $desc"
    echo "  expected stderr to contain: '$expected_stderr_substring'"
    echo "  actual stderr: $stderr_content"
    FAILED=$((FAILED+1))
    rm -rf "$TMPDIR_FOR_CASE"; TMPDIR_FOR_CASE=""
    return
  fi
  echo "PASS: $desc"
  PASSED=$((PASSED+1))
  rm -rf "$TMPDIR_FOR_CASE"; TMPDIR_FOR_CASE=""
}
# Fixture heredocs below use a quoted 'EOF' delimiter so ${{ ... }} and
# $VARS inside the YAML stay literal.
# Case 1: safe workflow — no top-level paths: filter, single job
# emitting the required name. Should exit 0.
run_case "safe: no paths filter, job emits required name" \
"Foo Build" \
"$(cat <<'EOF'
name: Foo
on:
push:
branches: [main]
pull_request:
jobs:
foo:
name: Foo Build
runs-on: ubuntu-latest
steps:
- run: echo ok
EOF
)" \
"foo.yml" \
0 \
""
# Case 2: unsafe — top-level paths: filter AND no per-step if-gates.
# This is the silent-block shape from the saved memory.
run_case "unsafe: top-level paths: filter without per-step if-gates" \
"Bar Build" \
"$(cat <<'EOF'
name: Bar
on:
push:
branches: [main]
paths:
- 'bar/**'
pull_request:
paths:
- 'bar/**'
jobs:
bar:
name: Bar Build
runs-on: ubuntu-latest
steps:
- run: echo ok
EOF
)" \
"bar.yml" \
1 \
"UNSAFE-PATH-FILTER"
# Case 3: required name has no emitter at all.
run_case "missing: required name not in any workflow" \
"Nonexistent Job" \
"$(cat <<'EOF'
name: Other
on:
pull_request:
jobs:
other:
name: Other Job
runs-on: ubuntu-latest
steps:
- run: echo ok
EOF
)" \
"other.yml" \
1 \
"MISSING: required check name 'Nonexistent Job'"
# Case 4: safe — top-level paths: filter is absent BUT per-step if-
# gates are present (single-job-with-per-step-if pattern, what
# ci.yml + e2e-api.yml use). Should exit 0.
run_case "safe: per-step if-gates without top-level paths" \
"Baz Build" \
"$(cat <<'EOF'
name: Baz
on:
push:
branches: [main]
pull_request:
jobs:
changes:
name: Detect changes
runs-on: ubuntu-latest
outputs:
baz: ${{ steps.check.outputs.baz }}
steps:
- id: check
run: echo "baz=true" >> "$GITHUB_OUTPUT"
baz:
needs: changes
name: Baz Build
runs-on: ubuntu-latest
steps:
- if: needs.changes.outputs.baz != 'true'
run: echo no-op
- if: needs.changes.outputs.baz == 'true'
run: echo real work
EOF
)" \
"baz.yml" \
0 \
""
# Case 5: unsafe-mix — top-level paths: AND per-step if-gates. The
# script flags this distinctly because the workflow may STILL skip
# entirely when paths exclude the commit (the per-step gates only
# matter if the workflow actually fires).
run_case "unsafe-mix: top-level paths: AND per-step if-gates" \
"Qux Build" \
"$(cat <<'EOF'
name: Qux
on:
push:
branches: [main]
paths:
- 'qux/**'
pull_request:
paths:
- 'qux/**'
jobs:
changes:
name: Detect changes
runs-on: ubuntu-latest
outputs:
qux: ${{ steps.check.outputs.qux }}
steps:
- id: check
run: echo "qux=true" >> "$GITHUB_OUTPUT"
qux:
needs: changes
name: Qux Build
runs-on: ubuntu-latest
steps:
- if: needs.changes.outputs.qux == 'true'
run: echo build
EOF
)" \
"qux.yml" \
1 \
"UNSAFE-MIX"
# Case 6: codeql.yml matrix — required names like "Analyze (go)" are
# generated by `Analyze (${{ matrix.language }})`. Script must
# special-case match this pattern.
run_case "matrix: codeql Analyze (go) is recognised via matrix expansion" \
"$(printf 'Analyze (go)\nAnalyze (javascript-typescript)\nAnalyze (python)')" \
"$(cat <<'EOF'
name: CodeQL
on:
pull_request:
jobs:
analyze:
name: Analyze (${{ matrix.language }})
runs-on: ubuntu-latest
strategy:
matrix:
language: [go, javascript-typescript, python]
steps:
- run: echo analyse
EOF
)" \
"codeql.yml" \
0 \
""
# Summary + exit. The exit code is the FAILED count so CI surfaces the
# number of broken cases directly.
echo ""
echo "================================================"
echo "test_check_name_parity: $PASSED passed, $FAILED failed"
echo "================================================"
exit "$FAILED"

View File

@ -1,49 +0,0 @@
# air.toml — live-reload config for local docker-compose dev mode.
#
# Active when the platform service runs from workspace-server/Dockerfile.dev
# (selected via docker-compose.dev.yml overlay). In production, the regular
# Dockerfile builds a static binary; air is dev-only.
#
# Reference: https://github.com/air-verse/air
root = "."
testdata_dir = "testdata"
tmp_dir = "tmp"
[build]
# Same build invocation as Dockerfile's builder stage minus the
# CGO_ENABLED=0 toggle (CGO ok in dev for richer race detector output).
cmd = "go build -o ./tmp/server ./cmd/server"
bin = "tmp/server"
full_bin = ""
args_bin = []
# Watch every .go, .yaml, and .tmpl file under workspace-server/.
include_ext = ["go", "yaml", "tmpl"]
# Don't watch tests, build artifacts, vendored deps, or migration .sql
# (migrations need a clean DB anyway — handled by docker-compose down/up).
exclude_dir = ["assets", "tmp", "vendor", "testdata", "node_modules"]
exclude_file = []
# _test.go and *_mock.go shouldn't trigger a rebuild — saves cycles.
exclude_regex = ["_test\\.go$", "_mock\\.go$"]
exclude_unchanged = true
follow_symlink = false
log = "build-errors.log"
# After send_interrupt, wait 1s for the old binary to exit before force-killing it.
kill_delay = "1s"
send_interrupt = true
stop_on_error = true
# Debounce: wait this long after last change before triggering rebuild.
delay = 500
[log]
time = false
[color]
main = "magenta"
watcher = "cyan"
build = "yellow"
runner = "green"
[misc]
# Don't keep the tmp/ dir around between runs.
clean_on_exit = true

View File

@ -1,5 +1,2 @@
# The compiled binary, not the cmd/server package.
/server
# air live-reload build cache (Dockerfile.dev + docker-compose.dev.yml).
/tmp/

View File

@ -1,15 +1,7 @@
# Platform-only image (no canvas). Used by publish-workspace-server-image
# workflow for ECR. Tenant image uses Dockerfile.tenant instead.
# Platform-only image (no canvas). Used by publish-platform-image workflow
# for GHCR + Fly registry. Tenant image uses Dockerfile.tenant instead.
#
# Templates + plugins are pre-cloned by scripts/clone-manifest.sh (in CI
# or on the operator host) into .tenant-bundle-deps/ — same pattern as
# Dockerfile.tenant. See that file's header for the full rationale; the
# short version is that post-2026-05-06 every workspace-template-* and
# org-template-* repo on Gitea is private, so an in-image `git clone`
# has no auth path that doesn't leak the Gitea token into a layer.
#
# Build context: repo root, with `.tenant-bundle-deps/` populated by the
# workflow's "Pre-clone manifest deps" step (Task #173).
# Build context: repo root.
FROM golang:1.25-alpine AS builder
WORKDIR /app
@ -34,18 +26,21 @@ RUN CGO_ENABLED=0 GOOS=linux go build \
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
-o /memory-plugin ./cmd/memory-plugin-postgres
# Clone templates + plugins at build time from manifest.json
FROM alpine:3.20 AS templates
RUN apk add --no-cache git jq
COPY manifest.json /manifest.json
COPY scripts/clone-manifest.sh /scripts/clone-manifest.sh
RUN chmod +x /scripts/clone-manifest.sh && /scripts/clone-manifest.sh /manifest.json /workspace-configs-templates /org-templates /plugins
FROM alpine:3.20
RUN apk add --no-cache ca-certificates git tzdata wget
COPY --from=builder /platform /platform
COPY --from=builder /memory-plugin /memory-plugin
COPY workspace-server/migrations /migrations
# Templates + plugins (pre-cloned by scripts/clone-manifest.sh in the
# trusted CI / operator-host context, .git already stripped). The Gitea
# token used to clone them never enters this image — same shape as
# Dockerfile.tenant.
COPY .tenant-bundle-deps/workspace-configs-templates /workspace-configs-templates
COPY .tenant-bundle-deps/org-templates /org-templates
COPY .tenant-bundle-deps/plugins /plugins
COPY --from=templates /workspace-configs-templates /workspace-configs-templates
COPY --from=templates /org-templates /org-templates
COPY --from=templates /plugins /plugins
# Non-root runtime with Docker socket access for workspace provisioning.
RUN addgroup -g 1000 platform && adduser -u 1000 -G platform -s /bin/sh -D platform
EXPOSE 8080

View File

@ -1,44 +0,0 @@
# Dockerfile.dev — local-development image with air-driven live reload.
#
# Selected by docker-compose.dev.yml (overlay over docker-compose.yml).
# Production stays on workspace-server/Dockerfile (static binary, no air).
#
# Workflow:
# 1. docker compose -f docker-compose.yml -f docker-compose.dev.yml up
# 2. Edit any .go file under workspace-server/
# 3. air detects, rebuilds, kills old binary, starts new one (~3-5s)
# 4. No `docker compose up --build` needed
#
# Templates + plugins are NOT pre-cloned here — air-mode assumes the
# developer's filesystem has the workspace-configs-templates/ + plugins/
# dirs available, mounted at runtime via docker-compose.dev.yml.
FROM golang:1.25-alpine
# air + git (for go mod) + ca-certs (for TLS) + tzdata (for time-zone DB)
# + docker-cli + docker-cli-buildx so the platform binary can shell out to
# /var/run/docker.sock (bind-mounted from host) for local-build provisioning.
# docker-cli alone is insufficient: alpine's docker-cli enables BuildKit by
# default but ships without buildx, producing
# `ERROR: BuildKit is enabled but the buildx component is missing or broken`
# on every `docker build`. docker-cli-buildx provides the buildx subcommand.
RUN apk add --no-cache git ca-certificates tzdata wget docker-cli docker-cli-buildx \
&& go install github.com/air-verse/air@latest
WORKDIR /app/workspace-server
# Pre-fetch deps so the first `air` rebuild on a fresh container is fast.
# These are bind-mount-overridden at runtime, so the COPY here is just
# to warm the module cache.
COPY workspace-server/go.mod workspace-server/go.sum ./
RUN go mod download
# Source is bind-mounted at runtime (see docker-compose.dev.yml volumes
# block) so the Dockerfile doesn't need to COPY it. air watches the
# bind-mounted dir for changes.
ENV CGO_ENABLED=0
ENV GOFLAGS="-buildvcs=false"
# Run air with the .air.toml in the bind-mounted source dir.
CMD ["air", "-c", ".air.toml"]

View File

@ -3,34 +3,14 @@
# Serves both the API (Go on :8080) and the UI (Node.js on :3000) in a
# single container. Go reverse-proxies unknown routes to canvas.
#
# Templates + plugins are NOT cloned at build time. They are pre-cloned
# in the trusted CI context (or operator host) by
# `scripts/clone-manifest.sh` into `.tenant-bundle-deps/` and COPYed in.
# The reason: post-2026-05-06, every workspace-template-* repo on Gitea
# (codex, crewai, deepagents, gemini-cli, langgraph) plus all 7
# org-template-* repos are private, so the Docker build can't `git clone`
# from inside the build context — there's no auth path that doesn't leak
# the Gitea token into an image layer. Pre-cloning keeps the token in
# the CI environment only; the resulting image carries the cloned trees
# with `.git` already stripped (see clone-manifest.sh).
# Templates are cloned from standalone GitHub repos at build time so the
# monorepo doesn't need to carry them. The repos are public; no auth.
#
# Build context: repo root, with `.tenant-bundle-deps/` populated by:
#
# MOLECULE_GITEA_TOKEN=<persona-PAT> scripts/clone-manifest.sh \
# manifest.json \
# .tenant-bundle-deps/workspace-configs-templates \
# .tenant-bundle-deps/org-templates \
# .tenant-bundle-deps/plugins
#
# In CI this happens in publish-workspace-server-image.yml's "Pre-clone
# manifest deps" step (uses AUTO_SYNC_TOKEN = devops-engineer persona).
# For a manual operator-host build, source the same token from
# /etc/molecule-bootstrap/agent-secrets.env first.
# Build context: repo root.
#
# docker buildx build --platform linux/amd64 \
# -f workspace-server/Dockerfile.tenant \
# -t <ECR>/molecule-ai/platform-tenant:latest \
# --build-arg GIT_SHA=<sha> --build-arg NEXT_PUBLIC_PLATFORM_URL= \
# -t registry.fly.io/molecule-tenant:latest \
# --push .
# ── Stage 1: Go platform binary ──────────────────────────────────────
@ -75,7 +55,14 @@ ENV NEXT_PUBLIC_PLATFORM_URL=$NEXT_PUBLIC_PLATFORM_URL
ENV NEXT_PUBLIC_WS_URL=$NEXT_PUBLIC_WS_URL
RUN npm run build
# ── Stage 3: Runtime ──────────────────────────────────────────────────
# ── Stage 3: Clone templates + plugins from manifest.json ─────────────
FROM alpine:3.20 AS templates
RUN apk add --no-cache git jq
COPY manifest.json /manifest.json
COPY scripts/clone-manifest.sh /scripts/clone-manifest.sh
RUN chmod +x /scripts/clone-manifest.sh && /scripts/clone-manifest.sh /manifest.json /workspace-configs-templates /org-templates /plugins
# ── Stage 4: Runtime ──────────────────────────────────────────────────
FROM node:20-alpine
RUN apk add --no-cache ca-certificates git tzdata openssh-client aws-cli
@ -100,13 +87,10 @@ COPY --from=go-builder /platform /platform
COPY --from=go-builder /memory-plugin /memory-plugin
COPY workspace-server/migrations /migrations
# Templates + plugins (pre-cloned by scripts/clone-manifest.sh in the
# trusted CI / operator-host context, .git already stripped — see
# .tenant-bundle-deps/ in the build context). The Gitea token used to
# clone them never enters this image.
COPY .tenant-bundle-deps/workspace-configs-templates /workspace-configs-templates
COPY .tenant-bundle-deps/org-templates /org-templates
COPY .tenant-bundle-deps/plugins /plugins
# Templates + plugins (cloned from GitHub in stage 3)
COPY --from=templates /workspace-configs-templates /workspace-configs-templates
COPY --from=templates /org-templates /org-templates
COPY --from=templates /plugins /plugins
# Canvas standalone
WORKDIR /canvas

View File

@ -1,89 +0,0 @@
package main
import "testing"
// TestResolveBindHost pins the precedence order: an explicit BIND_ADDR
// always wins; otherwise dev-mode fail-open defaults to loopback
// "127.0.0.1"; otherwise "" (Go binds every interface — the production
// shape).
//
// Mutation-test invariant: removing the IsDevModeFailOpen() branch makes
// "no_bindaddr_devmode_unset_admin" fail (returns "" instead of "127.0.0.1").
// Removing the BIND_ADDR branch makes "explicit_bindaddr_*" cases fail.
func TestResolveBindHost(t *testing.T) {
	type fixture struct {
		name, bindAddr, adminToken, molEnv, want string
	}
	table := []fixture{
		// Dev-mode fail-open (no ADMIN_TOKEN, dev-ish MOLECULE_ENV) → loopback.
		{name: "no_bindaddr_devmode_unset_admin", molEnv: "dev", want: "127.0.0.1"},
		{name: "no_bindaddr_devmode_unset_admin_full_word", molEnv: "development", want: "127.0.0.1"},
		// ADMIN_TOKEN flips IsDevModeFailOpen to false → all interfaces.
		{name: "no_bindaddr_admin_set_in_dev_env", adminToken: "secret", molEnv: "dev", want: ""},
		// production is not a dev value → all interfaces.
		{name: "no_bindaddr_production_env", molEnv: "production", want: ""},
		// unset MOLECULE_ENV → not dev → all interfaces.
		{name: "no_bindaddr_unset_env", want: ""},
		// Explicit BIND_ADDR wins regardless of mode.
		{name: "explicit_bindaddr_loopback_overrides_devmode", bindAddr: "127.0.0.1", molEnv: "dev", want: "127.0.0.1"},
		{name: "explicit_bindaddr_wildcard_overrides_devmode_default", bindAddr: "0.0.0.0", molEnv: "dev", want: "0.0.0.0"},
		{name: "explicit_bindaddr_in_production", bindAddr: "10.0.5.7", adminToken: "secret", molEnv: "production", want: "10.0.5.7"},
	}
	for _, tt := range table {
		tt := tt
		t.Run(tt.name, func(t *testing.T) {
			t.Setenv("BIND_ADDR", tt.bindAddr)
			t.Setenv("ADMIN_TOKEN", tt.adminToken)
			t.Setenv("MOLECULE_ENV", tt.molEnv)
			if got := resolveBindHost(); got != tt.want {
				t.Errorf("resolveBindHost() = %q, want %q (BIND_ADDR=%q ADMIN_TOKEN=%q MOLECULE_ENV=%q)",
					got, tt.want, tt.bindAddr, tt.adminToken, tt.molEnv)
			}
		})
	}
}

View File

@ -19,7 +19,6 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/imagewatch"
memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
@ -333,23 +332,15 @@ func main() {
// Router
r := router.Setup(hub, broadcaster, prov, platformURL, configsDir, wh, channelMgr, memBundle)
// HTTP server with graceful shutdown.
//
// Bind host: in dev-mode (no ADMIN_TOKEN, MOLECULE_ENV=dev|development)
// the AdminAuth chain fails open by design; pairing that with a wildcard
// bind would expose unauth /workspaces to any same-LAN peer. Default to
// loopback when fail-open is active. Operators who need LAN exposure set
// BIND_ADDR=0.0.0.0 explicitly. Production (ADMIN_TOKEN set) is unchanged.
// See molecule-core#7.
bindHost := resolveBindHost()
// HTTP server with graceful shutdown
srv := &http.Server{
Addr: fmt.Sprintf("%s:%s", bindHost, port),
Addr: fmt.Sprintf(":%s", port),
Handler: r,
}
// Start server in goroutine
go func() {
log.Printf("Platform starting on %s:%s (dev-mode-fail-open=%v)", bindHost, port, middleware.IsDevModeFailOpen())
log.Printf("Platform starting on :%s", port)
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
log.Fatalf("Server failed: %v", err)
}
@ -384,29 +375,6 @@ func envOr(key, fallback string) string {
return fallback
}
// resolveBindHost picks the listener interface for the HTTP server.
//
// Precedence:
// 1. BIND_ADDR — explicit operator override (any value, including "0.0.0.0").
// 2. dev-mode fail-open active → "127.0.0.1" (loopback only).
// 3. otherwise → "" (Go binds every interface; existing prod/self-host shape).
//
// Coupling the loopback default to middleware.IsDevModeFailOpen() means the
// two safety levers — bind narrowness and auth strength — move together. A
// production deploy (ADMIN_TOKEN set) keeps binding to all interfaces because
// the auth chain is doing its job; a dev Mac (no ADMIN_TOKEN, MOLECULE_ENV=dev)
// is reachable only via loopback because the auth chain is fail-open. See
// molecule-core#7 for the original LAN exposure finding.
func resolveBindHost() string {
if v := os.Getenv("BIND_ADDR"); v != "" {
return v
}
if middleware.IsDevModeFailOpen() {
return "127.0.0.1"
}
return ""
}
func findConfigsDir() string {
candidates := []string{
"workspace-configs-templates",

View File

@ -413,56 +413,11 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
return http.StatusOK, respBody, nil
}
// Mock-runtime short-circuit. Workspaces with runtime='mock' have
// no container, no EC2, no URL — every reply is synthesised here
// from a small canned-variant pool. Built for the "200-workspace
// mock org" demo: a CEO/VPs/Managers/ICs hierarchy that renders
// at scale on the canvas without burning real LLM credits or
// provisioning 200 EC2 instances. See mock_runtime.go for the
// full rationale + reply shape contract.
//
// Position: AFTER poll-mode (mock isn't a delivery mode, it's a
// runtime; treating poll-set-on-mock as poll matches operator
// intent if anyone ever does that), BEFORE resolveAgentURL (mock
// has no URL — going through resolveAgentURL would 404 on the
// SELECT url since the row is provisioned as NULL).
if status, respBody, handled := h.handleMockA2A(ctx, workspaceID, callerID, body, a2aMethod, logActivity); handled {
return status, respBody, nil
}
agentURL, proxyErr := h.resolveAgentURL(ctx, workspaceID)
if proxyErr != nil {
return 0, nil, proxyErr
}
// Pre-flight container-health check (#36). The dispatchA2A path below
// does Docker-DNS forwarding to `ws-<wsShort>:8000` and only catches a
// missing/dead container REACTIVELY via maybeMarkContainerDead in
// handleA2ADispatchError. That works but costs the caller a full
// network-timeout (2-30s) before the structured 503 surfaces.
//
// When we KNOW the workspace is container-backed (h.docker != nil + we
// rewrite to Docker-DNS form below), do a single proactive
// RunningContainerName lookup. If the container is genuinely missing,
// short-circuit with the same structured 503 + async restart that
// maybeMarkContainerDead would produce — but immediately, without the
// network round-trip.
//
// Three outcomes of provisioner.RunningContainerName(ctx, h.docker, id):
// ("ws-<id>", nil) → forward as today.
// ("", nil) → container is genuinely not running. Fast-503.
// ("", err) → transient daemon error. Fall through to optimistic
// forward — matches Provisioner.IsRunning's
// (true, err) "fail-soft as alive" contract.
//
// Same SSOT as findRunningContainer (#10/#12). See AST gate
// TestProxyA2A_RoutesThroughProvisionerSSOT.
if h.provisioner != nil && platformInDocker && strings.HasPrefix(agentURL, "http://"+provisioner.ContainerName(workspaceID)+":") {
if proxyErr := h.preflightContainerHealth(ctx, workspaceID); proxyErr != nil {
return 0, nil, proxyErr
}
}
startTime := time.Now()
resp, cancelFwd, err := h.dispatchA2A(ctx, workspaceID, agentURL, body, callerID)
if cancelFwd != nil {

View File

@ -198,60 +198,6 @@ func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspace
return true
}
// preflightContainerHealth runs a proactive Provisioner.IsRunning check
// (#36) before dispatching the a2a forward. Routed through provisioner's
// SSOT IsRunning, which itself wraps RunningContainerName — same source
// as findRunningContainer in the plugins handler (#10/#12).
//
// Returns nil when the forward should proceed:
// - container is running, OR
// - daemon errored transiently (matches IsRunning's (true, err)
// "fail-soft as alive" contract — let the optimistic forward run
// and reactive maybeMarkContainerDead catch a real failure).
//
// Returns a structured 503 + triggers the same async restart that
// maybeMarkContainerDead would produce, when:
// - container is genuinely not running (NotFound / Exited / Created…).
//
// The point of running this BEFORE the forward is to save the caller
// 2-30s of network-timeout cost when the container is missing — a common
// shape post-EC2-replace (see molecule-controlplane#20 incident
// 2026-05-07) where the reconciler hasn't respawned the agent yet.
func (h *WorkspaceHandler) preflightContainerHealth(ctx context.Context, workspaceID string) *proxyA2AError {
running, err := h.provisioner.IsRunning(ctx, workspaceID)
if err != nil {
// Transient daemon error. Provisioner.IsRunning returns (true, err)
// in this case — fall through to the optimistic forward, reactive
// maybeMarkContainerDead handles a real failure later.
log.Printf("ProxyA2A preflight: IsRunning transient error for %s: %v (proceeding with forward)", workspaceID, err)
return nil
}
if running {
// Container is running — forward as today.
return nil
}
// Container is genuinely not running. Mark offline + trigger restart
// (same effect as maybeMarkContainerDead's branch), and return the
// structured 503 immediately so the caller skips the forward.
log.Printf("ProxyA2A preflight: container for %s is not running — marking offline and triggering restart (#36)", workspaceID)
if _, dbErr := db.DB.ExecContext(ctx,
`UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2 AND status NOT IN ('removed', 'provisioning')`,
models.StatusOffline, workspaceID); dbErr != nil {
log.Printf("ProxyA2A preflight: failed to mark workspace %s offline: %v", workspaceID, dbErr)
}
db.ClearWorkspaceKeys(ctx, workspaceID)
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOffline), workspaceID, map[string]interface{}{})
go h.RestartByID(workspaceID)
return &proxyA2AError{
Status: http.StatusServiceUnavailable,
Response: gin.H{
"error": "workspace container not running — restart triggered",
"restarting": true,
"preflight": true, // distinguishes from reactive containerDead path
},
}
}
// logA2AFailure records a failed A2A attempt to activity_logs in a detached
// goroutine (the request context may already be done by the time it runs).
func (h *WorkspaceHandler) logA2AFailure(ctx context.Context, workspaceID, callerID string, body []byte, a2aMethod string, err error, durationMs int) {

View File

@ -1,194 +0,0 @@
package handlers
import (
"context"
"errors"
"go/ast"
"go/parser"
"go/token"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
)
// preflightLocalProv is a controllable LocalProvisionerAPI stub for the
// preflight tests (#36). Only IsRunning is scripted (via the struct
// fields); every other API method panics so a test that should be using
// a different stub fails loudly instead of passing by accident.
type preflightLocalProv struct {
	running    bool     // scripted IsRunning result
	err        error    // scripted IsRunning error
	calls      int      // how many times IsRunning was invoked
	calledWith []string // workspace ids passed to IsRunning, in order
}

// IsRunning records the invocation and replays the scripted (running, err).
func (s *preflightLocalProv) IsRunning(_ context.Context, workspaceID string) (bool, error) {
	s.calls++
	s.calledWith = append(s.calledWith, workspaceID)
	return s.running, s.err
}

func (s *preflightLocalProv) Start(_ context.Context, _ provisioner.WorkspaceConfig) (string, error) {
	panic("preflightLocalProv: Start not implemented")
}

func (s *preflightLocalProv) Stop(_ context.Context, _ string) error {
	panic("preflightLocalProv: Stop not implemented")
}

func (s *preflightLocalProv) ExecRead(_ context.Context, _, _ string) ([]byte, error) {
	panic("preflightLocalProv: ExecRead not implemented")
}

func (s *preflightLocalProv) RemoveVolume(_ context.Context, _ string) error {
	panic("preflightLocalProv: RemoveVolume not implemented")
}

func (s *preflightLocalProv) VolumeHasFile(_ context.Context, _, _ string) (bool, error) {
	panic("preflightLocalProv: VolumeHasFile not implemented")
}

func (s *preflightLocalProv) WriteAuthTokenToVolume(_ context.Context, _, _ string) error {
	panic("preflightLocalProv: WriteAuthTokenToVolume not implemented")
}
// TestPreflight_ContainerRunning_ReturnsNil — IsRunning(true,nil): the
// healthy path. preflight returns nil (the caller proceeds to
// dispatchA2A), and the provisioner is consulted exactly once with the
// workspace id.
func TestPreflight_ContainerRunning_ReturnsNil(t *testing.T) {
	_ = setupTestDB(t)
	prov := &preflightLocalProv{running: true}
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
	handler.provisioner = prov
	if err := handler.preflightContainerHealth(context.Background(), "ws-running-123"); err != nil {
		t.Fatalf("preflight should return nil when container running, got %+v", err)
	}
	if prov.calls != 1 {
		t.Errorf("IsRunning should be called exactly once, got %d", prov.calls)
	}
	if len(prov.calledWith) != 1 || prov.calledWith[0] != "ws-running-123" {
		t.Errorf("IsRunning should be called with workspace id, got %v", prov.calledWith)
	}
}
// TestPreflight_ContainerNotRunning_StructuredFastFail — IsRunning(false,nil):
// preflight returns structured 503 with restarting=true + preflight=true, AND
// triggers the offline-flip + WORKSPACE_OFFLINE broadcast + async restart.
// This is the load-bearing case — saves the caller 2-30s of network timeout.
func TestPreflight_ContainerNotRunning_StructuredFastFail(t *testing.T) {
mock := setupTestDB(t)
_ = setupTestRedis(t)
stub := &preflightLocalProv{running: false, err: nil}
h := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
h.provisioner = stub
// Expect the offline-flip UPDATE. sqlmock enforces these expectations
// in declaration order at cleanup.
mock.ExpectExec(`UPDATE workspaces SET status =`).
WithArgs(models.StatusOffline, "ws-dead-456").
WillReturnResult(sqlmock.NewResult(0, 1))
// Broadcaster's INSERT INTO structure_events fires too — best-effort
// log entry for the WORKSPACE_OFFLINE event. Match permissively.
mock.ExpectExec(`INSERT INTO structure_events`).
WillReturnResult(sqlmock.NewResult(0, 1))
proxyErr := h.preflightContainerHealth(context.Background(), "ws-dead-456")
if proxyErr == nil {
t.Fatal("preflight should return *proxyA2AError when container not running")
}
// Pin the full structured-503 contract, not just the status code.
if proxyErr.Status != 503 {
t.Errorf("expected 503, got %d", proxyErr.Status)
}
if got := proxyErr.Response["restarting"]; got != true {
t.Errorf("response should mark restarting=true, got %v", got)
}
if got := proxyErr.Response["preflight"]; got != true {
t.Errorf("response should mark preflight=true so callers can distinguish from reactive containerDead, got %v", got)
}
if got := proxyErr.Response["error"]; got != "workspace container not running — restart triggered" {
t.Errorf("error message mismatch, got %q", got)
}
// Note: broadcaster firing is exercised by the production path's
// h.broadcaster.RecordAndBroadcast call but not asserted here — the
// real *events.Broadcaster doesn't expose received events for inspection.
// The DB UPDATE expectation is sufficient to pin the offline-flip path.
}
// TestPreflight_TransientError_FailsSoftAsAlive — IsRunning(true,err): the
// (true, err) "fail-soft" contract. preflight must return nil so the
// optimistic forward runs (reactive maybeMarkContainerDead owns real
// failures) and must NOT touch the DB — a flaky daemon must never
// trigger a restart cascade.
func TestPreflight_TransientError_FailsSoftAsAlive(t *testing.T) {
	_ = setupTestDB(t)
	prov := &preflightLocalProv{running: true, err: errors.New("docker daemon EOF")}
	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
	handler.provisioner = prov
	if err := handler.preflightContainerHealth(context.Background(), "ws-flaky-789"); err != nil {
		t.Fatalf("preflight should return nil on transient error (fail-soft), got %+v", err)
	}
	// sqlmock fails the test at cleanup on any unexpected DB call, which
	// pins "no offline-flip UPDATE fired" without an explicit assertion.
}
// TestProxyA2A_Preflight_RoutesThroughProvisionerSSOT — AST gate (#36 mirror
// of #12's gate). Pins the invariant that preflightContainerHealth uses the
// SSOT Provisioner.IsRunning helper, NOT a parallel docker.ContainerInspect
// of its own.
//
// Mutation invariant: if a future PR replaces h.provisioner.IsRunning with
// a direct cli.ContainerInspect call, this test fails. That's the signal to
// either (a) extend Provisioner.IsRunning's contract OR (b) document why
// this call site needs to differ. Either way, the drift gets a reviewer's
// attention instead of shipping silently.
func TestProxyA2A_Preflight_RoutesThroughProvisionerSSOT(t *testing.T) {
	fset := token.NewFileSet()
	parsed, err := parser.ParseFile(fset, "a2a_proxy_helpers.go", nil, parser.ParseComments)
	if err != nil {
		t.Fatalf("parse a2a_proxy_helpers.go: %v", err)
	}
	// FuncDecls only occur at the top level of a file, so a flat scan of
	// Decls finds the target without a full ast.Inspect of the file.
	var target *ast.FuncDecl
	for _, decl := range parsed.Decls {
		if fd, ok := decl.(*ast.FuncDecl); ok && fd.Name.Name == "preflightContainerHealth" {
			target = fd
			break
		}
	}
	if target == nil {
		t.Fatal("preflightContainerHealth not found — was it renamed? update this gate or the SSOT routing assumption")
	}
	// Collect the selector names of every method/function call in the body.
	// Note: direct RunningContainerName is also acceptable SSOT — but
	// preferring IsRunning keeps the (bool, error) contract that already
	// exists in the helper API surface.
	calls := map[string]bool{}
	ast.Inspect(target.Body, func(n ast.Node) bool {
		call, ok := n.(*ast.CallExpr)
		if !ok {
			return true
		}
		if sel, ok := call.Fun.(*ast.SelectorExpr); ok {
			calls[sel.Sel.Name] = true
		}
		return true
	})
	if !calls["IsRunning"] && !calls["RunningContainerName"] {
		t.Errorf("preflightContainerHealth must call provisioner.IsRunning OR provisioner.RunningContainerName for the SSOT health check — see molecule-core#36. Found neither.")
	}
	if calls["ContainerInspect"] {
		t.Errorf("preflightContainerHealth carries a direct ContainerInspect call. This is the parallel-impl drift molecule-core#36 fixed. " +
			"Either route through provisioner.IsRunning OR — if a new use case truly needs a different inspect — extend the helper's contract first and update this gate to allow the specific delta.")
	}
}

View File

@ -108,18 +108,6 @@ type eicTunnelPool struct {
// First acquirer takes the slot; later ones wait on the channel.
pendingSetups map[string]chan struct{}
stopJanitor chan struct{}
// janitorInterval is captured at pool construction from the
// package-level poolJanitorInterval var. Captured (not re-read on
// every tick) so a test that swaps the package var via t.Cleanup
// after a global pool's janitor is already running can't race
// with that goroutine's ticker read. The global pool is created
// lazily once per process via sync.Once; before this capture
// landed, every test that touched poolJanitorInterval after the
// global pool's first-touch raced the janitor (caught by -race
// on staging tip 249dbc6a — TestPooledWithEICTunnel_PanicPoisonsEntry).
// Tests still get the new value on a freshPool() because they
// set the package var BEFORE calling newEICTunnelPool().
janitorInterval time.Duration
}
var (
@ -139,16 +127,11 @@ func getEICTunnelPool() *eicTunnelPool {
// newEICTunnelPool constructs an empty pool. Exported so tests can
// build isolated pools without sharing the singleton.
//
// Captures poolJanitorInterval at construction time so the janitor
// goroutine doesn't race with t.Cleanup-driven swaps of the package
// var. See the janitorInterval field comment for the failure mode.
func newEICTunnelPool() *eicTunnelPool {
return &eicTunnelPool{
entries: map[string]*pooledTunnel{},
pendingSetups: map[string]chan struct{}{},
stopJanitor: make(chan struct{}),
janitorInterval: poolJanitorInterval,
entries: map[string]*pooledTunnel{},
pendingSetups: map[string]chan struct{}{},
stopJanitor: make(chan struct{}),
}
}
@ -307,11 +290,8 @@ func (p *eicTunnelPool) evictLRUIfFullLocked(skipInstance string) {
// janitor periodically scans for entries that are idle AND expired,
// closing their tunnels. Runs forever (per pool lifetime); cancelled
// by close(p.stopJanitor) for tests that build short-lived pools.
//
// Reads p.janitorInterval (captured at construction) instead of the
// package-level poolJanitorInterval — see janitorInterval field comment.
func (p *eicTunnelPool) janitor() {
t := time.NewTicker(p.janitorInterval)
t := time.NewTicker(poolJanitorInterval)
defer t.Stop()
for {
select {

View File

@ -26,14 +26,6 @@ func TestExtended_WorkspaceDelete(t *testing.T) {
WithArgs(wsDelID).
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}))
// CascadeDelete walks descendants unconditionally (the 0-children
// optimization in the old inline path was dropped during the
// CascadeDelete extraction — descendant CTE returns 0 rows here,
// same end state, one extra cheap query).
mock.ExpectQuery("WITH RECURSIVE descendants").
WithArgs(wsDelID).
WillReturnRows(sqlmock.NewRows([]string{"id"}))
// #73: batch UPDATE happens BEFORE any container teardown.
// Uses ANY($1::uuid[]) even with a single ID for consistency.
mock.ExpectExec("UPDATE workspaces SET status =").

View File

@ -1,375 +0,0 @@
package handlers
import (
"archive/tar"
"bytes"
"net"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
"gopkg.in/yaml.v3"
)
// Local E2E for the dev-department extraction (RFC internal#77).
//
// Pre-conditions: both repos cloned as siblings under
// /tmp/local-e2e-deploy/{molecule-dev, molecule-dev-department}.
// (Set up by the orchestrator before running this test.)
//
// End-to-end, through real platform code, this proves:
//  1. resolveYAMLIncludes follows the dev-lead symlink at the parent's
//     template root and pulls in the dev-department subtree.
//  2. Recursive !include's inside the symlinked subtree resolve correctly
//     via the chain dev-lead/workspace.yaml → ./core-lead/workspace.yaml →
//     ./core-be/workspace.yaml etc.
//  3. The resolved YAML unmarshals into a complete OrgTemplate with the
//     expected count of workspaces (parent's PM+Marketing+Research +
//     dev-department's atomized 28 workspaces).
//
// Skipped when the local-e2e-deploy fixture is absent — won't block CI on
// hosts that haven't set it up.
func TestLocalE2E_DevDepartmentExtraction(t *testing.T) {
	root := "/tmp/local-e2e-deploy/molecule-dev"
	if _, err := os.Stat(filepath.Join(root, "org.yaml")); err != nil {
		t.Skipf("local-e2e fixture not present at %s: %v", root, err)
	}
	raw, err := os.ReadFile(filepath.Join(root, "org.yaml"))
	if err != nil {
		t.Fatalf("read org.yaml: %v", err)
	}
	resolved, err := resolveYAMLIncludes(raw, root)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes failed: %v", err)
	}
	var tmpl OrgTemplate
	if err := yaml.Unmarshal(resolved, &tmpl); err != nil {
		t.Fatalf("unmarshal expanded OrgTemplate: %v", err)
	}
	// Depth-first over the full workspace tree, collecting every name.
	var names []string
	var collect func([]OrgWorkspace)
	collect = func(level []OrgWorkspace) {
		for _, w := range level {
			names = append(names, w.Name)
			collect(w.Children)
		}
	}
	collect(tmpl.Workspaces)
	t.Logf("org name: %q", tmpl.Name)
	t.Logf("total workspaces (recursive): %d", len(names))
	for _, n := range names {
		t.Logf(" - %q", n)
	}
	// Expected: PM + Marketing Lead + Dev Lead at top level, plus the
	// full sub-trees under each. After atomization, roughly:
	//   - PM tree: PM + Research Lead + 3 research roles = 5
	//   - Marketing tree: Marketing Lead + 5 marketing roles = 6
	//   - Dev Lead tree: Dev Lead + (5 sub-team leads × ~6 each) +
	//     3 floaters + Triage Operator = ~32
	// ~43 total. Be liberal; just assert a floor.
	if len(names) < 30 {
		t.Errorf("workspace count too low (%d) — expected ~40+ (PM+Marketing+Dev tree)", len(names))
	}
	// Sentinel names that must appear somewhere in the tree.
	sentinels := []string{
		"PM",
		"Marketing Lead",
		"Dev Lead",
		"Core Platform Lead",
		"Controlplane Lead",
		"App & Docs Lead",
		"Infra Lead",
		"SDK Lead",
		"Documentation Specialist", // Q1 — should be under app-lead
		"Triage Operator",          // Q2 — should be under dev-lead
	}
	present := make(map[string]bool, len(names))
	for _, n := range names {
		present[n] = true
	}
	for _, want := range sentinels {
		if !present[want] {
			t.Errorf("missing expected workspace %q", want)
		}
	}
}
// Stage-2 of the local e2e: prove every resolved workspace's `files_dir`
// path actually consumes correctly through the rest of the import chain.
// resolveYAMLIncludes returning a populated OrgTemplate is necessary but
// not sufficient — `POST /org/import` then does:
//
//  1. resolveInsideRoot(orgBaseDir, ws.FilesDir) → must return a path
//     that exists and stat-resolves to a directory (org_import.go:313-317).
//  2. CopyTemplateToContainer(ctx, containerID, templatePath) → walks
//     the dir with filepath.Walk and tars its contents into the
//     workspace's /configs/ mount (provisioner.go:766-820).
//
// This stage-2 test exercises both #1 and #2 against every workspace in
// the resolved tree, mimicking what the platform does post-include-
// resolution. Catches: files_dir paths that don't resolve through the
// symlink, paths that exist but are empty (silently produces empty
// /configs/), or filepath.Walk failing to descend through cross-repo
// symlink boundaries.
func TestLocalE2E_FilesDirConsumption(t *testing.T) {
	parent := "/tmp/local-e2e-deploy/molecule-dev"
	if _, err := os.Stat(filepath.Join(parent, "org.yaml")); err != nil {
		t.Skipf("local-e2e fixture not present at %s: %v", parent, err)
	}
	orgYAML, err := os.ReadFile(filepath.Join(parent, "org.yaml"))
	if err != nil {
		t.Fatalf("read org.yaml: %v", err)
	}
	expanded, err := resolveYAMLIncludes(orgYAML, parent)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes: %v", err)
	}
	var tmpl OrgTemplate
	if err := yaml.Unmarshal(expanded, &tmpl); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	// Flatten every workspace — including children, grandchildren, etc.
	flat := []OrgWorkspace{}
	var walk func([]OrgWorkspace)
	walk = func(ws []OrgWorkspace) {
		for _, w := range ws {
			flat = append(flat, w)
			walk(w.Children)
		}
	}
	walk(tmpl.Workspaces)
	checked := 0
	for _, w := range flat {
		if w.FilesDir == "" {
			continue // workspace declared inline (no files_dir) — skip
		}
		checked++
		w := w // pin loop var for the subtest closure (pre-Go1.22 hygiene)
		t.Run(w.Name+"/"+w.FilesDir, func(t *testing.T) {
			// Step 1: resolveInsideRoot returns a path that's-inside-root.
			abs, err := resolveInsideRoot(parent, w.FilesDir)
			if err != nil {
				t.Fatalf("resolveInsideRoot(%q, %q): %v", parent, w.FilesDir, err)
			}
			info, err := os.Stat(abs)
			if err != nil {
				t.Fatalf("stat %q (resolved from files_dir %q): %v", abs, w.FilesDir, err)
			}
			if !info.IsDir() {
				t.Fatalf("files_dir %q resolved to %q which is not a directory", w.FilesDir, abs)
			}
			// Step 2: walk the dir like CopyTemplateToContainer does.
			// Mirror the platform's symlink-resolution at the root —
			// filepath.Walk doesn't descend into a symlink leaf, so
			// CopyTemplateToContainer (provisioner.go) calls
			// EvalSymlinks on templatePath first. Replicate exactly.
			if resolved, err := filepath.EvalSymlinks(abs); err == nil {
				abs = resolved
			}
			var buf bytes.Buffer
			tw := tar.NewWriter(&buf)
			fileCount := 0
			fileNames := []string{}
			err = filepath.Walk(abs, func(path string, info os.FileInfo, err error) error {
				if err != nil {
					return err
				}
				rel, err := filepath.Rel(abs, path)
				if err != nil {
					return err
				}
				if rel == "." {
					return nil
				}
				// FIX: FileInfoHeader's error was silently discarded
				// (`header, _ :=`) — surface it so a broken FileInfo
				// fails the walk instead of tarring a garbage header.
				header, err := tar.FileInfoHeader(info, "")
				if err != nil {
					return err
				}
				header.Name = rel
				if err := tw.WriteHeader(header); err != nil {
					return err
				}
				if !info.IsDir() {
					fileCount++
					fileNames = append(fileNames, rel)
					data, err := os.ReadFile(path)
					if err != nil {
						return err
					}
					// FIX: the old code mutated header.Size AFTER the
					// header had already been written (a dead store —
					// the size FileInfoHeader derived from info is what
					// went out on the wire) and ignored tw.Write's
					// error. Write the body and check it.
					if _, err := tw.Write(data); err != nil {
						return err
					}
				}
				return nil
			})
			if err != nil {
				t.Fatalf("filepath.Walk %q (mimics CopyTemplateToContainer): %v", abs, err)
			}
			// FIX: Close flushes padding and reports deferred write
			// errors (e.g. a body shorter than its header's size);
			// previously its error was dropped.
			if err := tw.Close(); err != nil {
				t.Fatalf("tar writer close: %v", err)
			}
			if fileCount == 0 {
				t.Errorf("files_dir %q at %q is empty — CopyTemplateToContainer would produce empty /configs/",
					w.FilesDir, abs)
			}
			// Sanity: every workspace folder should have AT LEAST one of
			// {workspace.yaml, system-prompt.md, initial-prompt.md} —
			// these are the markers a workspace folder is recognizable
			// as a workspace (mirrors validator's WORKSPACE_FOLDER_MARKERS).
			markers := []string{"workspace.yaml", "system-prompt.md", "initial-prompt.md"}
			hasMarker := false
			for _, name := range fileNames {
				for _, m := range markers {
					if name == m || strings.HasSuffix(name, "/"+m) {
						hasMarker = true
						break
					}
				}
				if hasMarker {
					break
				}
			}
			if !hasMarker {
				t.Errorf("files_dir %q at %q has %d files but none of the workspace markers %v — found: %v",
					w.FilesDir, abs, fileCount, markers, fileNames)
			}
		})
	}
	t.Logf("checked %d workspaces with files_dir", checked)
	if checked < 25 {
		t.Errorf("expected ~28 workspaces with files_dir (post-atomization); only saw %d", checked)
	}
}
// PR-C from the Phase 3a phasing (task #234): real-Gitea e2e for the
// !external resolver against the LIVE molecule-ai/molecule-dev-department
// repo. Verifies the production gitFetcher fetches the dev tree and the
// resolver grafts it correctly into a parent template that has NO
// symlink — composition is purely platform-side.
//
// Skipped if Gitea isn't reachable (offline / firewall / CI without
// network). Requires `git` binary on PATH.
func TestLocalE2E_ExternalDevDepartment(t *testing.T) {
	if _, err := exec.LookPath("git"); err != nil {
		t.Skipf("git binary not found: %v", err)
	}
	// TCP probe: skip when the Gitea host is unreachable so offline
	// runners don't fail a network-dependent test.
	conn, err := net.DialTimeout("tcp", "git.moleculesai.app:443", 3*time.Second)
	if err != nil {
		t.Skipf("git.moleculesai.app:443 unreachable: %v", err)
	}
	conn.Close()
	// Minimal parent template built inline — no need for the
	// /tmp/local-e2e-deploy/ symlinked fixture. The whole point of
	// !external is that the parent template is self-contained;
	// composition resolves over the network at import time.
	root := t.TempDir()
	orgYAML := []byte(`name: External-Only Test Parent
description: Parent template that pulls the entire dev tree via !external.
defaults:
  runtime: claude-code
  tier: 2
workspaces:
  - !external
    repo: molecule-ai/molecule-dev-department
    ref: main
    path: dev-lead/workspace.yaml
`)
	if err := os.WriteFile(filepath.Join(root, "org.yaml"), orgYAML, 0o644); err != nil {
		t.Fatalf("write org.yaml: %v", err)
	}
	expanded, err := resolveYAMLIncludes(orgYAML, root)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes (!external against live Gitea): %v", err)
	}
	var tmpl OrgTemplate
	if err := yaml.Unmarshal(expanded, &tmpl); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	// Flatten the workspace tree.
	var all []OrgWorkspace
	var gather func([]OrgWorkspace)
	gather = func(level []OrgWorkspace) {
		for _, w := range level {
			all = append(all, w)
			gather(w.Children)
		}
	}
	gather(tmpl.Workspaces)
	t.Logf("workspaces resolved through !external: %d", len(all))
	if len(all) < 25 {
		t.Errorf("expected ~28 dev-tree workspaces via !external; got %d", len(all))
	}
	// Sentinel checks — same as TestLocalE2E_DevDepartmentExtraction
	// (Q1+Q2 placements verified).
	present := map[string]bool{}
	for _, w := range all {
		present[w.Name] = true
	}
	for _, want := range []string{
		"Dev Lead",
		"Core Platform Lead",
		"Controlplane Lead",
		"App & Docs Lead",
		"Documentation Specialist", // Q1
		"Triage Operator",          // Q2
	} {
		if !present[want] {
			t.Errorf("missing expected workspace %q", want)
		}
	}
	// Every workspace's files_dir must be cache-prefixed (proves the
	// path-rewrite ran end-to-end).
	const cachePrefix = ".external-cache"
	for _, w := range all {
		if w.FilesDir != "" && !strings.HasPrefix(w.FilesDir, cachePrefix) {
			t.Errorf("workspace %q files_dir %q missing cache prefix %q", w.Name, w.FilesDir, cachePrefix)
		}
	}
	// Verify the fetched cache exists and resolveInsideRoot accepts
	// every workspace's files_dir (would cause provisioning to fail
	// if not).
	for _, w := range all {
		if w.FilesDir == "" {
			continue
		}
		abs, err := resolveInsideRoot(root, w.FilesDir)
		if err != nil {
			t.Errorf("workspace %q files_dir %q: resolveInsideRoot: %v", w.Name, w.FilesDir, err)
			continue
		}
		st, err := os.Stat(abs)
		if err != nil {
			t.Errorf("workspace %q: stat %q: %v", w.Name, abs, err)
			continue
		}
		if !st.IsDir() {
			t.Errorf("workspace %q files_dir %q is not a directory", w.Name, w.FilesDir)
		}
	}
}

View File

@ -1,223 +0,0 @@
package handlers
// mock_runtime.go — "mock" runtime: a virtual workspace that has no
// container, no EC2, no LLM, just hardcoded canned A2A replies. Built
// for the funding-demo "200-workspace mock org" so hongming can show
// investors a CEO/VPs/Managers/ICs hierarchy at scale without burning
// 200 EC2 instances or 200 Anthropic keys.
//
// Wire model:
// - org template declares `runtime: mock` on every workspace
// - createWorkspaceTree skips provisioning, sets status='online'
// directly (mirrors the `external` short-circuit, minus the URL +
// awaiting_agent dance)
// - proxyA2ARequest short-circuits on a mock-runtime target and
// returns a canned JSON-RPC reply; never calls resolveAgentURL,
// never opens an HTTP connection, never touches Docker/EC2
//
// The reply is JSON-RPC 2.0 + a2a-sdk v0.3 shape so the canvas's
// extractAgentText / extractTextsFromParts read it without any
// special-casing. We rotate over a small variant pool so a screen
// full of replies doesn't all read identical — gives the demo a bit
// of life without pretending to be a real agent.
import (
"context"
"crypto/sha1"
"database/sql"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"log"
"net/http"
"strings"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// MockRuntimeName is the canonical runtime string a workspace row
// carries to opt into the canned-reply short-circuit. Kept as a const
// so the proxy's runtime-check + the org-import skip-block reference
// the same literal.
const MockRuntimeName = "mock"

// mockReplyVariants is the pool of canned strings the mock runtime
// rotates through. Picked to read like a busy-but-short reply from a
// real human in a hierarchy — a CEO would NOT respond with "On it!",
// but for the demo every node is shown to be reachable, so we lean
// into the variety. Variant selection is deterministic per
// (workspaceID, request-id) pair so a screen recording replays the
// same reply for the same input.
var mockReplyVariants = []string{
	"On it!",
	"Got it, on it now.",
	"On it, boss.",
	"Working on it.",
	"Acknowledged — on it.",
	"On it, will report back.",
	"Roger that, on it.",
	"Copy that. On it.",
	"On it — ETA shortly.",
	"On it. Standby for update.",
}

// pickMockReply returns the canned reply for a (workspaceID, requestID)
// pair. Selection is deterministic — the same inputs always hash to the
// same variant — which keeps screen recordings and e2e snapshots
// flake-free. Both-empty inputs fall back to variant[0].
func pickMockReply(workspaceID, requestID string) string {
	if len(mockReplyVariants) == 0 {
		return "On it!"
	}
	if workspaceID == "" && requestID == "" {
		return mockReplyVariants[0]
	}
	// SHA-1 here is a cheap stable hash, not a security boundary.
	digest := sha1.Sum([]byte(workspaceID + ":" + requestID))
	bucket := binary.BigEndian.Uint32(digest[:4]) % uint32(len(mockReplyVariants))
	return mockReplyVariants[bucket]
}
// lookupRuntime returns the workspace's runtime string, or "" when the
// row is missing / the DB hiccups, so callers fall through to the
// existing dispatch path (which then 404s / 502s normally). Fail-open
// on purpose: a transient DB error must never flip a real workspace
// into mock-mode and hand out canned replies in place of genuine
// agent traffic.
func lookupRuntime(ctx context.Context, workspaceID string) string {
	var rt sql.NullString
	row := db.DB.QueryRowContext(ctx, `SELECT runtime FROM workspaces WHERE id = $1`, workspaceID)
	if err := row.Scan(&rt); err != nil {
		// ErrNoRows is an expected miss; anything else is worth a log line.
		if !errors.Is(err, sql.ErrNoRows) {
			log.Printf("ProxyA2A: lookupRuntime(%s) failed (%v) — falling through to dispatch path", workspaceID, err)
		}
		return ""
	}
	if rt.Valid {
		return rt.String
	}
	return ""
}
// buildMockA2AResponse synthesises a JSON-RPC 2.0 success envelope matching
// the a2a-sdk v0.3 reply shape the canvas's extractAgentText already
// understands: `{result: {parts: [{kind: "text", text: ...}]}}`.
// `requestID` is the JSON-RPC `id` of the inbound request — A2A
// implementations echo it on the reply so callers can correlate. The
// caller extracts it from the normalized payload and passes it in, which
// keeps this function JSON-only (no payload parsing). An empty requestID
// gets a fresh UUID.
//
// Returns marshalled bytes ready to write straight to the HTTP body. A
// marshal failure is logged and a tiny hand-built fallback envelope is
// returned — failing the whole request over a JSON encoding hiccup on a
// constant-shaped payload would defeat the "mock always works" guarantee.
func buildMockA2AResponse(workspaceID, requestID, replyText string) []byte {
	if requestID == "" {
		requestID = uuid.New().String()
	}
	textPart := map[string]any{"kind": "text", "text": replyText}
	envelope := map[string]any{
		"jsonrpc": "2.0",
		"id":      requestID,
		"result": map[string]any{
			"parts": []map[string]any{textPart},
		},
	}
	out, err := json.Marshal(envelope)
	if err == nil {
		return out
	}
	log.Printf("ProxyA2A: mock-runtime response marshal failed for %s: %v — emitting fallback", workspaceID, err)
	// Hand-rolled minimal envelope. Safe because every value is a
	// hardcoded constant string with no characters that need
	// escaping in a JSON string literal.
	return []byte(fmt.Sprintf(
		`{"jsonrpc":"2.0","id":%q,"result":{"parts":[{"kind":"text","text":%q}]}}`,
		requestID, replyText,
	))
}
// extractRequestID pulls the JSON-RPC `id` out of an already-normalized
// A2A payload. Returns "" when the body isn't valid JSON, the field is
// absent, or it is neither a string nor a number — the caller then
// substitutes a fresh UUID. Tolerant of every shape normalizeA2APayload
// could produce.
func extractRequestID(body []byte) string {
	var fields map[string]json.RawMessage
	if json.Unmarshal(body, &fields) != nil {
		return ""
	}
	raw, present := fields["id"]
	if !present {
		return ""
	}
	var asString string
	if err := json.Unmarshal(raw, &asString); err == nil {
		return asString
	}
	// JSON-RPC permits numeric IDs too; canvas issues UUIDs but be
	// defensive against alternative SDKs.
	var asNumber json.Number
	if err := json.Unmarshal(raw, &asNumber); err == nil {
		return asNumber.String()
	}
	return ""
}
// handleMockA2A is the proxy short-circuit for mock-runtime workspaces.
// Returns (status, body, true) when the target is mock — the caller
// writes the response and stops. Returns (_, _, false) otherwise, and
// the caller continues down the real dispatch path.
//
// Side-effects: when logActivity is true, writes a synthetic
// activity_logs row via logA2ASuccess so the canvas's "Agent Comms"
// tab shows the mock reply in the trace alongside real-agent traffic.
// Without it the demo renders messages on the canvas chat panel while
// a peer node's activity tab shows an empty list.
func (h *WorkspaceHandler) handleMockA2A(ctx context.Context, workspaceID, callerID string, body []byte, a2aMethod string, logActivity bool) (int, []byte, bool) {
	if lookupRuntime(ctx, workspaceID) != MockRuntimeName {
		return 0, nil, false
	}
	reqID := extractRequestID(body)
	reply := buildMockA2AResponse(workspaceID, reqID, pickMockReply(workspaceID, reqID))
	// Tiny artificial delay so the canvas chat UI renders the user's
	// outgoing bubble before the agent reply appears — same-frame
	// arrival feels robotic. 80ms is too fast to look "real" but masks
	// the React double-render race that drops the user bubble entirely
	// on slow machines (observed locally on M1 Air, 2026-05-07), while
	// staying below 200ms so a 200-node demo remains snappy when
	// investors fan out 30 messages at once.
	time.Sleep(80 * time.Millisecond)
	if logActivity {
		// Reuse the existing success-logger so the activity feed shape
		// is identical to a real agent reply. Status 200 + duration 0
		// is the "synthesised reply" marker; activity_logs.duration_ms
		// being 0 is harmless (real fast paths can hit 0 too).
		h.logA2ASuccess(ctx, workspaceID, callerID, body, reply, a2aMethod, http.StatusOK, 0)
	}
	return http.StatusOK, reply, true
}
// IsMockRuntime is a small public helper for callers outside this
// package (tests, the org importer) that need to ask the question
// without depending on the unexported constant. Trims whitespace and
// compares case-insensitively so a typoed YAML cell like " Mock "
// still resolves correctly.
func IsMockRuntime(runtime string) bool {
	trimmed := strings.TrimSpace(runtime)
	return strings.EqualFold(trimmed, MockRuntimeName)
}
// gin import is unused at file scope but kept as a tag so a future
// addition of a thin HTTP handler (e.g. POST /workspaces/:id/mock/replies
// for an admin-set custom reply pool) doesn't need an import re-order.
// The blank-identifier assignment is what keeps the compiler from
// rejecting the otherwise-unused import.
var _ = gin.H{}

View File

@ -1,266 +0,0 @@
package handlers
// mock_runtime_test.go — locks the contract for the mock-runtime
// short-circuit added for the funding-demo "200-workspace mock org"
// template. Three invariants:
//
// 1. ProxyA2A on a workspace with runtime='mock' must return 200
// with a JSON-RPC reply containing one text part. NO HTTP
// dispatch, NO resolveAgentURL DB read (mock workspaces have
// no URL — that read would 404 and break the demo).
//
// 2. The reply text must be one of the canned variants and must be
// deterministic for a given (workspace_id, request_id) pair so
// screen recordings replay identically.
//
// 3. Workspaces with runtime != 'mock' must NOT be affected — the
// mock check fails fast and falls through to the existing
// dispatch path. Same kind of regression guard the poll-mode
// tests carry.
import (
"bytes"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// TestProxyA2A_MockRuntime_ReturnsCannedReply is the happy-path
// contract. A workspace flagged runtime='mock' must:
//   - return 200 with JSON-RPC envelope {result:{parts:[{kind:text,text:...}]}}
//   - not dispatch HTTP (no SELECT url SQL expected)
//   - reply text is one of mockReplyVariants
//
// NOTE(review): the sqlmock expectations below are declared in the exact
// order ProxyA2A issues them (budget → delivery_mode → runtime →
// activity log). Reordering them changes what this test asserts.
func TestProxyA2A_MockRuntime_ReturnsCannedReply(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	const wsID = "ws-mock-canned"
	// Budget check fires before runtime lookup (same as the poll-mode
	// short-circuit) — keeps mock workspaces honest if a tenant ever
	// sets a budget on one. Unlikely on a demo, but the guard stays
	// uniform so future "monthly_spend on mock = 0" assertions don't
	// drift.
	expectBudgetCheck(mock, wsID)
	// lookupDeliveryMode runs first — return push so the poll
	// short-circuit doesn't fire and we hit the mock check.
	mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
		WithArgs(wsID).
		WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("push"))
	// lookupRuntime SELECT — returns 'mock', triggering the canned-reply
	// short-circuit. CRITICAL: NO ExpectQuery for `SELECT url, status
	// FROM workspaces` (resolveAgentURL's query). If the short-circuit
	// fails to fire, sqlmock will surface "unexpected query" on the URL
	// SELECT and the test fails loudly — that's the dispatch-leak detector.
	mock.ExpectQuery("SELECT runtime FROM workspaces WHERE id").
		WithArgs(wsID).
		WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("mock"))
	// Activity log: logA2ASuccess writes the synthetic reply to
	// activity_logs so the canvas's Agent Comms tab shows it alongside
	// real-agent traffic.
	mock.ExpectExec("INSERT INTO activity_logs").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: wsID}}
	body := `{"jsonrpc":"2.0","id":"req-mock-1","method":"message/send","params":{"message":{"role":"user","parts":[{"kind":"text","text":"hello mock"}]}}}`
	c.Request = httptest.NewRequest("POST", "/workspaces/"+wsID+"/a2a", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.ProxyA2A(c)
	// logA2ASuccess fires async — give it a moment to settle so
	// ExpectationsWereMet doesn't flake.
	time.Sleep(200 * time.Millisecond)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	var resp map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("response is not valid JSON: %v", err)
	}
	if resp["jsonrpc"] != "2.0" {
		t.Errorf("response.jsonrpc = %v, want 2.0", resp["jsonrpc"])
	}
	// JSON-RPC correlation contract: the reply id must echo the request id.
	if resp["id"] != "req-mock-1" {
		t.Errorf("response.id = %v, want %q (echoed from request)", resp["id"], "req-mock-1")
	}
	result, _ := resp["result"].(map[string]interface{})
	if result == nil {
		t.Fatalf("response.result missing or wrong type: %v", resp["result"])
	}
	parts, _ := result["parts"].([]interface{})
	if len(parts) != 1 {
		t.Fatalf("expected exactly one part, got %d: %v", len(parts), parts)
	}
	part, _ := parts[0].(map[string]interface{})
	if part["kind"] != "text" {
		t.Errorf("part.kind = %v, want text", part["kind"])
	}
	text, _ := part["text"].(string)
	if text == "" {
		t.Error("part.text is empty — canned reply not populated")
	}
	// Reply must be one of the variants.
	matched := false
	for _, v := range mockReplyVariants {
		if v == text {
			matched = true
			break
		}
	}
	if !matched {
		t.Errorf("reply text %q is not in mockReplyVariants", text)
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestProxyA2A_NonMockRuntime_NoShortCircuit verifies the symmetric
// contract: a workspace with a real runtime (claude-code, hermes, etc.)
// must NOT be affected by the mock check — it falls through to the
// real dispatch path. Without this guard, a regression in
// lookupRuntime could silently flip every workspace into mock-mode
// and start handing out canned replies in place of real-agent traffic.
//
// NOTE(review): like the happy-path test, sqlmock expectation order
// mirrors ProxyA2A's query order and is load-bearing.
func TestProxyA2A_NonMockRuntime_NoShortCircuit(t *testing.T) {
	mock := setupTestDB(t)
	mr := setupTestRedis(t)
	allowLoopbackForTest(t)
	broadcaster := newTestBroadcaster()
	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
	const wsID = "ws-real-runtime"
	// Stand-in agent backend: flips `dispatched` so we can assert the
	// request actually left the proxy and reached the agent URL.
	dispatched := false
	agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		dispatched = true
		w.Header().Set("Content-Type", "application/json")
		w.Write([]byte(`{"jsonrpc":"2.0","id":"1","result":{"status":"ok"}}`))
	}))
	defer agentServer.Close()
	// Seed the Redis URL cache so resolveAgentURL finds the stub server.
	mr.Set("ws:"+wsID+":url", agentServer.URL)
	expectBudgetCheck(mock, wsID)
	// poll-mode SELECT — return push so we proceed past the poll
	// short-circuit.
	mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
		WithArgs(wsID).
		WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("push"))
	// runtime SELECT — return claude-code so the mock check falls
	// through.
	mock.ExpectQuery("SELECT runtime FROM workspaces WHERE id").
		WithArgs(wsID).
		WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
	mock.ExpectExec("INSERT INTO activity_logs").
		WillReturnResult(sqlmock.NewResult(0, 1))
	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Params = gin.Params{{Key: "id", Value: wsID}}
	body := `{"jsonrpc":"2.0","id":"real-1","method":"message/send","params":{"message":{"role":"user","parts":[{"kind":"text","text":"hi"}]}}}`
	c.Request = httptest.NewRequest("POST", "/workspaces/"+wsID+"/a2a", bytes.NewBufferString(body))
	c.Request.Header.Set("Content-Type", "application/json")
	handler.ProxyA2A(c)
	// Brief settle for the async activity-log write before checking
	// expectations.
	time.Sleep(50 * time.Millisecond)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
	}
	if !dispatched {
		t.Error("non-mock runtime: expected the agent server to receive the request, but it did not — mock short-circuit may be over-firing")
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations: %v", err)
	}
}
// TestPickMockReply_Deterministic locks the determinism contract:
// the same (workspaceID, requestID) input must yield the same variant
// every call. Required for screen recordings + flake-free e2e
// snapshots.
func TestPickMockReply_Deterministic(t *testing.T) {
	inputs := [][2]string{
		{"ws-1", "req-A"},
		{"ws-1", "req-B"},
		{"ws-2", "req-A"},
		{"", ""},
	}
	for _, pair := range inputs {
		ws, req := pair[0], pair[1]
		want := pickMockReply(ws, req)
		// Re-invoke repeatedly; any drift from the first result is a
		// determinism violation.
		for attempt := 0; attempt < 10; attempt++ {
			if got := pickMockReply(ws, req); got != want {
				t.Errorf("pickMockReply(%q,%q) is not deterministic: got %q then %q",
					ws, req, want, got)
			}
		}
	}
}
// TestIsMockRuntime_TrimsAndCaseInsensitive — typos and stray
// whitespace in YAML must still resolve to mock so a single
// runtime: " Mock " entry doesn't silently get dispatched.
func TestIsMockRuntime_TrimsAndCaseInsensitive(t *testing.T) {
	type tc struct {
		input string
		want  bool
	}
	for _, c := range []tc{
		{"mock", true},
		{"MOCK", true},
		{" Mock ", true},
		{"mocky", false},
		{"", false},
		{"external", false},
		{"claude-code", false},
	} {
		if got := IsMockRuntime(c.input); got != c.want {
			t.Errorf("IsMockRuntime(%q) = %v, want %v", c.input, got, c.want)
		}
	}
}
// TestBuildMockA2AResponse_EchoesRequestID — JSON-RPC requires the
// reply id to match the request id so callers can correlate. Mock
// must hold this contract or canvas's correlation logic breaks.
func TestBuildMockA2AResponse_EchoesRequestID(t *testing.T) {
	raw := buildMockA2AResponse("ws-x", "req-echo-7", "On it!")
	var decoded map[string]interface{}
	if err := json.Unmarshal(raw, &decoded); err != nil {
		t.Fatalf("response is not valid JSON: %v", err)
	}
	if got := decoded["id"]; got != "req-echo-7" {
		t.Errorf("id = %v, want req-echo-7", got)
	}
	if got := decoded["jsonrpc"]; got != "2.0" {
		t.Errorf("jsonrpc = %v, want 2.0", got)
	}
	// Drill into result.parts[0].text — soft-cast each layer so a shape
	// mismatch surfaces as a test failure, not a panic.
	result, _ := decoded["result"].(map[string]interface{})
	parts, _ := result["parts"].([]interface{})
	if len(parts) != 1 {
		t.Fatalf("expected 1 part, got %d", len(parts))
	}
	first, _ := parts[0].(map[string]interface{})
	if first["text"] != "On it!" {
		t.Errorf("part.text = %v, want On it!", first["text"])
	}
}

View File

@ -13,15 +13,12 @@ import (
"path/filepath"
"strconv"
"strings"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/channels"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
"github.com/gin-gonic/gin"
"github.com/lib/pq"
"gopkg.in/yaml.v3"
)
@ -425,16 +422,6 @@ type OrgWorkspace struct {
Tier int `yaml:"tier" json:"tier"`
Template string `yaml:"template" json:"template"`
FilesDir string `yaml:"files_dir" json:"files_dir"`
// Spawning gates whether this workspace (AND its descendants) gets
// provisioned during /org/import. Pointer so we can distinguish
// "explicitly set to false" from "unset" (default = spawn). Use case:
// the dev-tree org template declares the full team structure but a
// developer's local machine only has RAM for a subset; setting
// spawning: false on a leaf or a sub-tree root skips that branch
// entirely without editing the canonical template structure.
// Counted in countWorkspaces same as actual; subtree-skip happens
// at provision time in createWorkspaceTree.
Spawning *bool `yaml:"spawning,omitempty" json:"spawning,omitempty"`
// SystemPrompt is an inline override. Normally each role's system-prompt.md
// lives at `<files_dir>/system-prompt.md` and is copied via the files_dir
// template-copy step; inline overrides that path for ad-hoc workspaces.
@ -571,19 +558,6 @@ func (h *OrgHandler) Import(c *gin.Context) {
var body struct {
Dir string `json:"dir"` // org template directory name
Template OrgTemplate `json:"template"` // or inline template
// Mode controls cleanup behavior of pre-existing workspaces:
// "" / "merge" — additive (default; current behavior).
// Existing workspaces matched by
// (parent_id, name) are skipped; nothing
// outside the new tree is touched.
// "reconcile" — additive + cleanup. After import, any
// online workspace whose name matches an
// imported workspace's name but whose id
// isn't in the import result set is
// cascade-deleted. Catches "previous
// import survived a re-import" zombies
// (the 20:13→21:17 dev-tree case).
Mode string `json:"mode"`
}
if err := c.ShouldBindJSON(&body); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
@ -629,19 +603,6 @@ func (h *OrgHandler) Import(c *gin.Context) {
return
}
// Emit started AFTER the YAML is loaded so payload.name carries the
// resolved template name (was: empty when caller passed `dir` instead
// of inline `template`). Pre-parse error paths above return without
// emitting — semantically "we couldn't even start an import" — so
// every started event is guaranteed a paired completed/failed below
// (no orphan started rows in structure_events).
importStart := time.Now()
emitOrgEvent(c.Request.Context(), "org.import.started", map[string]any{
"name": tmpl.Name,
"dir": body.Dir,
"mode": body.Mode,
})
// Required-env preflight — refuses import when any required_env is
// missing from global_secrets. No bypass: the prior `force: true`
// escape hatch was removed (issue #2290) because it was the silent
@ -747,171 +708,18 @@ func (h *OrgHandler) Import(c *gin.Context) {
}
}
// Reconcile mode: prune workspaces present from a previous import that
// share a name with the new tree but are NOT in the new result set.
// Catches the additive-import bug where re-running /org/import with a
// changed tree shape (different parent_id for the same role name) leaves
// the prior workspace online — visible to the canvas, consuming
// containers, and looking like a duplicate. Default mode "" / "merge"
// preserves the old additive behavior.
reconcileRemovedCount := 0
reconcileSkipped := 0
reconcileErrs := []string{}
if body.Mode == "reconcile" && createErr == nil {
ctx := c.Request.Context()
importedNames := []string{}
walkOrgWorkspaceNames(tmpl.Workspaces, &importedNames)
importedIDs := make([]string, 0, len(results))
for _, r := range results {
if id, ok := r["id"].(string); ok && id != "" {
importedIDs = append(importedIDs, id)
}
}
// Empty-set guards: if the import didn't produce any names or any
// IDs, skip — querying with empty arrays would either match
// nothing (harmless) or, worse, match every workspace if a future
// query rewrite drops the IN clause. Belt-and-suspenders.
if len(importedNames) > 0 && len(importedIDs) > 0 {
rows, err := db.DB.QueryContext(ctx, `
SELECT id FROM workspaces
WHERE name = ANY($1::text[])
AND id != ALL($2::uuid[])
AND status != 'removed'
`, pq.Array(importedNames), pq.Array(importedIDs))
if err != nil {
log.Printf("Org import reconcile: orphan query failed: %v", err)
reconcileErrs = append(reconcileErrs, fmt.Sprintf("orphan query: %v", err))
} else {
orphanIDs := []string{}
for rows.Next() {
var orphanID string
if rows.Scan(&orphanID) == nil {
orphanIDs = append(orphanIDs, orphanID)
}
}
rows.Close()
for _, oid := range orphanIDs {
descendantIDs, stopErrs, err := h.workspace.CascadeDelete(ctx, oid)
if err != nil {
log.Printf("Org import reconcile: CascadeDelete(%s) failed: %v", oid, err)
reconcileErrs = append(reconcileErrs, fmt.Sprintf("delete %s: %v", oid, err))
reconcileSkipped++
continue
}
reconcileRemovedCount += 1 + len(descendantIDs)
if len(stopErrs) > 0 {
log.Printf("Org import reconcile: %s had %d stop errors (orphan sweeper will retry)", oid, len(stopErrs))
}
}
log.Printf("Org import reconcile: %d orphans removed (%d cascade descendants), %d skipped", len(orphanIDs), reconcileRemovedCount-len(orphanIDs), reconcileSkipped)
}
}
}
status := http.StatusCreated
resp := gin.H{
"org": tmpl.Name,
"workspaces": results,
"count": len(results),
}
if body.Mode == "reconcile" {
resp["mode"] = "reconcile"
resp["reconcile_removed_count"] = reconcileRemovedCount
if len(reconcileErrs) > 0 {
resp["reconcile_errors"] = reconcileErrs
}
}
if createErr != nil {
status = http.StatusMultiStatus
resp["error"] = createErr.Error()
}
// results contains both freshly-created AND lookupExistingChild skips
// (entries with "skipped":true). Splitting the count here so the audit
// row reflects "what changed" vs "what was already there" — telemetry
// readers shouldn't need to grep stdout to tell an idempotent re-run
// apart from a fresh-create.
createdCount, skippedCount := 0, 0
for _, r := range results {
if skipped, _ := r["skipped"].(bool); skipped {
skippedCount++
} else {
createdCount++
}
}
log.Printf("Org import: %s — %d created, %d skipped, %d reconciled",
tmpl.Name, createdCount, skippedCount, reconcileRemovedCount)
emitOrgEvent(c.Request.Context(), "org.import.completed", map[string]any{
"name": tmpl.Name,
"dir": body.Dir,
"mode": body.Mode,
"created_count": createdCount,
"skipped_count": skippedCount,
"reconcile_removed_count": reconcileRemovedCount,
"reconcile_errors": len(reconcileErrs),
"duration_ms": time.Since(importStart).Milliseconds(),
"create_error": errString(createErr),
})
log.Printf("Org import: %s — %d workspaces created", tmpl.Name, len(results))
c.JSON(status, resp)
}
// walkOrgWorkspaceNames collects every Name in the tree (in any order) into
// names. Used by reconcile to detect orphan workspaces — workspaces with the
// same role name as a freshly-imported one but a different id, surviving from
// a prior import.
func walkOrgWorkspaceNames(workspaces []OrgWorkspace, names *[]string) {
	for i := range workspaces {
		ws := &workspaces[i]
		// spawning:false subtrees are still part of the imported tree
		// from a logical-tree perspective — DON'T skip the recursion,
		// or reconcile would orphan the rest of the subtree on every
		// re-import where spawning is toggled. Names of skipped
		// workspaces remain registered so reconcile won't double-create
		// them when spawning flips back to true.
		if ws.Name != "" {
			*names = append(*names, ws.Name)
		}
		walkOrgWorkspaceNames(ws.Children, names)
	}
}
// emitOrgEvent records an org-lifecycle event in structure_events so the
// import history is queryable independent of stdout log retention. Errors
// are logged and swallowed — never block the request path on telemetry.
//
// Event-type taxonomy (extend by appending; never rename):
//
//	org.import.started   — handler entered, request body parsed
//	org.import.completed — handler exiting (success or partial)
//	org.import.failed    — handler exiting with an unrecoverable error
//
// payload fields are documented at each call site.
func emitOrgEvent(ctx context.Context, eventType string, payload map[string]any) {
	// Normalize nil → empty map so the JSON column never stores null.
	if payload == nil {
		payload = map[string]any{}
	}
	encoded, marshalErr := json.Marshal(payload)
	if marshalErr != nil {
		log.Printf("emitOrgEvent: marshal %s payload failed: %v", eventType, marshalErr)
		return
	}
	const insertSQL = `
		INSERT INTO structure_events (event_type, payload, created_at)
		VALUES ($1, $2, now())
	`
	if _, execErr := db.DB.ExecContext(ctx, insertSQL, eventType, encoded); execErr != nil {
		log.Printf("emitOrgEvent: insert %s failed: %v", eventType, execErr)
	}
}
// errString returns "" for a nil error, err.Error() otherwise. Lets us put
// nullable error strings in event payloads without checking for nil at every
// call site.
func errString(err error) string {
if err == nil {
return ""
}
return err.Error()
}

View File

@ -1,439 +0,0 @@
package handlers
import (
"context"
"fmt"
"net/url"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"time"
"gopkg.in/yaml.v3"
)
// External-ref resolver — gitops-style cross-repo subtree composition.
// Internal#77 RFC, Phase 3a (task #222). Prior art: Helm subcharts +
// dependency cache, Kustomize remote bases, Terraform module sources.
//
// Schema (a `!external`-tagged mapping anywhere a workspace entry is
// allowed — workspaces:, roots:, children:):
//
// - !external
// repo: molecule-ai/molecule-dev-department
// ref: main
// path: dev-lead/workspace.yaml
//
// At resolve time, the platform fetches the repo at ref into a content-
// addressable cache under <rootDir>/.external-cache/<repo>/<sha>/, loads
// the yaml at <cacheDir>/<path>, rewrites every files_dir + relative
// !include path to be cache-prefixed, then grafts the result in place of
// the !external node. Downstream pipeline (resolveInsideRoot, plugin
// merge, CopyTemplateToContainer) sees ordinary in-tree paths.
// ExternalRef is the deserialized form of an `!external`-tagged mapping.
// Repo, Ref and Path are all required (validated in resolveExternalMapping);
// the yaml tags must stay stable — they are the public template schema.
type ExternalRef struct {
	// Repo is the owner/name path on the Gitea host, e.g.
	// "molecule-ai/molecule-dev-department".
	Repo string `yaml:"repo"`
	// Ref is a branch, tag, or SHA; restricted to safeRefPattern.
	Ref string `yaml:"ref"`
	// Path is the workspace yaml inside the repo, relative-and-down-only.
	Path string `yaml:"path"`
	// URL overrides the default Gitea host. Optional; defaults to
	// MOLECULE_EXTERNAL_GITEA_URL env or git.moleculesai.app.
	URL string `yaml:"url,omitempty"`
}
const (
	// maxExternalDepth caps recursion through nested `!external`s. Lower
	// than maxIncludeDepth (16) because each level may issue a network
	// fetch. Composition that genuinely needs >4 layers is a smell.
	maxExternalDepth = 4
	// externalCacheDirName is the per-template cache subdir under rootDir.
	// Content-addressable: keyed by (repo, sha). Operators add this to
	// .gitignore — cache is platform-mutated, not source-tracked.
	externalCacheDirName = ".external-cache"
	// gitFetchTimeout caps a single clone operation. Conservative —
	// org template fetches are typically <100KB.
	gitFetchTimeout = 60 * time.Second
)

// safeRefPattern restricts `ref` values to characters git itself accepts
// for branch / tag / SHA. Belt-and-braces over git's own validation.
// NOTE: this pattern does admit `..`; resolveExternalMapping rejects
// that case explicitly as defense-in-depth.
var safeRefPattern = regexp.MustCompile(`^[a-zA-Z0-9_./-]+$`)
// allowlistedHostPath returns true if `<host>/<repo>` matches the
// configured allowlist. Default allowlist: git.moleculesai.app/molecule-ai/.
// Override via MOLECULE_EXTERNAL_REPO_ALLOWLIST env var (comma-separated
// patterns). Patterns are matched as prefixes (with trailing-slash
// semantics) or as exact matches. Trailing /* is treated as "any
// descendants of this prefix".
//
// Examples:
// - "git.moleculesai.app/molecule-ai/" → matches molecule-ai/* (any repo)
// - "git.moleculesai.app/molecule-ai/*" → same; trailing /* normalized to /
// - "git.moleculesai.app/molecule-ai/molecule-dev-department" → exact
// - "git.moleculesai.app/" → matches everything on that host
func allowlistedHostPath(host, repoPath string) bool {
allow := os.Getenv("MOLECULE_EXTERNAL_REPO_ALLOWLIST")
if allow == "" {
allow = "git.moleculesai.app/molecule-ai/"
}
hp := host + "/" + repoPath
for _, pat := range strings.Split(allow, ",") {
pat = strings.TrimSpace(pat)
if pat == "" {
continue
}
// Normalize trailing /* → /
pat = strings.TrimSuffix(pat, "*")
if pat == hp {
return true
}
if strings.HasSuffix(pat, "/") && strings.HasPrefix(hp+"/", pat) {
return true
}
}
return false
}
// externalFetcher abstracts the git-clone-into-cache step. Production
// uses gitFetcher (shells out to git); tests inject a fake that
// pre-stages content in a temp dir.
type externalFetcher interface {
	// Fetch ensures rootDir/.external-cache/<safe-repo>/<sha>/ contains
	// the repo content at the given ref. Returns the absolute cache
	// dir + the resolved SHA. Cache hit = no network. Cache miss =
	// clone.
	Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (cacheDir, sha string, err error)
}

// defaultExternalFetcher is the package-level fetcher injection point.
// Production code uses the git-shell fetcher; tests override via
// SetExternalFetcherForTest. Not synchronized — swap only in tests,
// before concurrent resolves start.
var defaultExternalFetcher externalFetcher = &gitFetcher{}
// SetExternalFetcherForTest swaps the fetcher for testing. Returns a
// cleanup func that restores the previous fetcher.
func SetExternalFetcherForTest(f externalFetcher) func() {
	restored := defaultExternalFetcher
	defaultExternalFetcher = f
	return func() {
		defaultExternalFetcher = restored
	}
}
// resolveExternalMapping replaces an `!external`-tagged mapping node
// with the loaded + path-rewritten yaml content from the fetched repo.
//
// Parameters:
//
//	n          — the `!external` mapping node; mutated in place on success.
//	currentDir — the containing file's directory (inherited resolve frame).
//	rootDir    — org template root; cache lives under it. Required.
//	visited    — (host/repo@sha/path) keys for cycle detection across
//	             nested externals.
//	depth      — current recursion depth, bounded by maxExternalDepth.
//
// Order matters: validate → fetch → cycle-check → load → recurse →
// rewrite → graft. Do not reorder.
func resolveExternalMapping(n *yaml.Node, currentDir, rootDir string, visited map[string]bool, depth int) error {
	if depth > maxExternalDepth {
		return fmt.Errorf("!external: max depth %d exceeded (possible cycle)", maxExternalDepth)
	}
	// Inline-template mode has no on-disk root to cache under.
	if rootDir == "" {
		return fmt.Errorf("!external at line %d requires a dir-based org template (no rootDir in inline-template mode)", n.Line)
	}
	var ref ExternalRef
	if err := n.Decode(&ref); err != nil {
		return fmt.Errorf("!external at line %d: decode: %w", n.Line, err)
	}
	if ref.Repo == "" || ref.Ref == "" || ref.Path == "" {
		return fmt.Errorf("!external at line %d: repo, ref, path are all required (got %+v)", n.Line, ref)
	}
	if !safeRefPattern.MatchString(ref.Ref) {
		return fmt.Errorf("!external at line %d: ref %q contains disallowed characters", n.Line, ref.Ref)
	}
	// Defense-in-depth: even though git itself rejects refs containing
	// `..`, the regex above currently allows them. Reject explicitly.
	if strings.Contains(ref.Ref, "..") {
		return fmt.Errorf("!external at line %d: ref %q must not contain '..'", n.Line, ref.Ref)
	}
	// Anti-traversal on the in-repo path: no `..`, no absolute paths.
	if strings.Contains(ref.Path, "..") || strings.HasPrefix(ref.Path, "/") {
		return fmt.Errorf("!external at line %d: path %q must be relative-and-down-only", n.Line, ref.Path)
	}
	// Host resolution precedence: explicit url > env > hardcoded default.
	host := ref.URL
	if host == "" {
		host = os.Getenv("MOLECULE_EXTERNAL_GITEA_URL")
	}
	if host == "" {
		host = "git.moleculesai.app"
	}
	// Normalize to bare host: strip scheme and trailing slash.
	host = strings.TrimPrefix(strings.TrimPrefix(host, "https://"), "http://")
	host = strings.TrimSuffix(host, "/")
	if !allowlistedHostPath(host, ref.Repo) {
		return fmt.Errorf("!external at line %d: %s/%s not in MOLECULE_EXTERNAL_REPO_ALLOWLIST", n.Line, host, ref.Repo)
	}
	ctx, cancel := context.WithTimeout(context.Background(), gitFetchTimeout)
	defer cancel()
	cacheDir, sha, err := defaultExternalFetcher.Fetch(ctx, rootDir, host, ref.Repo, ref.Ref)
	if err != nil {
		return fmt.Errorf("!external at line %d: fetch %s/%s@%s: %w", n.Line, host, ref.Repo, ref.Ref, err)
	}
	// Cycle key: (repo, sha, path) — same external content reachable
	// via two paths is fine, but a self-referential cycle isn't.
	cycleKey := fmt.Sprintf("%s/%s@%s/%s", host, ref.Repo, sha, ref.Path)
	if visited[cycleKey] {
		return fmt.Errorf("!external cycle detected at %q (line %d)", cycleKey, n.Line)
	}
	// Validate path resolves inside the cache dir (anti-traversal).
	yamlPathAbs, err := resolveInsideRoot(cacheDir, ref.Path)
	if err != nil {
		return fmt.Errorf("!external at line %d: path %q: %w", n.Line, ref.Path, err)
	}
	if _, err := os.Stat(yamlPathAbs); err != nil {
		return fmt.Errorf("!external at line %d: %s/%s@%s does not contain %q: %w", n.Line, host, ref.Repo, sha, ref.Path, err)
	}
	data, err := os.ReadFile(yamlPathAbs)
	if err != nil {
		return fmt.Errorf("!external at line %d: read %q: %w", n.Line, yamlPathAbs, err)
	}
	var sub yaml.Node
	if err := yaml.Unmarshal(data, &sub); err != nil {
		return fmt.Errorf("!external at line %d: parse %q: %w", n.Line, yamlPathAbs, err)
	}
	// Unwrap the document node so we graft the content node directly.
	root := &sub
	if root.Kind == yaml.DocumentNode && len(root.Content) == 1 {
		root = root.Content[0]
	}
	// Recurse FIRST: load all nested !include / !external content into
	// the tree. Then rewrite ALL files_dir scalars in the fully-resolved
	// tree (top + nested) with the cache prefix in one pass. Doing
	// rewrite-before-recurse would leave nested-loaded files_dir paths
	// unprefixed.
	visited[cycleKey] = true
	defer delete(visited, cycleKey)
	subDir := filepath.Dir(yamlPathAbs)
	if err := expandNode(root, subDir, rootDir, visited, depth+1); err != nil {
		return err
	}
	// Path rewrite: prefix every files_dir scalar in the fully-resolved
	// content with the cache-relative-from-rootDir prefix. After this
	// pass, fetched workspaces look like ordinary in-tree workspaces.
	cachePrefix, err := filepath.Rel(rootDir, cacheDir)
	if err != nil {
		return fmt.Errorf("!external at line %d: cannot compute cache prefix: %w", n.Line, err)
	}
	rewriteFilesDir(root, cachePrefix)
	// Replace the !external mapping with the resolved content in-place.
	*n = *root
	if n.Tag == "!external" {
		n.Tag = ""
	}
	return nil
}
// rewriteFilesDir walks the yaml node tree and prepends cachePrefix to
// every files_dir scalar value. Idempotent: if a files_dir value already
// starts with the prefix, no-op.
//
// !include paths are intentionally NOT rewritten. They resolve relative
// to their containing file's directory (subDir in expandNode), and after
// fetch that directory IS inside the cache, so relative !include paths
// Just Work without any rewrite. Rewriting them would double-prefix on
// recursive resolution.
//
// files_dir DOES need rewriting because it's consumed at workspace-
// provisioning time relative to orgBaseDir (the parent template's root),
// not relative to the workspace.yaml's containing dir.
func rewriteFilesDir(n *yaml.Node, cachePrefix string) {
	if n == nil {
		return
	}
	if n.Kind == yaml.MappingNode {
		// Mapping content alternates key, value — step by two.
		for i := 0; i+1 < len(n.Content); i += 2 {
			k, v := n.Content[i], n.Content[i+1]
			if k.Kind != yaml.ScalarNode || k.Value != "files_dir" || v.Kind != yaml.ScalarNode {
				continue
			}
			// Idempotence guard: skip values already under the prefix.
			alreadyPrefixed := v.Value == cachePrefix ||
				strings.HasPrefix(v.Value, cachePrefix+string(filepath.Separator))
			if !alreadyPrefixed {
				v.Value = filepath.Join(cachePrefix, v.Value)
			}
		}
	}
	for _, child := range n.Content {
		rewriteFilesDir(child, cachePrefix)
	}
}
// safeRepoCacheDir converts a repo path like "molecule-ai/foo" into a
// filesystem-safe segment "molecule-ai__foo". Avoids nesting cache dirs
// (which would complicate cleanup).
func safeRepoCacheDir(host, repoPath string) string {
	// Single-pass replacement is equivalent to sequential ReplaceAll
	// here: neither substitution can produce the other's target char.
	replacer := strings.NewReplacer("/", "__", ":", "_")
	return replacer.Replace(host + "/" + repoPath)
}
// gitFetcher is the production externalFetcher: shells out to `git` to
// clone the repo at ref into the cache dir. Cache key includes the
// resolved SHA, so different SHAs of the same ref get different cache
// dirs (no overwrite).
//
// Token handling — important for security. The auth token never enters
// the clone URL (and therefore never lands in the cloned repo's
// .git/config) and never appears in returned errors. We use git's
// `http.extraHeader` config option (passed via `-c`), which sends an
// Authorization header per-request without persisting it. The token is
// briefly visible in the `git` process's argv (so other local users
// with the same uid could see it via `ps`), which is the same exposure
// it has via the env var that supplied it.
//
// Cache validity uses a `.complete` marker written after a successful
// clone+rename. Cache-hit checks for the marker, not just the dir
// existence — a partially-written cache (clone failed mid-way, or a
// concurrent caller wrote a half-baked cache dir) is treated as cache
// miss and re-fetched cleanly.
type gitFetcher struct{}

// cacheCompleteMarker is the filename written after a successful clone.
// Cache-hit requires this marker; without it, the cache dir is treated
// as partially-written and re-fetched. See isCacheComplete.
const cacheCompleteMarker = ".complete"
// Fetch resolves ref → SHA via `git ls-remote`, then `git clone --depth=1`
// if the cache dir is missing or incomplete. Auth via MOLECULE_GITEA_TOKEN
// injected via http.extraHeader (never via URL).
//
// Returns the absolute cache dir and the resolved SHA. The cache layout
// is <rootDir>/<externalCacheDirName>/<safe-repo>/<sha>/, made durable
// by a write-to-tmp + atomic-rename protocol with a .complete marker.
func (g *gitFetcher) Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (string, string, error) {
	cacheRoot := filepath.Join(rootDir, externalCacheDirName, safeRepoCacheDir(host, repoPath))
	if err := os.MkdirAll(cacheRoot, 0o755); err != nil {
		return "", "", fmt.Errorf("mkdir cache root: %w", err)
	}
	cloneURL := buildExternalCloneURL(host, repoPath)
	// gitArgs prepends the auth -c config (if any) to the subcommand args.
	gitArgs := func(extra ...string) []string {
		args := authConfigArgs()
		return append(args, extra...)
	}
	// 1. Resolve ref → SHA (so cache dir is content-addressable).
	sha, err := g.resolveRefToSHA(ctx, cloneURL, ref, gitArgs)
	if err != nil {
		return "", "", fmt.Errorf("ls-remote: %s", redactToken(err.Error()))
	}
	cacheDir := filepath.Join(cacheRoot, sha)
	// Cache-hit requires the .complete marker AND the .git dir.
	// Without the marker, cache is partially-written → treat as miss.
	if isCacheComplete(cacheDir) {
		return cacheDir, sha, nil
	}
	// Cache miss or partially-written — clean any stale cacheDir before
	// cloning (a previous broken attempt would otherwise block rename).
	os.RemoveAll(cacheDir)
	// 2. Clone into a sibling tmp dir; atomic rename on success.
	tmpDir, err := os.MkdirTemp(cacheRoot, sha+".tmp.")
	if err != nil {
		return "", "", fmt.Errorf("mkdir tmp: %w", err)
	}
	// MkdirTemp creates the dir; git clone refuses to clone into a
	// non-empty dir. Remove + recreate empty.
	os.RemoveAll(tmpDir)
	// Fix: the original wrapped this call in a single-argument append(),
	// which is a no-op — gitArgs already returns the full arg slice.
	cloneArgs := gitArgs("clone", "--quiet", "--depth=1", "-b", ref, cloneURL, tmpDir)
	cmd := exec.CommandContext(ctx, "git", cloneArgs...)
	cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
	if out, err := cmd.CombinedOutput(); err != nil {
		os.RemoveAll(tmpDir)
		return "", "", fmt.Errorf("git clone: %w: %s", err, redactToken(strings.TrimSpace(string(out))))
	}
	// Write the .complete marker BEFORE the rename. If rename succeeds,
	// the marker is in place. If rename loses the race (concurrent
	// fetcher won), our tmp gets cleaned up and we trust the winner.
	if err := os.WriteFile(filepath.Join(tmpDir, cacheCompleteMarker), []byte(time.Now().UTC().Format(time.RFC3339)), 0o644); err != nil {
		os.RemoveAll(tmpDir)
		return "", "", fmt.Errorf("write complete marker: %w", err)
	}
	if err := os.Rename(tmpDir, cacheDir); err != nil {
		// Race: another import beat us. Validate THEIR cache, accept it.
		os.RemoveAll(tmpDir)
		if isCacheComplete(cacheDir) {
			return cacheDir, sha, nil
		}
		return "", "", fmt.Errorf("rename clone to cache (and winner's cache is incomplete): %w", err)
	}
	return cacheDir, sha, nil
}
// isCacheComplete reports whether cacheDir contains both the cloned
// repo (.git) and the .complete marker. Treats partial state as miss.
func isCacheComplete(cacheDir string) bool {
	// Both sentinels must stat cleanly; either missing → incomplete.
	for _, sentinel := range []string{".git", cacheCompleteMarker} {
		if _, err := os.Stat(filepath.Join(cacheDir, sentinel)); err != nil {
			return false
		}
	}
	return true
}
// resolveRefToSHA runs `git ls-remote <cloneURL> <ref>` and returns the
// commit SHA the ref points at. gitArgs supplies the auth-config prefix
// args. Returns an error if git fails or the ref does not exist.
func (g *gitFetcher) resolveRefToSHA(ctx context.Context, cloneURL, ref string, gitArgs func(...string) []string) (string, error) {
	args := gitArgs("ls-remote", cloneURL, ref)
	cmd := exec.CommandContext(ctx, "git", args...)
	cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
	out, err := cmd.Output()
	if err != nil {
		return "", err
	}
	line := strings.TrimSpace(string(out))
	if line == "" {
		return "", fmt.Errorf("ref %q not found", ref)
	}
	// ls-remote output is "<sha>\t<refname>" per line; the first
	// whitespace-separated field is the SHA. strings.Fields replaces
	// the original hand-rolled byte-scan for the first space/tab —
	// same result (line is non-empty after TrimSpace, so Fields
	// returns at least one element), clearer intent.
	return strings.Fields(line)[0], nil
}
// buildExternalCloneURL constructs the clone URL WITHOUT auth in userinfo.
// Auth is layered on via authConfigArgs's http.extraHeader.
func buildExternalCloneURL(host, repoPath string) string {
	u := &url.URL{
		Scheme: "https",
		Host:   host,
		Path:   "/" + repoPath + ".git",
	}
	return u.String()
}
// authConfigArgs returns the `-c http.extraHeader=Authorization: token X`
// args to pass to git, OR an empty slice if no token is set. The token
// goes into the request headers (not the URL or .git/config), so it
// doesn't persist on disk and doesn't appear in clone error output.
func authConfigArgs() []string {
token := os.Getenv("MOLECULE_GITEA_TOKEN")
if token == "" {
return nil
}
return []string{"-c", "http.extraHeader=Authorization: token " + token}
}
// redactToken scrubs the auth token from a string before it's logged
// or returned in an error. Belt-and-braces: with the http.extraHeader
// approach the token shouldn't appear in git's output, but if some
// future git version or libcurl debug mode emits it, this catches it.
func redactToken(s string) string {
token := os.Getenv("MOLECULE_GITEA_TOKEN")
if token == "" || len(token) < 8 {
return s
}
return strings.ReplaceAll(s, token, "<redacted-token>")
}

View File

@ -1,379 +0,0 @@
package handlers
import (
"context"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
"gopkg.in/yaml.v3"
)
// PR-B integration test: exercises the REAL gitFetcher (no fakeFetcher
// injection) against a local bare-git repo. Uses git's `insteadOf`
// config to rewrite the configured Gitea URL to the local bare path
// at clone time, so the fetcher's URL-building, ls-remote, clone,
// atomic-rename, and cache-hit paths all run against real git
// without requiring network or modifying production code.
//
// Internal#77 task #233 (PR-B from the design's phasing).
// TestGitFetcher_RealClone_LocalRedirect proves the production
// gitFetcher round-trips correctly against a real git repository.
// Steps:
// 1. Set up a local bare-git repo with workspace content.
// 2. Configure git's `insteadOf` to rewrite the gitea URL → local path
// via GIT_CONFIG_COUNT/KEY/VALUE env vars (process-scoped).
// 3. Run resolveYAMLIncludes with !external pointing at the gitea URL.
// 4. Assert: cache dir populated; content materialized; path rewrite
// applied; second invocation hits cache (no second clone).
func TestGitFetcher_RealClone_LocalRedirect(t *testing.T) {
	// Preconditions: a real git binary, and a non-Windows filesystem
	// (path-based insteadOf rewrites differ on Windows).
	if _, err := exec.LookPath("git"); err != nil {
		t.Skipf("git binary not found: %v", err)
	}
	if runtime.GOOS == "windows" {
		t.Skip("path-based git URLs behave differently on Windows; skipping")
	}
	// Step 1: create a local bare-git repo at <fixtures>/test-dev-dept.git
	// with workspace content. Use a working clone to add content, then
	// push to the bare.
	fixtures := t.TempDir()
	barePath := filepath.Join(fixtures, "test-dev-dept.git")
	workPath := filepath.Join(fixtures, "work")
	mustGit(t, "", "init", "--bare", "-b", "main", barePath)
	mustGit(t, "", "clone", barePath, workPath)
	mustGit(t, workPath, "config", "user.email", "test@example.com")
	mustGit(t, workPath, "config", "user.name", "Integration Test")
	// Two-level tree: dev-lead includes core-be via a relative !include,
	// exercising nested resolution inside the fetched cache.
	mustWriteFile(t, filepath.Join(workPath, "dev-lead/workspace.yaml"), `name: Dev Lead
files_dir: dev-lead
children:
  - !include ./core-be/workspace.yaml
`)
	mustWriteFile(t, filepath.Join(workPath, "dev-lead/system-prompt.md"), "Dev Lead persona body.\n")
	mustWriteFile(t, filepath.Join(workPath, "dev-lead/core-be/workspace.yaml"), `name: Core BE
files_dir: dev-lead/core-be
`)
	mustWriteFile(t, filepath.Join(workPath, "dev-lead/core-be/system-prompt.md"), "Core BE persona body.\n")
	mustGit(t, workPath, "add", ".")
	mustGit(t, workPath, "commit", "-m", "seed dev tree")
	mustGit(t, workPath, "push", "origin", "main")
	// Step 2: configure git's insteadOf rewrite. The fetcher will try
	// to clone https://git.moleculesai.app/molecule-ai/test-dev-dept.git;
	// git rewrites to file://<barePath>.
	//
	// GIT_CONFIG_COUNT/KEY/VALUE injects config without touching
	// ~/.gitconfig — process-scoped, no test pollution.
	// (variable name typo: "geesUrl" should be giteaURL; rename in a
	// follow-up — left as-is here to keep this change comments-only)
	geesUrl := "https://git.moleculesai.app/molecule-ai/test-dev-dept.git"
	t.Setenv("GIT_CONFIG_COUNT", "1")
	t.Setenv("GIT_CONFIG_KEY_0", "url."+barePath+".insteadOf")
	t.Setenv("GIT_CONFIG_VALUE_0", geesUrl)
	// Step 3: run resolveYAMLIncludes with !external pointing at the
	// gitea URL. Allowlist is the default (molecule-ai/* on Gitea host).
	rootDir := t.TempDir()
	src := []byte(`workspaces:
  - !external
    repo: molecule-ai/test-dev-dept
    ref: main
    path: dev-lead/workspace.yaml
`)
	out, err := resolveYAMLIncludes(src, rootDir)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes: %v", err)
	}
	var tmpl OrgTemplate
	if err := yaml.Unmarshal(out, &tmpl); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if len(tmpl.Workspaces) != 1 {
		t.Fatalf("workspaces: %+v", tmpl.Workspaces)
	}
	dev := tmpl.Workspaces[0]
	if dev.Name != "Dev Lead" {
		t.Errorf("dev.Name = %q; want Dev Lead", dev.Name)
	}
	// files_dir must have been rewritten to point inside the cache.
	if !strings.Contains(dev.FilesDir, ".external-cache") {
		t.Errorf("dev.FilesDir = %q; want cache prefix", dev.FilesDir)
	}
	if !strings.HasSuffix(dev.FilesDir, "dev-lead") {
		t.Errorf("dev.FilesDir = %q; want suffix dev-lead", dev.FilesDir)
	}
	if len(dev.Children) != 1 {
		t.Fatalf("expected nested core-be child; got %+v", dev.Children)
	}
	core := dev.Children[0]
	if core.Name != "Core BE" {
		t.Errorf("core.Name = %q; want Core BE", core.Name)
	}
	if !strings.HasSuffix(core.FilesDir, filepath.Join("dev-lead", "core-be")) {
		t.Errorf("core.FilesDir = %q; want suffix dev-lead/core-be", core.FilesDir)
	}
	// Step 4: verify the cache dir actually exists and contains the
	// materialized files (CopyTemplateToContainer would tar these).
	cacheRoot := filepath.Join(rootDir, ".external-cache")
	entries, err := os.ReadDir(cacheRoot)
	if err != nil {
		t.Fatalf("read cache root: %v", err)
	}
	if len(entries) != 1 {
		t.Fatalf("expected 1 cached repo, got %d: %v", len(entries), entries)
	}
	repoDir := filepath.Join(cacheRoot, entries[0].Name())
	shaDirs, _ := os.ReadDir(repoDir)
	if len(shaDirs) != 1 {
		t.Fatalf("expected 1 SHA cache dir, got %d", len(shaDirs))
	}
	cacheDir := filepath.Join(repoDir, shaDirs[0].Name())
	if _, err := os.Stat(filepath.Join(cacheDir, "dev-lead/system-prompt.md")); err != nil {
		t.Errorf("expected dev-lead/system-prompt.md in cache: %v", err)
	}
	if _, err := os.Stat(filepath.Join(cacheDir, "dev-lead/core-be/system-prompt.md")); err != nil {
		t.Errorf("expected dev-lead/core-be/system-prompt.md in cache: %v", err)
	}
	// Step 5: re-run; verify cache hit (no second clone). Set a
	// "marker" file in the cache that a second clone would clobber.
	marker := filepath.Join(cacheDir, ".cache-hit-marker")
	if err := os.WriteFile(marker, []byte("hit"), 0o644); err != nil {
		t.Fatal(err)
	}
	out2, err := resolveYAMLIncludes(src, rootDir)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes second call: %v", err)
	}
	if string(out) != string(out2) {
		t.Errorf("cached output differs from initial — non-deterministic resolve")
	}
	if _, err := os.Stat(marker); err != nil {
		t.Errorf("cache hit not honored — marker file disappeared: %v", err)
	}
}
// TestGitFetcher_RealClone_BadRefFails: pointing at a ref that doesn't
// exist in the bare-repo surfaces git's error cleanly.
func TestGitFetcher_RealClone_BadRefFails(t *testing.T) {
	// Integration preconditions: a real git binary on PATH, non-Windows.
	if _, err := exec.LookPath("git"); err != nil {
		t.Skipf("git binary not found: %v", err)
	}
	if runtime.GOOS == "windows" {
		t.Skip("skipping on windows")
	}
	// Fixture: a bare repo seeded with a single commit on main via a
	// working clone (bare repos can't be committed to directly).
	fixtures := t.TempDir()
	barePath := filepath.Join(fixtures, "empty-repo.git")
	workPath := filepath.Join(fixtures, "work")
	mustGit(t, "", "init", "--bare", "-b", "main", barePath)
	mustGit(t, "", "clone", barePath, workPath)
	mustGit(t, workPath, "config", "user.email", "test@example.com")
	mustGit(t, workPath, "config", "user.name", "Test")
	mustWriteFile(t, filepath.Join(workPath, "README.md"), "x")
	mustGit(t, workPath, "add", ".")
	mustGit(t, workPath, "commit", "-m", "seed")
	mustGit(t, workPath, "push", "origin", "main")
	// Rewrite the HTTPS URL the fetcher builds to the local bare repo.
	// GIT_CONFIG_COUNT/KEY/VALUE is process-scoped — no ~/.gitconfig
	// pollution across tests.
	t.Setenv("GIT_CONFIG_COUNT", "1")
	t.Setenv("GIT_CONFIG_KEY_0", "url."+barePath+".insteadOf")
	t.Setenv("GIT_CONFIG_VALUE_0", "https://git.moleculesai.app/molecule-ai/empty-repo.git")
	rootDir := t.TempDir()
	// NOTE(review): mapping keys under "- !external" appear flush-left
	// here; confirm the original raw string's indentation survived
	// formatting.
	src := []byte(`workspaces:
- !external
repo: molecule-ai/empty-repo
ref: nonexistent-branch
path: anything.yaml
`)
	_, err := resolveYAMLIncludes(src, rootDir)
	if err == nil {
		t.Fatalf("expected error for nonexistent ref; got nil")
	}
	// Error phrasing is git-version dependent; accept any of the known
	// surfaces ("ref", "ls-remote", "not found").
	if !strings.Contains(err.Error(), "ref") && !strings.Contains(err.Error(), "ls-remote") && !strings.Contains(err.Error(), "not found") {
		t.Errorf("error doesn't mention ref/ls-remote: %v", err)
	}
}
// ---------- helpers ----------
// mustGit runs `git <args...>` — in cwd when non-empty, otherwise in the
// process working directory — and fails the test immediately on a
// non-zero exit, echoing git's combined output for diagnosis.
func mustGit(t *testing.T, cwd string, args ...string) {
	t.Helper()
	c := exec.Command("git", args...)
	if cwd != "" {
		c.Dir = cwd
	}
	// Ensure user.email/name are set globally for non-cwd commands too.
	identity := []string{
		"GIT_AUTHOR_EMAIL=test@example.com",
		"GIT_AUTHOR_NAME=Integration Test",
		"GIT_COMMITTER_EMAIL=test@example.com",
		"GIT_COMMITTER_NAME=Integration Test",
	}
	c.Env = append(os.Environ(), identity...)
	out, err := c.CombinedOutput()
	if err != nil {
		t.Fatalf("git %s: %v\n%s", strings.Join(args, " "), err, string(out))
	}
}
// mustWriteFile writes content to path, creating any missing parent
// directories, and fails the test on the first error encountered.
func mustWriteFile(t *testing.T, path, content string) {
	t.Helper()
	var err error
	if err = os.MkdirAll(filepath.Dir(path), 0o755); err == nil {
		err = os.WriteFile(path, []byte(content), 0o644)
	}
	if err != nil {
		t.Fatal(err)
	}
}
// Verify gitFetcher.Fetch direct invocation (no resolver wrapping) for
// the cache-hit path, exercising the bare API against a local bare-repo.
func TestGitFetcher_DirectFetch_CacheHit(t *testing.T) {
	// Integration preconditions: real git binary, non-Windows.
	if _, err := exec.LookPath("git"); err != nil {
		t.Skipf("git binary not found: %v", err)
	}
	if runtime.GOOS == "windows" {
		t.Skip("skipping on windows")
	}
	// Fixture: a bare repo with one commit containing marker.txt.
	fixtures := t.TempDir()
	barePath := filepath.Join(fixtures, "direct.git")
	workPath := filepath.Join(fixtures, "w")
	mustGit(t, "", "init", "--bare", "-b", "main", barePath)
	mustGit(t, "", "clone", barePath, workPath)
	mustGit(t, workPath, "config", "user.email", "t@e")
	mustGit(t, workPath, "config", "user.name", "T")
	mustWriteFile(t, filepath.Join(workPath, "marker.txt"), "hello")
	mustGit(t, workPath, "add", ".")
	mustGit(t, workPath, "commit", "-m", "seed")
	mustGit(t, workPath, "push", "origin", "main")
	// Route the fetcher's HTTPS URL at the local bare repo (process-
	// scoped git config; no ~/.gitconfig pollution).
	t.Setenv("GIT_CONFIG_COUNT", "1")
	t.Setenv("GIT_CONFIG_KEY_0", "url."+barePath+".insteadOf")
	t.Setenv("GIT_CONFIG_VALUE_0", "https://git.moleculesai.app/molecule-ai/direct.git")
	rootDir := t.TempDir()
	g := &gitFetcher{}
	ctx := context.Background()
	// First fetch: cache miss — clones into rootDir's cache, returns the
	// cache dir plus the resolved commit SHA.
	cacheDir1, sha1, err := g.Fetch(ctx, rootDir, "git.moleculesai.app", "molecule-ai/direct", "main")
	if err != nil {
		t.Fatalf("first Fetch: %v", err)
	}
	if sha1 == "" || len(sha1) < 7 {
		t.Errorf("expected SHA-like string, got %q", sha1)
	}
	if _, err := os.Stat(filepath.Join(cacheDir1, "marker.txt")); err != nil {
		t.Errorf("first fetch missing marker.txt: %v", err)
	}
	// Second call: cache hit, returns same dir + sha, no re-clone.
	// The stamp file would be wiped by a fresh clone, so its survival
	// proves the cache was reused.
	stamp := filepath.Join(cacheDir1, ".not-clobbered-by-second-fetch")
	if err := os.WriteFile(stamp, []byte("x"), 0o644); err != nil {
		t.Fatal(err)
	}
	cacheDir2, sha2, err := g.Fetch(ctx, rootDir, "git.moleculesai.app", "molecule-ai/direct", "main")
	if err != nil {
		t.Fatalf("second Fetch: %v", err)
	}
	if cacheDir2 != cacheDir1 || sha2 != sha1 {
		t.Errorf("cache miss on second call: %q/%q vs %q/%q", cacheDir1, sha1, cacheDir2, sha2)
	}
	if _, err := os.Stat(stamp); err != nil {
		t.Errorf("cache hit not honored — stamp file disappeared: %v", err)
	}
}
// TestGitFetcher_RejectsRefWithDoubleDot pins defense-in-depth on ref
// input: safeRefPattern admits '.' as a character class member, so a
// ".." sequence needs an explicit deny. Even though git would reject
// the resulting clone anyway, the resolver must refuse it up front.
func TestGitFetcher_RejectsRefWithDoubleDot(t *testing.T) {
	root := t.TempDir()
	template := []byte(`workspaces:
- !external
repo: molecule-ai/x
ref: foo..bar
path: x.yaml
`)
	_, err := resolveYAMLIncludes(template, root)
	switch {
	case err == nil:
		t.Fatalf("expected '..' rejection")
	case !strings.Contains(err.Error(), ".."):
		t.Errorf("expected '..' in error; got %v", err)
	}
}
// TestGitFetcher_CacheValidatedByCompleteMarker: a partially-written
// cache (the .git dir exists but no .complete marker) is treated as
// cache-miss and re-fetched. Catches the broken-cache-permanence bug.
func TestGitFetcher_CacheValidatedByCompleteMarker(t *testing.T) {
	// Integration preconditions: real git binary, non-Windows.
	if _, err := exec.LookPath("git"); err != nil {
		t.Skipf("git not found: %v", err)
	}
	if runtime.GOOS == "windows" {
		t.Skip("skipping on windows")
	}
	// Fixture: bare repo seeded with one file via a working clone.
	fixtures := t.TempDir()
	barePath := filepath.Join(fixtures, "test.git")
	workPath := filepath.Join(fixtures, "w")
	mustGit(t, "", "init", "--bare", "-b", "main", barePath)
	mustGit(t, "", "clone", barePath, workPath)
	mustGit(t, workPath, "config", "user.email", "t@e")
	mustGit(t, workPath, "config", "user.name", "T")
	mustWriteFile(t, filepath.Join(workPath, "good.txt"), "from-network")
	mustGit(t, workPath, "add", ".")
	mustGit(t, workPath, "commit", "-m", "seed")
	mustGit(t, workPath, "push", "origin", "main")
	// Route the fetcher's HTTPS URL at the local bare repo.
	t.Setenv("GIT_CONFIG_COUNT", "1")
	t.Setenv("GIT_CONFIG_KEY_0", "url."+barePath+".insteadOf")
	t.Setenv("GIT_CONFIG_VALUE_0", "https://git.moleculesai.app/molecule-ai/marker-test.git")
	rootDir := t.TempDir()
	g := &gitFetcher{}
	// First fetch — populates the cache (creates .complete marker).
	cacheDir1, _, err := g.Fetch(context.Background(), rootDir, "git.moleculesai.app", "molecule-ai/marker-test", "main")
	if err != nil {
		t.Fatalf("first Fetch: %v", err)
	}
	marker := filepath.Join(cacheDir1, cacheCompleteMarker)
	if _, err := os.Stat(marker); err != nil {
		t.Fatalf("first fetch should have written .complete marker: %v", err)
	}
	// Now simulate a partial cache: delete the marker but leave .git
	// in place. The next Fetch should treat this as cache-miss and
	// re-fetch (NOT silently use the partial cache).
	if err := os.Remove(marker); err != nil {
		t.Fatal(err)
	}
	// Drop a sentinel file the second fetch will clobber if it re-fetches.
	sentinel := filepath.Join(cacheDir1, "_should_be_clobbered")
	if err := os.WriteFile(sentinel, []byte("partial"), 0o644); err != nil {
		t.Fatal(err)
	}
	cacheDir2, _, err := g.Fetch(context.Background(), rootDir, "git.moleculesai.app", "molecule-ai/marker-test", "main")
	if err != nil {
		t.Fatalf("second Fetch: %v", err)
	}
	// Cache path must be deterministic across fetches of the same ref.
	if cacheDir1 != cacheDir2 {
		t.Errorf("cache dirs differ across fetches: %q vs %q", cacheDir1, cacheDir2)
	}
	if _, err := os.Stat(filepath.Join(cacheDir2, cacheCompleteMarker)); err != nil {
		t.Errorf("re-fetch should have re-written .complete marker: %v", err)
	}
	// A genuine re-fetch wipes the dir, so the sentinel must be gone.
	if _, err := os.Stat(sentinel); err == nil {
		t.Errorf("sentinel still present — re-fetch did NOT clobber partial cache")
	}
}

View File

@ -1,331 +0,0 @@
package handlers
import (
"context"
"os"
"path/filepath"
"strings"
"testing"
"gopkg.in/yaml.v3"
)
// fakeFetcher pre-stages a "fetched" repo at a fixed path inside the
// rootDir's .external-cache, bypassing the real git clone. Tests
// inject this via SetExternalFetcherForTest to exercise the resolver
// + path-rewrite logic without network. Lookups with no registered
// key fail with fakeNotFoundError.
type fakeFetcher struct {
	// content maps "<host>/<repo>@<ref>" → a function that materializes
	// repo content under cacheDir. Returns the fake SHA to use.
	content map[string]func(cacheDir string) (sha string, err error)
}
// Fetch implements the external-fetcher interface without network: it
// looks up the staging closure registered for "<host>/<repoPath>@<ref>",
// materializes its content under a deterministic cache dir inside
// rootDir/.external-cache, and returns (cacheDir, sha, nil). Unknown
// keys return a *fakeNotFoundError.
func (f *fakeFetcher) Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (string, string, error) {
	lookupKey := host + "/" + repoPath + "@" + ref
	stage, registered := f.content[lookupKey]
	if !registered {
		return "", "", &fakeNotFoundError{key: lookupKey}
	}
	// Use a stable SHA for the test so cache dir is deterministic.
	dir := filepath.Join(rootDir, ".external-cache", safeRepoCacheDir(host, repoPath), "deadbeef")
	if mkErr := os.MkdirAll(dir, 0o755); mkErr != nil {
		return "", "", mkErr
	}
	sha, stageErr := stage(dir)
	if stageErr != nil {
		return "", "", stageErr
	}
	return dir, sha, nil
}
// fakeNotFoundError is returned by fakeFetcher.Fetch when no staging
// closure is registered for the requested "<host>/<repo>@<ref>" key.
type fakeNotFoundError struct{ key string }

// Error echoes the unmatched key so a failing test names exactly which
// fetch was unexpected.
func (e *fakeNotFoundError) Error() string {
	return "fake fetcher: no content registered for " + e.key
}
// stageFiles writes a map of relative-path → content into cacheDir,
// returning a fake SHA. Helper for fakeFetcher closures.
func stageFiles(cacheDir string, files map[string]string) error {
if err := os.MkdirAll(filepath.Join(cacheDir, ".git"), 0o755); err != nil {
return err
}
for path, content := range files {
full := filepath.Join(cacheDir, path)
if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
return err
}
if err := os.WriteFile(full, []byte(content), 0o644); err != nil {
return err
}
}
return nil
}
// TestResolveExternalMapping_HappyPath: a parent template with an
// !external entry resolves cleanly into the fetched workspace + path-
// rewrites files_dir + relative !include refs into the cache prefix.
func TestResolveExternalMapping_HappyPath(t *testing.T) {
	tmp := t.TempDir()
	// Stub fetcher: "fetched" content has a workspace.yaml that uses
	// files_dir + nested !include relative to the fetched repo's root.
	// NOTE(review): the embedded YAML appears flush-left here; confirm
	// the original raw strings' nesting indentation survived formatting.
	fake := &fakeFetcher{
		content: map[string]func(string) (string, error){
			"git.moleculesai.app/molecule-ai/molecule-dev-department@main": func(cacheDir string) (string, error) {
				return "deadbeef", stageFiles(cacheDir, map[string]string{
					"dev-lead/workspace.yaml": `name: Dev Lead
files_dir: dev-lead
children:
- !include ./core-lead/workspace.yaml
`,
					"dev-lead/core-lead/workspace.yaml": `name: Core Platform Lead
files_dir: dev-lead/core-lead
`,
				})
			},
		},
	}
	cleanup := SetExternalFetcherForTest(fake)
	defer cleanup()
	src := []byte(`name: Parent
workspaces:
- !external
repo: molecule-ai/molecule-dev-department
ref: main
path: dev-lead/workspace.yaml
`)
	out, err := resolveYAMLIncludes(src, tmp)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes: %v", err)
	}
	// The resolved output must unmarshal into the normal template shape.
	var tmpl OrgTemplate
	if err := yaml.Unmarshal(out, &tmpl); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if len(tmpl.Workspaces) != 1 {
		t.Fatalf("workspaces: %+v", tmpl.Workspaces)
	}
	dev := tmpl.Workspaces[0]
	if dev.Name != "Dev Lead" {
		t.Errorf("dev.Name = %q; want Dev Lead", dev.Name)
	}
	// files_dir should be cache-prefixed. Prefix shape comes from the
	// fakeFetcher's deterministic "deadbeef" SHA cache dir.
	wantPrefix := filepath.Join(".external-cache", "git.moleculesai.app__molecule-ai__molecule-dev-department", "deadbeef")
	if !strings.HasPrefix(dev.FilesDir, wantPrefix) {
		t.Errorf("dev.FilesDir = %q; want prefix %q", dev.FilesDir, wantPrefix)
	}
	if !strings.HasSuffix(dev.FilesDir, "dev-lead") {
		t.Errorf("dev.FilesDir = %q; want suffix dev-lead", dev.FilesDir)
	}
	// Nested child: files_dir cache-prefixed, name Core Platform Lead.
	if len(dev.Children) != 1 {
		t.Fatalf("dev.Children: %+v", dev.Children)
	}
	core := dev.Children[0]
	if core.Name != "Core Platform Lead" {
		t.Errorf("core.Name = %q; want Core Platform Lead", core.Name)
	}
	if !strings.HasPrefix(core.FilesDir, wantPrefix) {
		t.Errorf("core.FilesDir = %q; want prefix %q", core.FilesDir, wantPrefix)
	}
	if !strings.HasSuffix(core.FilesDir, filepath.Join("dev-lead", "core-lead")) {
		t.Errorf("core.FilesDir = %q; want suffix dev-lead/core-lead", core.FilesDir)
	}
}
// TestResolveExternalMapping_AllowlistRejection: hostile yaml pointing
// at a non-allowlisted repo gets rejected.
func TestResolveExternalMapping_AllowlistRejection(t *testing.T) {
tmp := t.TempDir()
fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
cleanup := SetExternalFetcherForTest(fake)
defer cleanup()
// Default allowlist is git.moleculesai.app/molecule-ai/*.
// github.com/foo/bar is NOT in it.
src := []byte(`workspaces:
- !external
repo: foo/bar
ref: main
path: x.yaml
url: github.com
`)
_, err := resolveYAMLIncludes(src, tmp)
if err == nil {
t.Fatalf("expected allowlist rejection, got nil")
}
if !strings.Contains(err.Error(), "MOLECULE_EXTERNAL_REPO_ALLOWLIST") {
t.Errorf("expected allowlist error; got %v", err)
}
}
// TestResolveExternalMapping_PathTraversalRejection: hostile yaml
// with `path: ../../etc/passwd` gets rejected before fetch.
func TestResolveExternalMapping_PathTraversalRejection(t *testing.T) {
tmp := t.TempDir()
fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
cleanup := SetExternalFetcherForTest(fake)
defer cleanup()
src := []byte(`workspaces:
- !external
repo: molecule-ai/dev-department
ref: main
path: ../../etc/passwd
`)
_, err := resolveYAMLIncludes(src, tmp)
if err == nil {
t.Fatalf("expected path traversal rejection, got nil")
}
if !strings.Contains(err.Error(), "relative-and-down-only") {
t.Errorf("expected path traversal error; got %v", err)
}
}
// TestResolveExternalMapping_BadRefRejection: non-allowlisted ref chars.
func TestResolveExternalMapping_BadRefRejection(t *testing.T) {
tmp := t.TempDir()
fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
cleanup := SetExternalFetcherForTest(fake)
defer cleanup()
src := []byte(`workspaces:
- !external
repo: molecule-ai/dev-department
ref: "main; rm -rf /"
path: foo.yaml
`)
_, err := resolveYAMLIncludes(src, tmp)
if err == nil || !strings.Contains(err.Error(), "disallowed characters") {
t.Errorf("expected ref-validation error; got %v", err)
}
}
// TestResolveExternalMapping_MissingRequiredFields: repo / ref / path
// are all required.
func TestResolveExternalMapping_MissingRequiredFields(t *testing.T) {
tmp := t.TempDir()
fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
cleanup := SetExternalFetcherForTest(fake)
defer cleanup()
cases := []string{
// missing repo
`workspaces:
- !external
ref: main
path: x.yaml
`,
// missing ref
`workspaces:
- !external
repo: molecule-ai/x
path: x.yaml
`,
// missing path
`workspaces:
- !external
repo: molecule-ai/x
ref: main
`,
}
for i, src := range cases {
_, err := resolveYAMLIncludes([]byte(src), tmp)
if err == nil {
t.Errorf("case %d: expected required-field error, got nil", i)
} else if !strings.Contains(err.Error(), "required") {
t.Errorf("case %d: want 'required' in error; got %v", i, err)
}
}
}
// TestRewriteFilesDir: verify the path-rewrite walker
// prefixes files_dir scalars. !include scalars are NOT rewritten —
// they resolve relative to their containing file's dir, which post-
// fetch is naturally inside the cache.
func TestRewriteFilesDir(t *testing.T) {
	// NOTE(review): the nested files_dir under `inner:` appears
	// flush-left here; confirm the original raw string's YAML nesting
	// indentation survived formatting.
	src := `name: Foo
files_dir: dev-lead
children:
- !include ./bar/workspace.yaml
- !include other-team.yaml
inner:
files_dir: dev-lead/sub
`
	var n yaml.Node
	if err := yaml.Unmarshal([]byte(src), &n); err != nil {
		t.Fatal(err)
	}
	// Walk the node tree, prefixing every files_dir scalar with the
	// cache path.
	rewriteFilesDir(&n, ".external-cache/foo/bar")
	out, err := yaml.Marshal(&n)
	if err != nil {
		t.Fatal(err)
	}
	got := string(out)
	// Substring assertions: both files_dir values rewritten, both
	// !include tags left untouched.
	for _, want := range []string{
		"files_dir: .external-cache/foo/bar/dev-lead",
		"files_dir: .external-cache/foo/bar/dev-lead/sub",
		// !include preserved as-is; resolves naturally via subDir.
		"!include ./bar/workspace.yaml",
		"!include other-team.yaml",
	} {
		if !strings.Contains(got, want) {
			t.Errorf("missing %q in:\n%s", want, got)
		}
	}
}
// TestRewriteFilesDir_Idempotent: re-running the rewriter
// on already-prefixed files_dir doesn't double-prefix.
func TestRewriteFilesDir_Idempotent(t *testing.T) {
	// NOTE(review): nested files_dir under `inner:` appears flush-left;
	// confirm the original raw string's YAML indentation survived
	// formatting.
	src := `files_dir: .external-cache/foo/bar/dev-lead
inner:
files_dir: .external-cache/foo/bar/dev-lead/sub
`
	var n yaml.Node
	if err := yaml.Unmarshal([]byte(src), &n); err != nil {
		t.Fatal(err)
	}
	// Apply the rewriter a second time with the same prefix the input
	// already carries.
	rewriteFilesDir(&n, ".external-cache/foo/bar")
	out, _ := yaml.Marshal(&n)
	got := string(out)
	if strings.Contains(got, ".external-cache/foo/bar/.external-cache") {
		t.Errorf("double-prefix detected:\n%s", got)
	}
	// Should still be valid (single-prefixed) afterwards.
	for _, want := range []string{
		"files_dir: .external-cache/foo/bar/dev-lead",
		"files_dir: .external-cache/foo/bar/dev-lead/sub",
	} {
		if !strings.Contains(got, want) {
			t.Errorf("expected unchanged %q in:\n%s", want, got)
		}
	}
}
// TestAllowlistedHostPath exercises the allowlist in both modes: the
// built-in default (empty env) and a comma-separated glob override via
// MOLECULE_EXTERNAL_REPO_ALLOWLIST.
func TestAllowlistedHostPath(t *testing.T) {
	// Empty env → built-in default: git.moleculesai.app/molecule-ai/*.
	t.Setenv("MOLECULE_EXTERNAL_REPO_ALLOWLIST", "")
	if !allowlistedHostPath("git.moleculesai.app", "molecule-ai/foo") {
		t.Error("default allowlist should accept molecule-ai/*")
	}
	if allowlistedHostPath("github.com", "molecule-ai/foo") {
		t.Error("default allowlist should reject github.com")
	}
	// Override: comma-separated host/path glob patterns.
	t.Setenv("MOLECULE_EXTERNAL_REPO_ALLOWLIST", "github.com/me/*,git.moleculesai.app/*")
	checks := []struct {
		host, path string
		want       bool
		msg        string
	}{
		{"github.com", "me/x", true, "override should accept github.com/me/*"},
		{"git.moleculesai.app", "any/repo", true, "override should accept git.moleculesai.app/*"},
		{"github.com", "evil/x", false, "override should reject github.com/evil/*"},
	}
	for _, tc := range checks {
		if allowlistedHostPath(tc.host, tc.path) != tc.want {
			t.Error(tc.msg)
		}
	}
}

View File

@ -6,7 +6,6 @@ package handlers
import (
"fmt"
"log"
"os"
"path/filepath"
"regexp"
@ -103,56 +102,6 @@ func loadWorkspaceEnv(orgBaseDir, filesDir string) map[string]string {
return envVars
}
// loadPersonaEnvFile merges per-role persona credentials into out,
// reading $MOLECULE_PERSONA_ROOT/<role>/env (default
// /etc/molecule-bootstrap/personas/<role>/env). The file is populated
// by the operator-host bootstrap kit — one persona per dev-tree role,
// each carrying the role's Gitea identity (GITEA_USER, GITEA_TOKEN,
// GITEA_TOKEN_SCOPES, GITEA_USER_EMAIL, GITEA_SSH_KEY_PATH).
//
// Precedence: lower than the org and workspace .env files — callers
// should invoke this BEFORE parseEnvFile on those, so a workspace .env
// can override a persona-default value when needed.
//
// Silent no-op when role is empty, when the role name fails the
// safe-segment check, or when the env file does not exist (workspaces
// without a role — or hosts without the bootstrap dir — keep their old
// behavior).
func loadPersonaEnvFile(role string, out map[string]string) {
	if !isSafeRoleName(role) {
		// An empty role is the normal "no persona" case; anything else
		// unsafe is logged so misconfigured templates are visible.
		if role != "" {
			log.Printf("Org import: refusing persona env load for unsafe role name %q", role)
		}
		return
	}
	base := os.Getenv("MOLECULE_PERSONA_ROOT")
	if base == "" {
		base = "/etc/molecule-bootstrap/personas"
	}
	parseEnvFile(filepath.Join(base, role, "env"), out)
}
// isSafeRoleName accepts a single path segment of [A-Za-z0-9_-]+. It
// rejects the empty string, ".", "..", and any other character —
// including path separators — even though the construct is admin-only:
// defense-in-depth keeps the persona dir shape invariant (one flat
// directory per role, no climbing out).
func isSafeRoleName(s string) bool {
	switch s {
	case "", ".", "..":
		return false
	}
	for _, r := range s {
		alnum := (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9')
		if !alnum && r != '-' && r != '_' {
			return false
		}
	}
	return true
}
// parseEnvFile reads a .env file and adds KEY=VALUE pairs to the map.
// Skips comments (#) and empty lines. Values can be quoted.
func parseEnvFile(path string, out map[string]string) {

View File

@ -42,20 +42,6 @@ import (
// straight into the parent's child-coordinate space without doing a
// canvas-wide absolute-position walk.
func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX, absY, relX, relY float64, defaults OrgDefaults, orgBaseDir string, results *[]map[string]interface{}, provisionSem chan struct{}) error {
// spawning: false guard — skip this workspace AND all descendants.
// Pointer-typed so we distinguish "explicitly false" from "unset"
// (unset = default to spawn). The guard sits BEFORE any side effect
// (no DB row, no docker provision, no children recursion) so a
// false-spawning subtree is genuinely a no-op except for the log line.
// Use case: dev-tree org template ships the full role taxonomy but a
// developer's machine only has RAM for a subset; a per-workspace
// `spawning: false` lets them narrow without editing the parent
// template's structure.
if ws.Spawning != nil && !*ws.Spawning {
log.Printf("Org import: skipping workspace %q (spawning=false; %d descendant workspace(s) in subtree also skipped)", ws.Name, countWorkspaces(ws.Children))
return nil
}
// Apply defaults
runtime := ws.Runtime
if runtime == "" {
@ -264,21 +250,6 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), id, map[string]interface{}{
"name": ws.Name, "external": true,
})
} else if IsMockRuntime(runtime) {
// Mock-runtime workspaces have no container, no EC2, no URL —
// the proxyA2ARequest short-circuit synthesises every reply
// from a canned variant pool (see mock_runtime.go). Status
// goes straight to 'online' so the canvas renders the node
// as reachable + the chat tab's send button is enabled. No
// URL is set; the proxy never tries to resolve one for mock
// runtimes. Built for the funding-demo "200-workspace mock
// org" template — visual scale without real backend cost.
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1 WHERE id = $2`, models.StatusOnline, id); err != nil {
log.Printf("Org import: mock workspace status update failed for %s: %v", ws.Name, err)
}
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), id, map[string]interface{}{
"name": ws.Name, "mock": true, "runtime": runtime,
})
} else if h.workspace.HasProvisioner() {
// Provision container — either backend (CP for SaaS, local Docker
// for self-hosted) is fine. Pre-2026-05-05 this gate was
@ -457,35 +428,10 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
configFiles["system-prompt.md"] = []byte(ws.SystemPrompt)
}
// Inject secrets from persona env + .env files as workspace secrets.
// Resolution (later overrides earlier):
// 0. Persona env (per-role bootstrap creds; only when ws.Role is set
// and the operator-host bootstrap dir ships a matching file)
// 1. Org root .env (shared defaults)
// 2. Workspace-specific .env (per-workspace overrides)
// Inject secrets from .env files as workspace secrets.
// Resolution: workspace .env → org root .env (workspace overrides org root).
// Each line: KEY=VALUE → stored as encrypted workspace secret.
envVars := map[string]string{}
// 0. Persona env (lowest precedence; injects the role's Gitea identity:
// GITEA_USER, GITEA_TOKEN, GITEA_TOKEN_SCOPES, GITEA_USER_EMAIL,
// GITEA_SSH_KEY_PATH, plus MODEL_PROVIDER/MODEL and the LLM auth
// token like CLAUDE_CODE_OAUTH_TOKEN or MINIMAX_API_KEY).
// Workspace and org .env can override.
//
// Use ws.FilesDir as the persona-dir lookup key, NOT ws.Role. In the
// dev-tree org.yaml shape, `role:` carries the multi-line descriptive
// text the agent reads from its prompt ("Engineering planning and
// team coordination — leads Core Platform, Controlplane, ..."), while
// `files_dir:` holds the short slug (`core-lead`, `dev-lead`, etc.)
// matching `~/.molecule-ai/personas/<files_dir>/env`
// (bind-mounted to `/etc/molecule-bootstrap/personas/<files_dir>/env`).
//
// Pre-fix, this passed `ws.Role` whose multi-word content failed
// isSafeRoleName silently, so every imported workspace booted with
// zero persona-env rows in workspace_secrets — no ANTHROPIC /
// CLAUDE_CODE auth in the container env. The claude_agent_sdk
// then wedged on `query.initialize()` with a 60s control-request
// timeout (caught 2026-05-08 right after dev-only org/import).
loadPersonaEnvFile(ws.FilesDir, envVars)
if orgBaseDir != "" {
// 1. Org root .env (shared defaults)
parseEnvFile(filepath.Join(orgBaseDir, ".env"), envVars)
@ -729,23 +675,7 @@ func (h *OrgHandler) recurseChildrenForImport(ws OrgWorkspace, parentID string,
if err := h.createWorkspaceTree(child, &parentID, childAbsX, childAbsY, slotX, slotY, defaults, orgBaseDir, results, provisionSem); err != nil {
return err
}
// Pacing exists to throttle Docker container-spawn thundering
// during a self-hosted import. Mock-runtime children spawn no
// container — no Docker pressure, no LLM bursts, just DB
// inserts + a broadcast. Skipping the 2s sleep collapses a
// 200-workspace mock-org import from ~7min → ~5s, which is
// the difference between a snappy demo and a "did it freeze?"
// staring contest. Real (containerful) runtimes still pace.
// Inheritance: if the child itself doesn't declare a runtime,
// fall back to defaults.runtime — the org template sets
// runtime: mock once at the org level, not on every IC node.
childRuntime := child.Runtime
if childRuntime == "" {
childRuntime = defaults.Runtime
}
if !IsMockRuntime(childRuntime) {
time.Sleep(workspaceCreatePacingMs * time.Millisecond)
}
time.Sleep(workspaceCreatePacingMs * time.Millisecond)
}
return nil
}

View File

@ -1,158 +0,0 @@
package handlers
import (
"context"
"sort"
"testing"
"github.com/DATA-DOG/go-sqlmock"
)
// Tests for the reconcile-mode + audit-event additions to OrgHandler.Import.
//
// Background: /org/import was purely additive — re-running with a tree that
// renamed/reparented a role left the prior workspace online (different
// parent_id from the new one, so lookupExistingChild's parent-scoped dedupe
// missed it). The 2026-05-08 dev-tree case left 8 orphans surviving a
// re-import. mode="reconcile" closes the gap; emitOrgEvent makes "what
// happened at 20:13?" queryable instead of stdout-grep archaeology.
// A flat (childless) tree yields exactly its top-level names.
func TestWalkOrgWorkspaceNames_FlatTree(t *testing.T) {
	roots := []OrgWorkspace{
		{Name: "Dev Lead"},
		{Name: "Release Manager"},
	}
	var got []string
	walkOrgWorkspaceNames(roots, &got)
	sort.Strings(got)
	if want := []string{"Dev Lead", "Release Manager"}; !equalStrings(got, want) {
		t.Errorf("flat tree: got %v, want %v", got, want)
	}
}
// A nested tree yields every name at every depth.
func TestWalkOrgWorkspaceNames_NestedTree(t *testing.T) {
	leaf := OrgWorkspace{Name: "Core-BE"}
	roots := []OrgWorkspace{{
		Name: "Dev Lead",
		Children: []OrgWorkspace{
			{Name: "Core Platform Lead", Children: []OrgWorkspace{leaf}},
			{Name: "SDK Lead"},
		},
	}}
	var got []string
	walkOrgWorkspaceNames(roots, &got)
	sort.Strings(got)
	want := []string{"Core Platform Lead", "Core-BE", "Dev Lead", "SDK Lead"}
	if !equalStrings(got, want) {
		t.Errorf("nested tree: got %v, want %v", got, want)
	}
}
// Pins the contract that spawning:false subtrees still contribute their
// names to the reconcile working set. Were the walker to skip them, a
// re-import toggling spawning would orphan workspaces previously
// imported with spawning:true — the inverse of the bug being fixed.
// Spawning gates *provisioning*, not *reconcile membership*.
func TestWalkOrgWorkspaceNames_SpawningFalseStillCounted(t *testing.T) {
	off := false
	roots := []OrgWorkspace{{Name: "Dev Lead", Children: []OrgWorkspace{
		{Name: "Skipped Lead", Spawning: &off, Children: []OrgWorkspace{
			{Name: "Skipped Child"},
		}},
	}}}
	var got []string
	walkOrgWorkspaceNames(roots, &got)
	sort.Strings(got)
	want := []string{"Dev Lead", "Skipped Child", "Skipped Lead"}
	if !equalStrings(got, want) {
		t.Errorf("spawning:false subtree: got %v, want %v", got, want)
	}
}
// Placeholder entries with no name must not pollute the working set.
func TestWalkOrgWorkspaceNames_EmptyNamesSkipped(t *testing.T) {
	roots := []OrgWorkspace{
		{Name: "Dev Lead"},
		{}, // YAML default / placeholder
		{Name: "Release Manager"},
	}
	var got []string
	walkOrgWorkspaceNames(roots, &got)
	sort.Strings(got)
	want := []string{"Dev Lead", "Release Manager"}
	if !equalStrings(got, want) {
		t.Errorf("empty-name skip: got %v, want %v", got, want)
	}
}
// emitOrgEvent must INSERT into structure_events with the event_type
// plus a JSON payload. The sqlmock expectation pins the SQL shape so a
// future schema rename (e.g., to audit_events) breaks this test loudly
// instead of silently dropping telemetry.
func TestEmitOrgEvent_InsertsToStructureEvents(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectExec(`INSERT INTO structure_events`).
		WithArgs("org.import.started", sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(1, 1))
	payload := map[string]any{"name": "test-org", "mode": "reconcile"}
	emitOrgEvent(context.Background(), "org.import.started", payload)
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations: %v", err)
	}
}
// Insert failures are log-and-swallow — telemetry MUST NOT block the
// caller path. If a future patch started returning the error, every
// structure_events INSERT hiccup would turn an org-import request into
// an HTTP 500, which is strictly worse than losing the row.
func TestEmitOrgEvent_DBErrorIsSwallowed(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectExec(`INSERT INTO structure_events`).
		WithArgs("org.import.failed", sqlmock.AnyArg()).
		WillReturnError(errSentinelTest)
	// emitOrgEvent returns nothing, so the contract is simply:
	// no panic, no propagation.
	payload := map[string]any{"err": "preflight failed"}
	emitOrgEvent(context.Background(), "org.import.failed", payload)
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations: %v", err)
	}
}
// errString maps nil → "" and any non-nil error → its Error() text.
func TestErrString(t *testing.T) {
	if got := errString(errSentinelTest); got != "sentinel" {
		t.Errorf("sentinel error: got %q, want \"sentinel\"", got)
	}
	if got := errString(nil); got != "" {
		t.Errorf("nil error: got %q, want empty", got)
	}
}
// errSentinelTest is a marker error used for swallow-error assertions.
var errSentinelTest = sentinelErrTest{}

// sentinelErrTest is a zero-size error type with a fixed message so
// tests can assert on the exact string "sentinel".
type sentinelErrTest struct{}

// Error implements the error interface.
func (sentinelErrTest) Error() string { return "sentinel" }
// equalStrings reports whether a and b contain the same elements in the
// same order (both nil/empty count as equal).
func equalStrings(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if v != b[i] {
			return false
		}
	}
	return true
}

View File

@ -76,12 +76,6 @@ func expandNode(n *yaml.Node, currentDir, rootDir string, visited map[string]boo
return resolveIncludeScalar(n, currentDir, rootDir, visited, depth)
}
// `!external`-tagged mapping: gitops cross-repo subtree composition.
// See org_external.go (internal#77 / task #222).
if n.Kind == yaml.MappingNode && n.Tag == "!external" {
return resolveExternalMapping(n, currentDir, rootDir, visited, depth)
}
for _, child := range n.Content {
if err := expandNode(child, currentDir, rootDir, visited, depth); err != nil {
return err

View File

@ -1,136 +0,0 @@
package handlers
import (
"os"
"path/filepath"
"testing"
"gopkg.in/yaml.v3"
)
// Phase 5 (RFC internal#77 dev-department extraction):
// Proves a parent org template can compose a subtree from a sibling repo
// via a directory symlink. Pattern that gets shipped:
//
// /org-templates/parent-template/ ← imported by POST /org/import
// org.yaml (workspaces: !include dev/dev-lead/workspace.yaml)
// dev → /org-templates/molecule-dev-department/ (symlink)
// /org-templates/molecule-dev-department/ (sibling repo)
// dev-lead/
// workspace.yaml (children: !include ./core-platform/workspace.yaml)
// core-platform/
// workspace.yaml
//
// resolveYAMLIncludes resolves paths via filepath.Abs/Rel (no symlink
// following at the path-string layer), so the security check passes. The
// actual file open uses os.ReadFile, which DOES follow symlinks — so the
// content from the sibling repo gets inlined. This test pins that contract.
func TestResolveYAMLIncludes_FollowsDirectorySymlink(t *testing.T) {
	tmp := t.TempDir()
	// Subtree repo: dev-department/dev-lead/...
	devDept := filepath.Join(tmp, "molecule-dev-department")
	devLead := filepath.Join(devDept, "dev-lead")
	corePlatform := filepath.Join(devLead, "core-platform")
	if err := os.MkdirAll(corePlatform, 0o755); err != nil {
		t.Fatal(err)
	}
	// dev-lead/workspace.yaml — uses `./core-platform/workspace.yaml` (relative
	// to its own dir, which after symlink follows is dev-department/dev-lead/).
	devLeadYAML := []byte(`name: Dev Lead
tier: 3
children:
- !include ./core-platform/workspace.yaml
`)
	if err := os.WriteFile(filepath.Join(devLead, "workspace.yaml"), devLeadYAML, 0o644); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(corePlatform, "workspace.yaml"), []byte("name: Core Platform\ntier: 3\n"), 0o644); err != nil {
		t.Fatal(err)
	}
	// Parent template: parent/, with `dev` symlink → ../molecule-dev-department/
	parent := filepath.Join(tmp, "parent-template")
	if err := os.MkdirAll(parent, 0o755); err != nil {
		t.Fatal(err)
	}
	// Symlink TARGET is a relative path (matches operator-side deploy
	// convention where both repos are cloned as siblings under a shared
	// /org-templates/ dir).
	if err := os.Symlink("../molecule-dev-department", filepath.Join(parent, "dev")); err != nil {
		t.Skipf("symlinks unsupported on this fs: %v", err)
	}
	// Parent's org.yaml: !include into the symlinked subtree.
	src := []byte(`name: Parent
workspaces:
- !include dev/dev-lead/workspace.yaml
`)
	out, err := resolveYAMLIncludes(src, parent)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes through symlink failed: %v", err)
	}
	var tmpl OrgTemplate
	if err := yaml.Unmarshal(out, &tmpl); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	// Outer include (through the `dev` symlink) must have produced the
	// Dev Lead workspace…
	if len(tmpl.Workspaces) != 1 {
		t.Fatalf("expected 1 workspace, got %d", len(tmpl.Workspaces))
	}
	if tmpl.Workspaces[0].Name != "Dev Lead" {
		t.Fatalf("workspace[0].Name = %q; want Dev Lead", tmpl.Workspaces[0].Name)
	}
	// …and the NESTED include (resolved relative to the symlinked dir)
	// must have produced its Core Platform child.
	kids := tmpl.Workspaces[0].Children
	if len(kids) != 1 {
		t.Fatalf("expected 1 child workspace, got %d", len(kids))
	}
	if kids[0].Name != "Core Platform" {
		t.Fatalf("child[0].Name = %q; want Core Platform — symlink-aware nested !include broken", kids[0].Name)
	}
}
// Companion to the happy-path symlink test: pin the CURRENT resolver
// behaviour when a symlink target lies OUTSIDE the parent template's
// root — the "hostile symlink" case, an org.yaml that tries to slip in
// arbitrary files from elsewhere on disk. Today the resolver ALLOWS it:
// the path STRING passes the rel2 check (filepath.Abs doesn't follow
// symlinks) while os.ReadFile DOES follow the link. The security
// boundary is therefore a deployment-layer invariant, not a code-layer
// one — documented in dev-department/README. If the resolver is ever
// hardened (e.g. via filepath.EvalSymlinks), this test starts failing
// and the dev-department symlink approach must be revisited.
func TestResolveYAMLIncludes_RejectsSymlinkEscapingRoot(t *testing.T) {
	tmp := t.TempDir()
	parent := filepath.Join(tmp, "parent-template")
	outside := filepath.Join(tmp, "outside")
	if err := os.MkdirAll(parent, 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.MkdirAll(outside, 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(outside, "evil.yaml"), []byte("name: Evil\n"), 0o644); err != nil {
		t.Fatal(err)
	}
	// Symlink that escapes the parent root via `../outside/...`.
	if err := os.Symlink(filepath.Join(outside, "evil.yaml"), filepath.Join(parent, "evil.yaml")); err != nil {
		t.Skipf("symlinks unsupported on this fs: %v", err)
	}
	src := []byte("workspaces:\n - !include evil.yaml\n")
	out, err := resolveYAMLIncludes(src, parent)
	if err != nil {
		// BUG FIX: the old message read "symlink resolved successfully
		// under current resolver" in the branch where resolution FAILED,
		// inverting the diagnosis. Report what actually happened: the
		// resolver rejected the escaping symlink (i.e. was hardened).
		t.Fatalf("resolveYAMLIncludes rejected the escaping symlink (resolver hardened?): %v", err)
	}
	var tmpl OrgTemplate
	if err := yaml.Unmarshal(out, &tmpl); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if len(tmpl.Workspaces) != 1 || tmpl.Workspaces[0].Name != "Evil" {
		t.Fatalf("expected Evil workspace via symlink; got %+v", tmpl.Workspaces)
	}
}

View File

@ -1,171 +0,0 @@
package handlers
import (
"os"
"path/filepath"
"testing"
)
// TestLoadPersonaEnvFile_HappyPath: the standard case — a persona-shaped
// env file exists at <root>/<role>/env and every KEY=VALUE pair lands in
// the out map. The body mirrors what the operator-host bootstrap kit
// ships: GITEA_USER, GITEA_TOKEN, GITEA_TOKEN_SCOPES, GITEA_USER_EMAIL,
// GITEA_SSH_KEY_PATH.
func TestLoadPersonaEnvFile_HappyPath(t *testing.T) {
	personaRoot := t.TempDir()
	dir := filepath.Join(personaRoot, "dev-lead")
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatal(err)
	}
	contents := `# Persona env file mode 600
GITEA_USER=dev-lead
GITEA_USER_EMAIL=dev-lead@agents.moleculesai.app
GITEA_TOKEN=abc123
GITEA_TOKEN_SCOPES=write:repository,write:issue,read:user
GITEA_SSH_KEY_PATH=/etc/molecule-bootstrap/personas/dev-lead/ssh_priv
`
	if err := os.WriteFile(filepath.Join(dir, "env"), []byte(contents), 0o600); err != nil {
		t.Fatal(err)
	}
	t.Setenv("MOLECULE_PERSONA_ROOT", personaRoot)

	out := map[string]string{}
	loadPersonaEnvFile("dev-lead", out)

	want := map[string]string{
		"GITEA_USER":         "dev-lead",
		"GITEA_USER_EMAIL":   "dev-lead@agents.moleculesai.app",
		"GITEA_TOKEN":        "abc123",
		"GITEA_TOKEN_SCOPES": "write:repository,write:issue,read:user",
		"GITEA_SSH_KEY_PATH": "/etc/molecule-bootstrap/personas/dev-lead/ssh_priv",
	}
	// Exact-cardinality check first, then per-key values.
	if len(out) != len(want) {
		t.Fatalf("got %d keys, want %d: %#v", len(out), len(want), out)
	}
	for k, v := range want {
		if out[k] != v {
			t.Errorf("out[%q] = %q; want %q", k, out[k], v)
		}
	}
}
// TestLoadPersonaEnvFile_MissingDir: a role whose persona dir is absent
// (dev-only host without the bootstrap kit, or a workspace role that is
// not a known persona) must be a silent no-op — out stays empty, no
// panic, no log noise that would break callers.
func TestLoadPersonaEnvFile_MissingDir(t *testing.T) {
	t.Setenv("MOLECULE_PERSONA_ROOT", t.TempDir()) // empty root — no role subdirs
	got := map[string]string{}
	loadPersonaEnvFile("nonexistent-role", got)
	if len(got) != 0 {
		t.Errorf("expected empty out, got %#v", got)
	}
}
// TestLoadPersonaEnvFile_EmptyRole: the empty role string — the common
// case for non-dev workspaces (research/marketing/etc.) — is skipped
// silently and writes nothing into the map.
func TestLoadPersonaEnvFile_EmptyRole(t *testing.T) {
	t.Setenv("MOLECULE_PERSONA_ROOT", t.TempDir())
	got := map[string]string{}
	loadPersonaEnvFile("", got)
	if len(got) != 0 {
		t.Errorf("empty role should produce empty out; got %#v", got)
	}
}
// TestLoadPersonaEnvFile_RejectsTraversal: role names come from
// server-side admin-only org templates, but defense-in-depth still
// demands refusing any role string with path separators or "..". A
// maliciously crafted template must not be able to read /etc/passwd by
// setting role: "../../etc".
func TestLoadPersonaEnvFile_RejectsTraversal(t *testing.T) {
	root := t.TempDir()
	// Plant a file at /tmp/.../env so a bad traversal would reach it
	if err := os.WriteFile(filepath.Join(root, "env"), []byte("STOLEN=yes\n"), 0o600); err != nil {
		t.Fatal(err)
	}
	t.Setenv("MOLECULE_PERSONA_ROOT", filepath.Join(root, "personas"))
	hostile := []string{"..", "../personas", "../etc/passwd", "/abs", "with/slash", "dot.in.middle", "with space", "back\\slash", ".", ""}
	for _, bad := range hostile {
		out := map[string]string{}
		loadPersonaEnvFile(bad, out)
		if len(out) != 0 {
			t.Errorf("role %q should have been rejected; got %#v", bad, out)
		}
	}
}
// TestLoadPersonaEnvFile_DefaultRoot: with MOLECULE_PERSONA_ROOT unset
// the helper falls back to /etc/molecule-bootstrap/personas. We never
// touch the real /etc — the only assertion is that the call returns
// cleanly without panicking.
func TestLoadPersonaEnvFile_DefaultRoot(t *testing.T) {
	t.Setenv("MOLECULE_PERSONA_ROOT", "") // explicit empty
	got := map[string]string{}
	loadPersonaEnvFile("dev-lead", got)
	// Content is deliberately unasserted — production CI might or might
	// not have the /etc dir mounted.
	_ = got
}
// TestLoadPersonaEnvFile_OverwritesEmptyMap: precedence ("persona is the
// lowest layer") is enforced by caller-side ordering — persona → org →
// workspace — not by the helper itself. parseEnvFile semantics mean the
// helper DOES overwrite keys already present in the map; this test pins
// that, so callers know they must load persona values into a fresh map
// before layering later .env files on top.
func TestLoadPersonaEnvFile_OverwritesEmptyMap(t *testing.T) {
	root := t.TempDir()
	dir := filepath.Join(root, "core-be")
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatal(err)
	}
	envPath := filepath.Join(dir, "env")
	if err := os.WriteFile(envPath, []byte("GITEA_TOKEN=persona-value\n"), 0o600); err != nil {
		t.Fatal(err)
	}
	t.Setenv("MOLECULE_PERSONA_ROOT", root)

	// Pre-populated key simulates a later layer already present; the
	// helper's parseEnvFile semantics overwrite it.
	out := map[string]string{"GITEA_TOKEN": "preset"}
	loadPersonaEnvFile("core-be", out)
	if out["GITEA_TOKEN"] != "persona-value" {
		t.Errorf("loadPersonaEnvFile did not write into existing map; got %q", out["GITEA_TOKEN"])
	}
}
// TestIsSafeRoleName_Acceptance: positive + negative cases for the
// validator. Pinned because every dev-tree role name must pass.
func TestIsSafeRoleName_Acceptance(t *testing.T) {
	good := []string{
		"dev-lead", "core-be", "cp-security", "infra-runtime-be",
		"sdk-dev", "plugin-dev", "documentation-specialist",
		"triage-operator", "fullstack-engineer", "release-manager",
		"core_underscore_ok", "X", "a1", "Z9-0",
	}
	for _, s := range good {
		if !isSafeRoleName(s) {
			t.Errorf("isSafeRoleName(%q) = false; want true", s)
		}
	}
	// BUG FIX: `bad` was previously assigned twice — the first list
	// (which mistakenly contained "trailing-", a name the validator is
	// meant to allow per the original in-line note) was dead code,
	// immediately shadowed by the corrected reassignment. Only the
	// corrected list is kept.
	bad := []string{
		"", ".", "..", "with/slash", "/abs", "dot.in.middle",
		"with space", "back\\slash", "with$dollar", "with?question",
		"newline\nsplit",
	}
	for _, s := range bad {
		if isSafeRoleName(s) {
			t.Errorf("isSafeRoleName(%q) = true; want false", s)
		}
	}
}

View File

@ -4,7 +4,6 @@ import (
"bytes"
"context"
"io"
"log"
"os"
"path/filepath"
"strings"
@ -23,16 +22,6 @@ import (
// workspace-scoped filtering (handler falls back to unfiltered list).
type RuntimeLookup func(workspaceID string) (string, error)
// InstanceIDLookup resolves a workspace's EC2 instance_id by ID. Empty
// string means the workspace is not on the SaaS (EC2-per-workspace)
// backend — i.e. either local-Docker or pre-provision. The handler uses
// this to dispatch plugin install/uninstall to the EIC SSH path
// (template_files_eic.go primitive) when a workspace runs on its own EC2
// and there's no local Docker container to exec into. A nil lookup keeps
// the handler on the local-Docker code path only — same shape as the
// pre-fix behaviour.
type InstanceIDLookup func(workspaceID string) (string, error)
// pluginSources is the contract PluginsHandler uses to talk to the
// plugin source registry. Extracted as an interface (#1814) so tests can
// substitute a stub without standing up the real *plugins.Registry +
@ -56,11 +45,10 @@ var _ pluginSources = (*plugins.Registry)(nil)
// PluginsHandler manages the plugin registry and per-workspace plugin installation.
type PluginsHandler struct {
pluginsDir string // host path to plugins/ registry
docker *client.Client // Docker client for container operations
restartFunc func(string) // auto-restart workspace after install/uninstall
runtimeLookup RuntimeLookup // workspace_id → runtime (optional)
instanceIDLookup InstanceIDLookup // workspace_id → EC2 instance_id (optional)
pluginsDir string // host path to plugins/ registry
docker *client.Client // Docker client for container operations
restartFunc func(string) // auto-restart workspace after install/uninstall
runtimeLookup RuntimeLookup // workspace_id → runtime (optional)
// sources narrowed from `*plugins.Registry` to the pluginSources
// interface (#1814) so tests can substitute a stub. Production
// callers still pass *plugins.Registry, which satisfies the
@ -101,15 +89,6 @@ func (h *PluginsHandler) WithRuntimeLookup(lookup RuntimeLookup) *PluginsHandler
return h
}
// WithInstanceIDLookup installs a workspace → EC2 instance_id resolver.
// Wired by the router so production hits a real DB; tests stub it. The
// install/uninstall pipeline uses this to dispatch to the EIC SSH path
// for SaaS workspaces (no local Docker container to exec into).
func (h *PluginsHandler) WithInstanceIDLookup(lookup InstanceIDLookup) *PluginsHandler {
h.instanceIDLookup = lookup
return h
}
// pluginInfo is the API response for a plugin.
type pluginInfo struct {
Name string `json:"name"`
@ -198,42 +177,16 @@ func strDefault(m map[string]interface{}, key, fallback string) string {
return fallback
}
// findRunningContainer returns the live container name for workspaceID, or ""
// when the container is genuinely not running OR the daemon errored
// transiently. Routed through provisioner.RunningContainerName as the SSOT
// (molecule-core#10) so this handler agrees with healthsweep on the same
// inputs. Transient daemon errors are logged distinctly so triage doesn't
// confuse a flaky daemon with a stopped container.
func (h *PluginsHandler) findRunningContainer(ctx context.Context, workspaceID string) string {
name, err := provisioner.RunningContainerName(ctx, h.docker, workspaceID)
if err != nil {
log.Printf("plugins: docker inspect transient error for %s: %v (treating as not-running for this request)", workspaceID, err)
if h.docker == nil {
return ""
}
return name
}
// isExternalRuntime reports whether the workspace's runtime is the
// `external` (remote-pull) shape introduced in Phase 30. External
// workspaces have no local container — `POST /plugins` (push-install via
// docker exec) doesn't apply to them; they pull via the download endpoint
// instead. Returns false (allow-install) if the lookup is unwired or
// errors — failing open here is safe because the downstream
// findRunningContainer step still gates on a real container being there.
//
// Background — molecule-core#10: without this check, external workspaces
// fall through to findRunningContainer's NotFound path and return a
// misleading 503 "container not running" instead of a clear "use the
// pull endpoint" message.
func (h *PluginsHandler) isExternalRuntime(workspaceID string) bool {
if h.runtimeLookup == nil {
return false
name := provisioner.ContainerName(workspaceID)
info, err := h.docker.ContainerInspect(ctx, name)
if err == nil && info.State.Running {
return name
}
runtime, err := h.runtimeLookup(workspaceID)
if err != nil {
return false
}
return runtime == "external"
return ""
}
func (h *PluginsHandler) execAsRoot(ctx context.Context, containerName string, cmd []string) (string, error) {

View File

@ -1,207 +0,0 @@
package handlers
// plugins_atomic.go — atomic install pattern for plugin delivery into a
// running workspace container. Closes molecule-core#114.
//
// Replaces the prior "tar + docker.CopyToContainer to /configs/plugins/<name>"
// single-step write (no atomicity, no marker, no rollback) with a 4-step
// dance:
//
// 1. STAGE — extract tar into /configs/plugins/.staging/<name>.<ts>/
// 2. SNAPSHOT — if /configs/plugins/<name>/ exists, mv to .previous/<name>.<ts>/
// 3. SWAP — mv /configs/plugins/.staging/<name>.<ts>/ → /configs/plugins/<name>/
// 4. MARKER — touch /configs/plugins/<name>/.complete
//
// On any post-snapshot failure we attempt a best-effort rollback by mv-ing
// the previous snapshot back into place. The .complete marker is the
// canonical "this install is fully landed" signal — workspace-side plugin
// loaders should refuse to load a plugin dir without it.
//
// Scope: docker path only (workspace running as a local container). The
// SaaS path (deliverViaEIC, SSH-into-EC2) is unchanged in this PR; tracked
// as a follow-up. The same stage-then-swap shape applies but the exec
// primitives differ (ssh vs docker exec), and shipping both paths in one
// PR doubles the test surface.
import (
"bytes"
"context"
"fmt"
"path"
"strings"
"time"
"github.com/docker/docker/api/types/container"
)
const (
	// pluginsRoot is the live plugin tree inside the workspace container.
	pluginsRoot = "/configs/plugins"
	// pluginsStagingDir receives the freshly-extracted tar before swap.
	pluginsStagingDir = "/configs/plugins/.staging"
	// pluginsPrevDir holds the snapshot of the prior live version during
	// an install, enabling best-effort rollback.
	pluginsPrevDir = "/configs/plugins/.previous"
	// completeMarker is touched last inside the live dir; its presence is
	// the canonical "this install fully landed" signal.
	completeMarker = ".complete"
)
// installVersion identifies one install attempt — the plugin name plus a
// monotonic-ish UTC timestamp suffix. Used to namespace the staging dir
// and any snapshot of the previous version, so a reinstall mid-flight
// can't collide with a concurrent reinstall.
type installVersion struct {
	plugin string // plugin directory name under /configs/plugins
	stamp  string // e.g. 20260102T150405Z-shaped, second precision, no colons
}
// newInstallVersion stamps a fresh install attempt for plugin using the
// current UTC time in the colon-free 20060102T150405Z layout.
func newInstallVersion(plugin string) installVersion {
	stamp := time.Now().UTC().Format("20060102T150405Z")
	return installVersion{plugin: plugin, stamp: stamp}
}
// stagedPath is the container path where the new content lands during fetch.
// e.g. /configs/plugins/.staging/molecule-skill-foo.20260508T141530Z
func (v installVersion) stagedPath() string {
	versioned := v.plugin + "." + v.stamp
	return path.Join(pluginsStagingDir, versioned)
}
// previousPath is where the prior live version is moved before swap.
// e.g. /configs/plugins/.previous/molecule-skill-foo.20260508T141530Z
func (v installVersion) previousPath() string {
	versioned := v.plugin + "." + v.stamp
	return path.Join(pluginsPrevDir, versioned)
}
// livePath is the destination after swap — the directory the
// workspace-side plugin loader actually reads.
// e.g. /configs/plugins/molecule-skill-foo
func (v installVersion) livePath() string {
	return path.Join(pluginsRoot, v.plugin)
}
// markerPath is the .complete file inside the live dir, written last as
// the "install fully landed" signal.
func (v installVersion) markerPath() string {
	return path.Join(v.livePath(), completeMarker)
}
// atomicCopyToContainer does a stage→snapshot→swap→marker install of a
// host-side staged plugin tree into a running container's
// /configs/plugins/<name>/. Returns nil on success.
//
// On post-snapshot failure (swap or marker write), best-effort rollback
// restores the previous snapshot to the live path. Returns the original
// error wrapped — the caller should surface it; rollback success is
// logged separately.
func (h *PluginsHandler) atomicCopyToContainer(
	ctx context.Context, containerName, hostDir, pluginName string,
) error {
	v := newInstallVersion(pluginName)
	// Step 0a: ensure staging + previous root dirs exist (idempotent).
	if _, err := h.execAsRoot(ctx, containerName, []string{
		"mkdir", "-p", pluginsStagingDir, pluginsPrevDir,
	}); err != nil {
		return fmt.Errorf("atomic install: mkdir staging/previous: %w", err)
	}
	// Step 0b: tar the host content with a path prefix that lands it in the
	// staging dir — NOT directly into the live name. The prefix has no
	// leading "/" because docker.CopyToContainer extracts paths relative
	// to the dstPath argument we pass below.
	stagedRel := strings.TrimPrefix(v.stagedPath(), "/")
	tarBuf, err := tarHostDirWithPrefix(hostDir, stagedRel)
	if err != nil {
		return fmt.Errorf("atomic install: tar host dir: %w", err)
	}
	// Step 1: STAGE — extract tar into /configs/plugins/.staging/<name>.<ts>/
	if err := h.docker.CopyToContainer(ctx, containerName, "/", &tarBuf,
		container.CopyToContainerOptions{}); err != nil {
		// Best-effort: clean up any partial staging extract before returning.
		_, _ = h.execAsRoot(ctx, containerName, []string{
			"rm", "-rf", v.stagedPath(),
		})
		return fmt.Errorf("atomic install: copy to container: %w", err)
	}
	// Step 2: SNAPSHOT — if a live version exists, move it aside.
	// `test -d` exits 0 if the dir exists, non-zero otherwise; the helper
	// returns a non-nil error in the non-zero case which we treat as
	// "no previous version" rather than a real failure.
	// NOTE: snapshotted gates both the swap-failure rollback and the
	// final GC — it is only set after the mv actually succeeds.
	snapshotted := false
	if _, err := h.execAsRoot(ctx, containerName, []string{
		"test", "-d", v.livePath(),
	}); err == nil {
		if _, err := h.execAsRoot(ctx, containerName, []string{
			"mv", v.livePath(), v.previousPath(),
		}); err != nil {
			// Snapshot failure: roll back the staged extract before failing.
			_, _ = h.execAsRoot(ctx, containerName, []string{
				"rm", "-rf", v.stagedPath(),
			})
			return fmt.Errorf("atomic install: snapshot previous version: %w", err)
		}
		snapshotted = true
	}
	// Step 3: SWAP — atomic rename of the staged dir into the live name.
	// `mv` on the same filesystem is a single rename(2), atomic at the FS level.
	if _, err := h.execAsRoot(ctx, containerName, []string{
		"mv", v.stagedPath(), v.livePath(),
	}); err != nil {
		// Swap failure: roll back if we had a snapshot.
		if snapshotted {
			if _, rbErr := h.execAsRoot(ctx, containerName, []string{
				"mv", v.previousPath(), v.livePath(),
			}); rbErr != nil {
				return fmt.Errorf("atomic install: swap failed AND rollback failed: swap=%w, rollback=%v", err, rbErr)
			}
		}
		// Best-effort cleanup of the still-staged dir.
		_, _ = h.execAsRoot(ctx, containerName, []string{
			"rm", "-rf", v.stagedPath(),
		})
		return fmt.Errorf("atomic install: swap to live path: %w", err)
	}
	// Step 4: MARKER — touch .complete inside the live dir as the last write.
	// Workspace-side plugin loaders treat a plugin dir without this marker
	// as half-installed and skip it (or surface a clear error to the
	// operator instead of loading a possibly-partial tree).
	if _, err := h.execAsRoot(ctx, containerName, []string{
		"touch", v.markerPath(),
	}); err != nil {
		// Marker write failure with the new content already in place is a
		// weird state — content is fine on disk, but the plugin loader
		// will refuse to use it. Log loudly; do NOT roll back, since the
		// content is the latest, just unmarked. Operator can manually
		// `touch <plugin>/.complete` to recover.
		return fmt.Errorf("atomic install: write .complete marker (content landed but unmarked, manual recovery: touch %s): %w", v.markerPath(), err)
	}
	// Step 5: GC — best-effort delete the previous snapshot. Failures here
	// just leave a directory; not load-bearing for correctness, the next
	// install or a separate sweeper will reclaim the space.
	if snapshotted {
		_, _ = h.execAsRoot(ctx, containerName, []string{
			"rm", "-rf", v.previousPath(),
		})
	}
	return nil
}
// tarHostDirWithPrefix walks hostDir and returns an in-memory tar with
// every entry's name prefixed by `prefix` (walk/skip rules, including
// the symlink-skipping security posture, live in tarWalk).
//
// BUG FIX: the tar.Writer is now closed BEFORE the buffer is returned.
// The previous `defer tw.Close()` ran only after `return buf, nil` had
// already copied the value-typed bytes.Buffer into the return slot, so
// the final padding and the end-of-archive trailer written by Close
// landed in the discarded local copy — producing a truncated archive
// that tar readers reject with an unexpected-EOF error.
func tarHostDirWithPrefix(hostDir, prefix string) (bytes.Buffer, error) {
	var buf bytes.Buffer
	tw := newTarWriter(&buf)
	if err := tarWalk(hostDir, prefix, tw); err != nil {
		_ = tw.Close() // best-effort; the partial archive is discarded anyway
		return bytes.Buffer{}, err
	}
	// Close flushes the last entry's padding and writes the two
	// zero-block trailer; its error is load-bearing for archive validity.
	if err := tw.Close(); err != nil {
		return bytes.Buffer{}, err
	}
	return buf, nil
}

View File

@ -1,70 +0,0 @@
package handlers
// plugins_atomic_tar.go — tar-walk helpers split out so the main atomic
// install flow stays readable. The prefix argument lets the caller
// arrange where the tar's contents land at extract time.
import (
"archive/tar"
"io"
"os"
"path/filepath"
)
// newTarWriter is a thin wrapper so atomic_test.go can swap the writer
// destination if it needs to. Callers own the returned writer and must
// Close it to flush the final entry's padding and the end-of-archive
// trailer.
func newTarWriter(w io.Writer) *tar.Writer {
	return tar.NewWriter(w)
}
// tarWalk walks hostDir and writes every regular file + dir to the tar
// writer with paths of the form `<prefix>/<relative>`. Symlinks are
// skipped — same posture as streamDirAsTar in plugins_install_pipeline.go.
//
// The trailing-slash on prefix is normalized away: prefix "foo" and
// prefix "foo/" produce identical archives.
func tarWalk(hostDir, prefix string, tw *tar.Writer) error {
prefix = filepath.Clean(prefix)
return filepath.Walk(hostDir, func(p string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.Mode()&os.ModeSymlink != 0 {
return nil // skip symlinks; see doc above
}
rel, err := filepath.Rel(hostDir, p)
if err != nil {
return err
}
if rel == "." {
// Emit the prefix dir itself once, with the source dir's mode.
hdr, err := tar.FileInfoHeader(info, "")
if err != nil {
return err
}
hdr.Name = prefix + "/"
return tw.WriteHeader(hdr)
}
hdr, err := tar.FileInfoHeader(info, "")
if err != nil {
return err
}
hdr.Name = filepath.Join(prefix, rel)
if info.IsDir() {
hdr.Name += "/"
}
if err := tw.WriteHeader(hdr); err != nil {
return err
}
if !info.Mode().IsRegular() {
return nil
}
f, err := os.Open(p)
if err != nil {
return err
}
defer f.Close()
_, err = io.Copy(tw, f)
return err
})
}

View File

@ -1,193 +0,0 @@
package handlers
import (
"archive/tar"
"bytes"
"io"
"os"
"path/filepath"
"sort"
"strings"
"testing"
"time"
)
// TestInstallVersion_Paths: the path helpers must keep producing the
// exact layout the in-container exec calls depend on. Pinning it here
// catches a future refactor that silently moves the staging / previous /
// live dirs — which would break the swap's atomicity guarantees.
func TestInstallVersion_Paths(t *testing.T) {
	v := installVersion{plugin: "molecule-skill-foo", stamp: "20260508T141530Z"}
	if got := v.stagedPath(); got != "/configs/plugins/.staging/molecule-skill-foo.20260508T141530Z" {
		t.Errorf("stagedPath = %q; want %q", got, "/configs/plugins/.staging/molecule-skill-foo.20260508T141530Z")
	}
	if got := v.previousPath(); got != "/configs/plugins/.previous/molecule-skill-foo.20260508T141530Z" {
		t.Errorf("previousPath = %q; want %q", got, "/configs/plugins/.previous/molecule-skill-foo.20260508T141530Z")
	}
	if got := v.livePath(); got != "/configs/plugins/molecule-skill-foo" {
		t.Errorf("livePath = %q; want %q", got, "/configs/plugins/molecule-skill-foo")
	}
	if got := v.markerPath(); got != "/configs/plugins/molecule-skill-foo/.complete" {
		t.Errorf("markerPath = %q; want %q", got, "/configs/plugins/molecule-skill-foo/.complete")
	}
}
// TestInstallVersion_StampShape (header previously mis-named it
// "StampUniqueness"): two newInstallVersion calls within the same second
// produce the same stamp (second precision) — collision-free stamping is
// NOT a correctness requirement, since the caller relies on mv-rename
// atomicity. What IS load-bearing is the stamp's shape: a regression to
// e.g. RFC3339 (with colons) would break the path helpers, because
// path.Join treats a colon as a regular char but ssh + docker exec
// generally don't. Pin the no-colon, no-space shape.
func TestInstallVersion_StampShape(t *testing.T) {
	v := newInstallVersion("anything")
	if strings.Contains(v.stamp, ":") {
		t.Errorf("stamp must not contain colons (breaks shell-quoting in exec): %q", v.stamp)
	}
	if strings.Contains(v.stamp, " ") {
		t.Errorf("stamp must not contain spaces: %q", v.stamp)
	}
	// Sanity: stamp parses as the documented format.
	if _, err := time.Parse("20060102T150405Z", v.stamp); err != nil {
		t.Errorf("stamp %q does not parse as 20060102T150405Z: %v", v.stamp, err)
	}
}
// TestTarHostDirWithPrefix_HappyPath: walks a host dir, builds a tar with
// the configured prefix, verifies every entry's name is rooted under
// the prefix, and the file contents survive round-trip.
func TestTarHostDirWithPrefix_HappyPath(t *testing.T) {
	hostDir := t.TempDir()
	// Plant: <host>/plugin.yaml + <host>/skills/foo/SKILL.md + <host>/.complete
	files := map[string]string{
		"plugin.yaml":         "name: foo\nversion: 1.0.0\n",
		"skills/foo/SKILL.md": "# Foo skill\n",
		".complete":           "", // upstream may already have a marker
	}
	for rel, body := range files {
		full := filepath.Join(hostDir, rel)
		if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
			t.Fatal(err)
		}
		if err := os.WriteFile(full, []byte(body), 0o644); err != nil {
			t.Fatal(err)
		}
	}
	prefix := "configs/plugins/.staging/foo.20260508T141530Z"
	buf, err := tarHostDirWithPrefix(hostDir, prefix)
	if err != nil {
		t.Fatalf("tar: %v", err)
	}
	// Read back the tar; collect names + body for regular files.
	// NOTE(review): this read-back loop also implicitly requires a valid
	// end-of-archive trailer — tr.Next() reaches io.EOF only on a
	// properly Close()d archive.
	got := map[string]string{}
	tr := tar.NewReader(&buf)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatalf("tar reader: %v", err)
		}
		// Every entry must start with the prefix
		if !strings.HasPrefix(hdr.Name, prefix) {
			t.Errorf("entry %q does not start with prefix %q", hdr.Name, prefix)
		}
		if hdr.Typeflag == tar.TypeReg {
			body, err := io.ReadAll(tr)
			if err != nil {
				t.Fatal(err)
			}
			rel := strings.TrimPrefix(hdr.Name, prefix+"/")
			got[rel] = string(body)
		}
	}
	// Every planted file must round-trip byte-for-byte.
	for rel, want := range files {
		if got[rel] != want {
			t.Errorf("body[%q] = %q; want %q", rel, got[rel], want)
		}
	}
}
// TestTarHostDirWithPrefix_SkipsSymlinks: the walker silently drops
// symlinks (same posture as streamDirAsTar), so a hostile plugin can't
// ship a link that, post-extract, points outside its own dir. Plant one
// next to a regular file and verify the link never reaches the archive
// while the regular file does.
func TestTarHostDirWithPrefix_SkipsSymlinks(t *testing.T) {
	hostDir := t.TempDir()
	// Plant a real file + a symlink pointing outside hostDir.
	if err := os.WriteFile(filepath.Join(hostDir, "real.txt"), []byte("ok"), 0o644); err != nil {
		t.Fatal(err)
	}
	target := filepath.Join(t.TempDir(), "outside")
	if err := os.WriteFile(target, []byte("SHOULD NOT APPEAR"), 0o644); err != nil {
		t.Fatal(err)
	}
	if err := os.Symlink(target, filepath.Join(hostDir, "evil")); err != nil {
		t.Fatal(err)
	}
	buf, err := tarHostDirWithPrefix(hostDir, "p")
	if err != nil {
		t.Fatal(err)
	}
	names := []string{}
	rd := tar.NewReader(&buf)
	for {
		hdr, err := rd.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatal(err)
		}
		names = append(names, hdr.Name)
	}
	sort.Strings(names)
	sawReal := false
	for _, n := range names {
		if strings.Contains(n, "evil") {
			t.Errorf("symlink leaked into tar: %q", n)
		}
		if strings.HasSuffix(n, "real.txt") {
			sawReal = true
		}
	}
	if !sawReal {
		t.Errorf("real.txt missing from tar; got names: %v", names)
	}
}
// TestTarHostDirWithPrefix_PrefixNormalization: "foo" and "foo/" must
// yield byte-identical archives, so a future caller passing a trailing
// slash can't double-slash entry names.
func TestTarHostDirWithPrefix_PrefixNormalization(t *testing.T) {
	hostDir := t.TempDir()
	if err := os.WriteFile(filepath.Join(hostDir, "x"), []byte("y"), 0o644); err != nil {
		t.Fatal(err)
	}
	bare, err := tarHostDirWithPrefix(hostDir, "foo")
	if err != nil {
		t.Fatal(err)
	}
	slashed, err := tarHostDirWithPrefix(hostDir, "foo/")
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(bare.Bytes(), slashed.Bytes()) {
		t.Errorf("trailing-slash on prefix changed archive shape; tarHostDirWithPrefix should be slash-insensitive")
	}
}

View File

@ -1,214 +0,0 @@
package handlers
// plugins_classifier.go — diff classifier for plugin updates.
//
// Closes molecule-core#112. Composes with #114 (atomic install) so the
// platform can decide *before* triggering restartFunc whether the
// update is content-only (SKILL.md text changed; agent re-reads at next
// Skill invocation) or structural (hooks/settings/plugin.yaml/file added
// or removed; agent must restart to pick up the new state).
//
// SKILL.md content is hot-reloadable because Claude Code reads the file
// on each Skill invocation — no in-memory cache. Hooks and settings.json
// are loaded at session start and need a session restart. plugin.yaml
// changes are structural by definition (manifest controls everything
// else).
//
// CLASSIFICATION RULE
// classify(staged, live) → "skill-content-only" if and only if
// every file present in either tree is one of:
// - identical between staged and live, OR
// - a **/SKILL.md file with content change (text body modified)
// AND no files were added or removed.
// Anything else → "cold" (the safe default).
//
// The classifier reads live-tree files from inside the container via
// `docker exec cat`. Comparison is by SHA-256 over file content, not
// mtime — mtime changes on every install regardless of content.
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
)
const (
	// classifyKindSkillContentOnly: install can skip restartFunc; the
	// only changes are SKILL.md body text. Safe because Claude Code
	// re-reads SKILL.md on each Skill invocation (see file header).
	classifyKindSkillContentOnly = "skill-content-only"
	// classifyKindCold: must restart the workspace container; structural
	// or hook/settings change. This is the conservative default — any
	// classification failure falls back to it.
	classifyKindCold = "cold"
)
// classifyInstallChanges compares the staged plugin tree (host filesystem)
// against the currently-live plugin tree inside the container and decides
// whether the pending install needs a container restart.
//
// Returns classifyKindSkillContentOnly when both trees contain exactly the
// same set of files and every content difference is confined to **/SKILL.md
// files. Anything else — a file added or removed, hooks/settings.json edits,
// plugin.yaml edits, a first install with no live tree, or a failure reading
// the live tree — yields classifyKindCold, the safe default.
func (h *PluginsHandler) classifyInstallChanges(
	ctx context.Context, containerName, hostStagedDir, pluginName string,
) (string, error) {
	livePath := "/configs/plugins/" + pluginName

	// First install? No live tree means there is nothing to hot-reload into.
	if _, err := h.execAsRoot(ctx, containerName, []string{
		"test", "-d", livePath,
	}); err != nil {
		return classifyKindCold, nil
	}

	staged, err := hashLocalTree(hostStagedDir)
	if err != nil {
		return classifyKindCold, fmt.Errorf("classifier: hash staged: %w", err)
	}
	live, err := h.hashContainerTree(ctx, containerName, livePath)
	if err != nil {
		// Could not read the live tree — be conservative, cold-restart.
		return classifyKindCold, nil
	}

	// The .complete marker is install bookkeeping, not plugin content;
	// comparing it would force a cold restart on every reinstall.
	delete(staged, ".complete")
	delete(live, ".complete")

	// Equal file sets are a precondition for hot reload. With equal sizes,
	// verifying staged ⊆ live proves the sets are identical, so a single
	// pass covers both the added-file and removed-file cases.
	if len(staged) != len(live) {
		return classifyKindCold, nil
	}
	for rel, stagedHash := range staged {
		liveHash, ok := live[rel]
		if !ok {
			return classifyKindCold, nil // file added/removed
		}
		// Content drift is tolerated only in SKILL.md bodies, which the
		// agent re-reads on every Skill invocation.
		if liveHash != stagedHash && !isSkillMarkdown(rel) {
			return classifyKindCold, nil
		}
	}
	return classifyKindSkillContentOnly, nil
}
// isSkillMarkdown reports whether rel names a skill definition file:
// the basename must be exactly "SKILL.md" (case-sensitive, matching
// Claude Code's skill discovery rule).
func isSkillMarkdown(rel string) bool {
	base := filepath.Base(rel)
	return base == "SKILL.md"
}
// hashLocalTree walks a host directory and returns rel-path → sha256-hex.
// Symlinks are skipped (same posture as the tar walker).
func hashLocalTree(root string) (map[string]string, error) {
out := map[string]string{}
err := filepath.WalkDir(root, func(p string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
if d.IsDir() {
return nil
}
info, err := d.Info()
if err != nil {
return err
}
if info.Mode()&os.ModeSymlink != 0 {
return nil
}
if !info.Mode().IsRegular() {
return nil
}
rel, err := filepath.Rel(root, p)
if err != nil {
return err
}
body, err := os.ReadFile(p)
if err != nil {
return err
}
sum := sha256.Sum256(body)
out[filepath.ToSlash(rel)] = hex.EncodeToString(sum[:])
return nil
})
if err != nil {
return nil, err
}
return out, nil
}
// hashContainerTree reads every regular file under livePath inside the
// container and returns rel-path → sha256-hex, keyed identically to
// hashLocalTree so the two maps can be compared directly.
//
// Implementation: a single `docker exec sh -c 'cd <livePath> && find .
// -type f -print0 | xargs -0 -r sha256sum'` round trip; the combined
// stdout is parsed by parseSha256SumOutput.
func (h *PluginsHandler) hashContainerTree(
	ctx context.Context, containerName, livePath string,
) (map[string]string, error) {
	out, err := h.execAsRoot(ctx, containerName, []string{
		"sh", "-c",
		// `cd` then `find .` keeps every reported path relative to
		// livePath, so keys line up with hashLocalTree's rel paths.
		fmt.Sprintf("cd %s && find . -type f -print0 | xargs -0 -r sha256sum 2>/dev/null", shQuote(livePath)),
	})
	if err != nil {
		return nil, fmt.Errorf("hash container tree: %w", err)
	}
	return parseSha256SumOutput(out), nil
}

// parseSha256SumOutput parses `sha256sum` output lines of the form
//
//	<hex>  ./<relpath>
//
// into rel-path → hex. The separator between hash and path is TWO
// characters ("  " in text mode, " *" in binary mode), so after splitting
// on the first space the second separator character is still glued to the
// front of the path and must be trimmed before stripping the "./" prefix.
// (A previous version skipped that trim, leaving every key prefixed with
// " ./" — no key ever matched the staged map, forcing every install cold.)
// Malformed lines are skipped rather than failing the whole parse.
func parseSha256SumOutput(out string) map[string]string {
	hashes := map[string]string{}
	for _, line := range strings.Split(out, "\n") {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		parts := strings.SplitN(line, " ", 2)
		if len(parts) != 2 {
			continue
		}
		rel := strings.TrimLeft(parts[1], " *")
		rel = strings.TrimPrefix(rel, "./")
		hashes[rel] = parts[0]
	}
	return hashes
}
// shQuote single-quotes s for safe interpolation into a POSIX shell
// command line.
//
// Strings made only of clearly-safe characters (alphanumerics plus
// /._-) are returned unchanged for readability. Everything else is
// wrapped in single quotes with embedded single quotes escaped via the
// standard '\'' dance.
//
// The EMPTY string is quoted as '' rather than returned as-is: an
// unquoted empty word vanishes from the command line entirely (e.g.
// `cd ` would silently change to $HOME instead of failing), which is
// exactly the kind of surprise this helper exists to prevent.
func shQuote(s string) string {
	if s != "" && isShellSafe(s) {
		return s
	}
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}

// isShellSafe reports whether every rune of s belongs to the
// conservative [a-zA-Z0-9/._-] set that needs no quoting in any POSIX
// shell.
func isShellSafe(s string) bool {
	for _, c := range s {
		switch {
		case c >= 'a' && c <= 'z':
		case c >= 'A' && c <= 'Z':
		case c >= '0' && c <= '9':
		case c == '/' || c == '.' || c == '_' || c == '-':
		default:
			return false
		}
	}
	return true
}

View File

@ -1,121 +0,0 @@
package handlers
import (
"os"
"path/filepath"
"testing"
)
// TestIsSkillMarkdown: pin which paths the classifier considers
// hot-reloadable. SKILL.md by basename only — case-sensitive.
func TestIsSkillMarkdown(t *testing.T) {
	hot := []string{
		"SKILL.md",
		"skills/foo/SKILL.md",
		"deeply/nested/SKILL.md",
	}
	cold := []string{
		"plugin.yaml",
		"hooks.json",
		"settings.json",
		"README.md",
		"skill.md",  // case-sensitive
		"SKILLS.md", // not a skill file
		"skills/foo/extra.md",
	}
	for _, p := range hot {
		if !isSkillMarkdown(p) {
			t.Errorf("isSkillMarkdown(%q) = false; want true", p)
		}
	}
	for _, p := range cold {
		if isSkillMarkdown(p) {
			t.Errorf("isSkillMarkdown(%q) = true; want false", p)
		}
	}
}
// TestHashLocalTree_StableHash: hashing the same content twice must
// produce identical maps. Pinned because if hashLocalTree ever picks up
// mtime/inode (e.g. via a refactor to use os.Lstat metadata), every
// install would classify as cold and we'd lose the hot-reload.
func TestHashLocalTree_StableHash(t *testing.T) {
	dir := t.TempDir()
	if err := os.MkdirAll(filepath.Join(dir, "skills/foo"), 0o755); err != nil {
		t.Fatal(err)
	}
	// Fixture: one manifest and one nested skill file.
	fixture := map[string]string{
		"plugin.yaml":         "name: foo\n",
		"skills/foo/SKILL.md": "# Foo\n",
	}
	for rel, body := range fixture {
		if err := os.WriteFile(filepath.Join(dir, rel), []byte(body), 0o644); err != nil {
			t.Fatal(err)
		}
	}
	first, err := hashLocalTree(dir)
	if err != nil {
		t.Fatal(err)
	}
	second, err := hashLocalTree(dir)
	if err != nil {
		t.Fatal(err)
	}
	if len(first) != len(second) {
		t.Fatalf("hash count differs: %d vs %d", len(first), len(second))
	}
	for rel, sum := range first {
		if second[rel] != sum {
			t.Errorf("hash[%q] differs: %q vs %q", rel, sum, second[rel])
		}
	}
}
// TestHashLocalTree_SymlinkSkipped: symlinks should not appear in the
// hash map — same posture as the tar walker. Otherwise a hostile plugin
// could include a symlink whose hash changes when its target changes,
// silently flipping classification.
func TestHashLocalTree_SymlinkSkipped(t *testing.T) {
	dir := t.TempDir()
	if err := os.WriteFile(filepath.Join(dir, "real.txt"), []byte("ok"), 0o644); err != nil {
		t.Fatal(err)
	}
	// Symlink target lives OUTSIDE the hashed tree.
	outside := filepath.Join(t.TempDir(), "target")
	if err := os.WriteFile(outside, []byte("outside"), 0o644); err != nil {
		t.Fatal(err)
	}
	if err := os.Symlink(outside, filepath.Join(dir, "link")); err != nil {
		t.Fatal(err)
	}
	hashes, err := hashLocalTree(dir)
	if err != nil {
		t.Fatal(err)
	}
	if _, leaked := hashes["link"]; leaked {
		t.Errorf("symlink leaked into hash map: %v", hashes)
	}
	if _, present := hashes["real.txt"]; !present {
		t.Errorf("real.txt missing from hash map: %v", hashes)
	}
}
// TestShQuote: the classifier injects livePath into a shell command via
// docker exec, so the path must be quoted correctly. Pin the boundary
// between pass-through-safe strings and strings that need quoting.
func TestShQuote(t *testing.T) {
	cases := map[string]string{
		"foo":                      "foo",
		"/configs/plugins/foo-bar": "/configs/plugins/foo-bar",
		"with space":               "'with space'",
		"with'quote":               "'with'\\''quote'",
		"$envvar":                  "'$envvar'",
		"path/with/dots.txt":       "path/with/dots.txt",
	}
	for in, want := range cases {
		if got := shQuote(in); got != want {
			t.Errorf("shQuote(%q) = %q; want %q", in, got, want)
		}
	}
}

Some files were not shown because too many files have changed in this diff Show More