Compare commits
No commits in common. "main" and "fix/saas-plugin-install-eic" have entirely different histories.
main
...
fix/saas-p
@ -1,118 +0,0 @@
|
||||
#!/usr/bin/env bash
# audit-force-merge — detect a §SOP-6 force-merge after PR close, emit
# `incident.force_merge` to stdout as structured JSON.
#
# Vector's docker_logs source picks up runner stdout; the JSON gets
# shipped to Loki on molecule-canonical-obs, indexable by event_type.
# Query example:
#
#   {host="operator"} |= "event_type" |= "incident.force_merge" | json
#
# A force-merge is detected when a PR closed-with-merged=true had at
# least one of the repo's required-status-check contexts in a state
# other than "success" at the merge commit's SHA. That's exactly what
# the Gitea force_merge:true API call lets through, so it's a faithful
# detector of the override path.
#
# Triggers on `pull_request_target: closed` (loaded from base branch
# per §SOP-6 security model). No-op when merged=false.
#
# Required env (set by the workflow):
#   GITEA_TOKEN, GITEA_HOST, REPO, PR_NUMBER, REQUIRED_CHECKS
#
# REQUIRED_CHECKS is a newline-separated list of status-check context
# names that branch protection requires. Declared in the workflow YAML
# rather than fetched from /branch_protections (which needs admin
# scope — sop-tier-bot has read-only). Trade dynamism for simplicity:
# when the required-check set changes, update both branch protection
# AND this env. Keeping them in sync is less complexity than granting
# the audit bot admin perms on every repo.

set -euo pipefail

: "${GITEA_TOKEN:?required}"
: "${GITEA_HOST:?required}"
: "${REPO:?required}"
: "${PR_NUMBER:?required}"
: "${REQUIRED_CHECKS:?required (newline-separated context names)}"

OWNER="${REPO%%/*}"
NAME="${REPO##*/}"
API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"

# 1. Fetch the PR. If not merged, no-op.
PR=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
MERGED=$(echo "$PR" | jq -r '.merged // false')
if [ "$MERGED" != "true" ]; then
  echo "::notice::PR #${PR_NUMBER} closed without merge — no audit emission."
  exit 0
fi

MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha // empty')
MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login // "unknown"')
TITLE=$(echo "$PR" | jq -r '.title // ""')
BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref // "main"')
HEAD_SHA=$(echo "$PR" | jq -r '.head.sha // empty')

if [ -z "$MERGE_SHA" ]; then
  echo "::warning::PR #${PR_NUMBER} merged=true but no merge_commit_sha — cannot evaluate force-merge."
  exit 0
fi

# FIX: HEAD_SHA was previously unguarded even though it is interpolated
# into the /commits/<sha>/status URL below. An empty head.sha (e.g. head
# branch deleted and GC'd before this job ran) produced a request to
# `/commits//status` and evaluated a garbage response. Bail out with a
# warning instead, mirroring the MERGE_SHA guard above.
if [ -z "$HEAD_SHA" ]; then
  echo "::warning::PR #${PR_NUMBER} merged=true but no head.sha — cannot evaluate force-merge."
  exit 0
fi

# 2. Required status checks declared in the workflow env.
REQUIRED="$REQUIRED_CHECKS"
if [ -z "${REQUIRED//[[:space:]]/}" ]; then
  echo "::notice::REQUIRED_CHECKS empty — force-merge not applicable."
  exit 0
fi

# 3. Status-check state at the PR HEAD (where checks ran). The merge
#    commit doesn't get its own checks; we evaluate the PR's last
#    commit, which is what branch protection compared against.
# NOTE(review): the jq below reads `.status` on each entry of
# `.statuses` — confirm this matches the CommitStatus field name on the
# deployed Gitea version.
STATUS=$(curl -sS -H "$AUTH" \
  "${API}/repos/${OWNER}/${NAME}/commits/${HEAD_SHA}/status")
declare -A CHECK_STATE
while IFS=$'\t' read -r ctx state; do
  # FIX: was `[ -n "$ctx" ] && CHECK_STATE[$ctx]="$state"`. If the final
  # iteration saw an empty context, that compound returned 1 as the last
  # command of the loop body, the while-loop's exit status became 1, and
  # `set -e` killed the script silently. The `if` form is immune.
  if [ -n "$ctx" ]; then
    CHECK_STATE[$ctx]="$state"
  fi
done < <(echo "$STATUS" | jq -r '.statuses // [] | .[] | "\(.context)\t\(.status)"')

# 4. For each required check, was it green at merge? YAML block scalars
#    (`|`) leave a trailing newline; skip blank/whitespace-only lines.
FAILED_CHECKS=()
while IFS= read -r req; do
  trimmed="${req#"${req%%[![:space:]]*}"}"   # ltrim
  trimmed="${trimmed%"${trimmed##*[![:space:]]}"}" # rtrim
  [ -z "$trimmed" ] && continue
  # A context absent from the commit status is recorded as "missing" —
  # a check that never ran is just as much a force-merge signal as a red one.
  state="${CHECK_STATE[$trimmed]:-missing}"
  if [ "$state" != "success" ]; then
    FAILED_CHECKS+=("${trimmed}=${state}")
  fi
done <<< "$REQUIRED"

if [ "${#FAILED_CHECKS[@]}" -eq 0 ]; then
  echo "::notice::PR #${PR_NUMBER} merged with all required checks green — not a force-merge."
  exit 0
fi

# 5. Emit structured audit event.
NOW=$(date -u +%Y-%m-%dT%H:%M:%SZ)
FAILED_JSON=$(printf '%s\n' "${FAILED_CHECKS[@]}" | jq -R . | jq -s .)

# Print as a single-line JSON so Vector's parse_json transform can pick
# it up cleanly from docker_logs.
jq -nc \
  --arg event_type "incident.force_merge" \
  --arg ts "$NOW" \
  --arg repo "$REPO" \
  --argjson pr "$PR_NUMBER" \
  --arg title "$TITLE" \
  --arg base "$BASE_BRANCH" \
  --arg merged_by "$MERGED_BY" \
  --arg merge_sha "$MERGE_SHA" \
  --argjson failed_checks "$FAILED_JSON" \
  '{event_type: $event_type, ts: $ts, repo: $repo, pr: $pr, title: $title,
    base_branch: $base, merged_by: $merged_by, merge_sha: $merge_sha,
    failed_checks: $failed_checks}'

echo "::warning::FORCE-MERGE detected on PR #${PR_NUMBER} by ${MERGED_BY}: ${#FAILED_CHECKS[@]} required check(s) not green at merge time."
|
||||
@ -1,149 +0,0 @@
|
||||
#!/usr/bin/env bash
# sop-tier-check — verify a Gitea PR satisfies the §SOP-6 approval gate.
#
# Reads the PR's tier label, walks approving reviewers, and checks each
# approver's Gitea team membership against the tier's eligible-team set.
# Marks pass only when at least one non-author approver is in an eligible
# team.
#
# Invoked from `.gitea/workflows/sop-tier-check.yml`. The workflow sets
# the env vars below; this script does no IO outside of stdout/stderr +
# the Gitea API.
#
# Required env:
#   GITEA_TOKEN — bot PAT with read:organization,read:user,
#                 read:issue,read:repository scopes
#   GITEA_HOST  — e.g. git.moleculesai.app
#   REPO        — owner/name (from github.repository)
#   PR_NUMBER   — int (from github.event.pull_request.number)
#   PR_AUTHOR   — login (from github.event.pull_request.user.login)
#
# Optional:
#   SOP_DEBUG=1 — print per-API-call diagnostic lines (HTTP codes,
#                 raw response bodies). Default: off.
#
# Stale-status caveat: Gitea Actions does not always re-fire workflows
# on `labeled` / `pull_request_review:submitted` events. If the
# sop-tier-check status is stale (e.g. red after labels/approvals were
# added), push an empty commit to the PR branch to force a synchronize
# event, OR re-request reviews. Tracked: internal#46.

set -euo pipefail

# Print a diagnostic line to stderr when SOP_DEBUG=1; silent otherwise.
debug() {
  if [ "${SOP_DEBUG:-}" = "1" ]; then
    echo "  [debug] $*" >&2
  fi
}

# Validate env
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
: "${GITEA_HOST:?GITEA_HOST required}"
: "${REPO:?REPO required (owner/name)}"
: "${PR_NUMBER:?PR_NUMBER required}"
: "${PR_AUTHOR:?PR_AUTHOR required}"

OWNER="${REPO%%/*}"
NAME="${REPO##*/}"
API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"
echo "::notice::tier-check start: repo=$OWNER/$NAME pr=$PR_NUMBER author=$PR_AUTHOR"

# Sanity: token resolves to a user
WHOAMI=$(curl -sS -H "$AUTH" "${API}/user" | jq -r '.login // ""')
if [ -z "$WHOAMI" ]; then
  echo "::error::GITEA_TOKEN cannot resolve a user via /api/v1/user — check the token scope and that the secret is wired correctly."
  exit 1
fi
echo "::notice::token resolves to user: $WHOAMI"

# 1. Read tier label. Word-splitting on $LABELS is safe for the tier:*
#    names we match, but labels containing spaces will split — the case
#    arms ignore those fragments.
LABELS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/labels" | jq -r '.[].name')
TIER=""
for L in $LABELS; do
  case "$L" in
    tier:low|tier:medium|tier:high)
      if [ -n "$TIER" ]; then
        echo "::error::Multiple tier labels: $TIER + $L. Apply exactly one."
        exit 1
      fi
      TIER="$L"
      ;;
  esac
done
if [ -z "$TIER" ]; then
  echo "::error::PR has no tier:low|tier:medium|tier:high label. Apply one before merge."
  exit 1
fi
debug "tier=$TIER"

# 2. Tier → eligible teams (mirror of dev-sop §SOP-6)
case "$TIER" in
  tier:low)    ELIGIBLE="engineers managers ceo" ;;
  tier:medium) ELIGIBLE="managers ceo" ;;
  tier:high)   ELIGIBLE="ceo" ;;
esac
debug "eligible_teams=$ELIGIBLE"

# Resolve team-name → team-id once. /orgs/{org}/teams/{slug}/... endpoints
# don't exist on Gitea 1.22; we have to use /teams/{id}.
ORG_TEAMS_FILE=$(mktemp)
trap 'rm -f "$ORG_TEAMS_FILE"' EXIT
HTTP_CODE=$(curl -sS -o "$ORG_TEAMS_FILE" -w '%{http_code}' -H "$AUTH" \
  "${API}/orgs/${OWNER}/teams")
debug "teams-list HTTP=$HTTP_CODE size=$(wc -c <"$ORG_TEAMS_FILE")"
if [ "${SOP_DEBUG:-}" = "1" ]; then
  echo "  [debug] teams-list body (first 300 chars):" >&2
  head -c 300 "$ORG_TEAMS_FILE" >&2; echo >&2
fi
if [ "$HTTP_CODE" != "200" ]; then
  echo "::error::GET /orgs/${OWNER}/teams returned HTTP $HTTP_CODE — token likely lacks read:org scope. Add a SOP_TIER_CHECK_TOKEN secret with read:organization scope at the org level."
  exit 1
fi
declare -A TEAM_ID
for T in $ELIGIBLE; do
  ID=$(jq -r --arg t "$T" '.[] | select(.name==$t) | .id' <"$ORG_TEAMS_FILE" | head -1)
  if [ -z "$ID" ] || [ "$ID" = "null" ]; then
    VISIBLE=$(jq -r '.[]?.name? // empty' <"$ORG_TEAMS_FILE" 2>/dev/null | tr '\n' ' ')
    echo "::error::Team \"$T\" not found in org $OWNER. Teams visible: $VISIBLE"
    exit 1
  fi
  TEAM_ID[$T]="$ID"
  debug "team-id: $T → $ID"
done

# 3. Read approving reviewers
REVIEWS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews")
APPROVERS=$(echo "$REVIEWS" | jq -r '[.[] | select(.state=="APPROVED") | .user.login] | unique | .[]')
if [ -z "$APPROVERS" ]; then
  echo "::error::No approving reviews. Tier $TIER requires approval from {$ELIGIBLE} (non-author)."
  exit 1
fi
debug "approvers: $(echo "$APPROVERS" | tr '\n' ' ')"

# 4. For each approver: check non-author + team membership (by id).
#    Membership probe: 200/204 from /teams/{id}/members/{user} means
#    member; any other code (404, 403, …) means not-a-member for our
#    purposes.
OK=""
for U in $APPROVERS; do
  if [ "$U" = "$PR_AUTHOR" ]; then
    debug "skip self-review by $U"
    continue
  fi
  for T in $ELIGIBLE; do
    ID="${TEAM_ID[$T]}"
    CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
      "${API}/teams/${ID}/members/${U}")
    debug "probe: $U in team $T (id=$ID) → HTTP $CODE"
    if [ "$CODE" = "200" ] || [ "$CODE" = "204" ]; then
      echo "::notice::approver $U is in team $T (eligible for $TIER)"
      OK="yes"
      break
    fi
  done
  # FIX: was `[ -n "$OK" ] && break`. When NO approver qualifies, that
  # compound is the last command of the loop's final iteration and
  # returns 1, so the for-loop itself exits non-zero and `set -e` kills
  # the script right here — the actionable ::error:: message below was
  # never printed (exit code was still 1, but all diagnostics were
  # lost). The `if` form leaves the loop's exit status at 0.
  if [ -n "$OK" ]; then
    break
  fi
done

if [ -z "$OK" ]; then
  echo "::error::Tier $TIER requires approval from a non-author member of {$ELIGIBLE}. Got approvers: $APPROVERS — none of them satisfied team membership. Set SOP_DEBUG=1 to see per-probe HTTP codes."
  exit 1
fi
echo "::notice::sop-tier-check passed: $TIER, approver in {$ELIGIBLE}"
|
||||
@ -1,58 +0,0 @@
|
||||
# audit-force-merge — emit `incident.force_merge` to runner stdout when
# a PR is merged with required-status-checks not green. Vector picks
# the JSON line off docker_logs and ships to Loki on
# molecule-canonical-obs (per `reference_obs_stack_phase1`); query as:
#
#   {host="operator"} |= "event_type" |= "incident.force_merge" | json
#
# Closes the §SOP-6 audit gap (the doc says force-merges write to
# `structure_events`, but that table lives in the platform DB, not
# Gitea-side; Loki is the practical equivalent for Gitea Actions
# events). When the credential / observability stack converges later,
# this can sync into structure_events from Loki via a backfill job —
# the structured JSON shape is forward-compatible.
#
# Logic in `.gitea/scripts/audit-force-merge.sh` per the same script-
# extract pattern as sop-tier-check.

name: audit-force-merge

# pull_request_target loads from the base branch — same security model
# as sop-tier-check. Without this, an attacker could rewrite the
# workflow on a PR and skip the audit emission for their own
# force-merge. See `.gitea/workflows/sop-tier-check.yml` for the full
# rationale.
on:
  pull_request_target:
    types: [closed]

jobs:
  audit:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
    # Skip when PR is closed without merge — saves a runner. The script
    # re-checks merged=true server-side, so this is an optimisation,
    # not the trust boundary.
    if: github.event.pull_request.merged == true
    steps:
      # base.sha, never head.sha — keeps pull_request_target's
      # untrusted-code boundary intact (see sop-tier-check.yml).
      - name: Check out base branch (for the script)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          ref: ${{ github.event.pull_request.base.sha }}
      - name: Detect force-merge + emit audit event
        env:
          # Same org-level secret the sop-tier-check workflow uses.
          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
          GITEA_HOST: git.moleculesai.app
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          # Required-status-check contexts to evaluate at merge time.
          # Newline-separated. Mirror this against branch protection
          # (settings → branches → protected branch → required checks).
          # Declared here rather than fetched from /branch_protections
          # because that endpoint requires admin write — sop-tier-bot is
          # read-only by design (least-privilege).
          REQUIRED_CHECKS: |
            sop-tier-check / tier-check (pull_request)
            Secret scan / Scan diff for credential-shaped strings (pull_request)
        run: bash .gitea/scripts/audit-force-merge.sh
|
||||
@ -1,191 +0,0 @@
|
||||
name: Secret scan

# Hard CI gate. Refuses any PR / push whose diff additions contain a
# recognisable credential. Defense-in-depth for the #2090-class incident
# (2026-04-24): GitHub's hosted Copilot Coding Agent leaked a ghs_*
# installation token into tenant-proxy/package.json via `npm init`
# slurping the URL from a token-embedded origin remote. We can't fix
# upstream's clone hygiene, so we gate here.
#
# Same regex set as the runtime's bundled pre-commit hook
# (molecule-ai-workspace-runtime: molecule_runtime/scripts/pre-commit-checks.sh).
# Keep the two sides aligned when adding patterns.
#
# Ported from .github/workflows/secret-scan.yml so the gate actually
# fires on Gitea Actions. Differences from the GitHub version:
#   - drops `merge_group` event (Gitea has no merge queue)
#   - drops `workflow_call` (no cross-repo reusable invocation on Gitea)
#   - SELF path updated to .gitea/workflows/secret-scan.yml
# The job name + step name are identical to the GitHub workflow so the
# status-check context (`Secret scan / Scan diff for credential-shaped
# strings (pull_request)`) matches branch protection on molecule-core/main.

on:
  pull_request:
    types: [opened, synchronize, reopened]
  push:
    branches: [main, staging]

jobs:
  scan:
    name: Scan diff for credential-shaped strings
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 2 # need previous commit to diff against on push events

      # For pull_request events the diff base may be many commits behind
      # HEAD and absent from the shallow clone. Fetch it explicitly.
      - name: Fetch PR base SHA (pull_request events only)
        if: github.event_name == 'pull_request'
        run: git fetch --depth=1 origin ${{ github.event.pull_request.base.sha }}

      - name: Refuse if credential-shaped strings appear in diff additions
        env:
          # Plumb event-specific SHAs through env so the script doesn't
          # need conditional `${{ ... }}` interpolation per event type.
          # github.event.before/after only exist on push events;
          # pull_request has pull_request.base.sha / pull_request.head.sha.
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
          PUSH_BEFORE: ${{ github.event.before }}
          PUSH_AFTER: ${{ github.event.after }}
        run: |
          # Pattern set covers GitHub family (the actual #2090 vector),
          # Anthropic / OpenAI / Slack / AWS. Anchored on prefixes with low
          # false-positive rates against agent-generated content. Mirror of
          # molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh
          # — keep aligned.
          SECRET_PATTERNS=(
            'ghp_[A-Za-z0-9]{36,}'            # GitHub PAT (classic)
            'ghs_[A-Za-z0-9]{36,}'            # GitHub App installation token
            'gho_[A-Za-z0-9]{36,}'            # GitHub OAuth user-to-server
            'ghu_[A-Za-z0-9]{36,}'            # GitHub OAuth user
            'ghr_[A-Za-z0-9]{36,}'            # GitHub OAuth refresh
            'github_pat_[A-Za-z0-9_]{82,}'    # GitHub fine-grained PAT
            'sk-ant-[A-Za-z0-9_-]{40,}'       # Anthropic API key
            'sk-proj-[A-Za-z0-9_-]{40,}'      # OpenAI project key
            'sk-svcacct-[A-Za-z0-9_-]{40,}'   # OpenAI service-account key
            'sk-cp-[A-Za-z0-9_-]{60,}'        # MiniMax API key (F1088 vector — caught only after the fact)
            'xox[baprs]-[A-Za-z0-9-]{20,}'    # Slack tokens
            'AKIA[0-9A-Z]{16}'                # AWS access key ID
            'ASIA[0-9A-Z]{16}'                # AWS STS temp access key ID
          )

          # Determine the diff base. Each event type stores its SHAs in
          # a different place — see the env block above.
          case "${{ github.event_name }}" in
            pull_request)
              BASE="$PR_BASE_SHA"
              HEAD="$PR_HEAD_SHA"
              ;;
            *)
              BASE="$PUSH_BEFORE"
              HEAD="$PUSH_AFTER"
              ;;
          esac

          # On push events with shallow clones, BASE may be present in
          # the event payload but absent from the local object DB
          # (fetch-depth=2 doesn't always reach the previous commit
          # across true merges). Try fetching it on demand. If the
          # fetch fails — e.g. the SHA was force-overwritten — we fall
          # through to the empty-BASE branch below, which scans the
          # entire tree as if every file were new. Correct, just slow.
          # (The `^0+$` check matches git's all-zero "no previous SHA"
          # sentinel on branch-creation pushes.)
          if [ -n "$BASE" ] && ! echo "$BASE" | grep -qE '^0+$'; then
            if ! git cat-file -e "$BASE" 2>/dev/null; then
              git fetch --depth=1 origin "$BASE" 2>/dev/null || true
            fi
          fi

          # Files added or modified in this change.
          if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$' || ! git cat-file -e "$BASE" 2>/dev/null; then
            # New branch / no previous SHA / BASE unreachable — check the
            # entire tree as added content. Slower, but correct on first
            # push.
            CHANGED=$(git ls-tree -r --name-only HEAD)
            DIFF_RANGE=""
          else
            CHANGED=$(git diff --name-only --diff-filter=AM "$BASE" "$HEAD")
            DIFF_RANGE="$BASE $HEAD"
          fi

          if [ -z "$CHANGED" ]; then
            echo "No changed files to inspect."
            exit 0
          fi

          # Self-exclude: this workflow file legitimately contains the
          # pattern strings as regex literals. Without an exclude it would
          # block its own merge. Both the .github/ original and this
          # .gitea/ port are excluded so a sync between them stays clean.
          SELF_GITHUB=".github/workflows/secret-scan.yml"
          SELF_GITEA=".gitea/workflows/secret-scan.yml"

          OFFENDING=""
          # `while IFS= read -r` (not `for f in $CHANGED`) so filenames
          # containing whitespace don't word-split silently — a path
          # with a space would otherwise produce two iterations on
          # tokens that aren't real filenames, breaking the
          # self-exclude + diff lookup.
          while IFS= read -r f; do
            [ -z "$f" ] && continue
            [ "$f" = "$SELF_GITHUB" ] && continue
            [ "$f" = "$SELF_GITEA" ] && continue
            if [ -n "$DIFF_RANGE" ]; then
              # `^\+[^+]` keeps added lines but drops the `+++ b/...`
              # file header from the unified diff.
              ADDED=$(git diff --no-color --unified=0 "$BASE" "$HEAD" -- "$f" 2>/dev/null | grep -E '^\+[^+]' || true)
            else
              # No diff range (new branch first push) — scan the full file
              # contents as if every line were new.
              ADDED=$(cat "$f" 2>/dev/null || true)
            fi
            [ -z "$ADDED" ] && continue
            for pattern in "${SECRET_PATTERNS[@]}"; do
              if echo "$ADDED" | grep -qE "$pattern"; then
                OFFENDING="${OFFENDING}${f} (matched: ${pattern})\n"
                break
              fi
            done
          done <<< "$CHANGED"

          if [ -n "$OFFENDING" ]; then
            echo "::error::Credential-shaped strings detected in diff additions:"
            # `printf '%b' "$OFFENDING"` interprets backslash escapes
            # (the literal `\n` we appended above becomes a newline)
            # WITHOUT treating OFFENDING as a format string. Plain
            # `printf "$OFFENDING"` is a format-string sink: a filename
            # containing `%` would be interpreted as a conversion
            # specifier, corrupting the error message (or printing
            # `%(missing)` artifacts).
            printf '%b' "$OFFENDING"
            echo ""
            echo "The actual matched values are NOT echoed here, deliberately —"
            echo "round-tripping a leaked credential into CI logs widens the blast"
            echo "radius (logs are searchable + retained)."
            echo ""
            echo "Recovery:"
            echo "  1. Remove the secret from the file. Replace with an env var"
            echo "     reference (e.g. \${{ secrets.GITHUB_TOKEN }} in workflows,"
            echo "     process.env.X in code)."
            echo "  2. If the credential was already pushed (this PR's commit"
            echo "     history reaches a public ref), treat it as compromised —"
            echo "     ROTATE it immediately, do not just remove it. The token"
            echo "     remains valid in git history forever and may be in any"
            echo "     log/cache that consumed this branch."
            echo "  3. Force-push the cleaned commit (or stack a revert) and"
            echo "     re-run CI."
            echo ""
            echo "If the match is a false positive (test fixture, docs example,"
            echo "or this workflow's own regex literals): use a clearly-fake"
            echo "placeholder like ghs_EXAMPLE_DO_NOT_USE that doesn't satisfy"
            echo "the length suffix, OR add the file path to the SELF exclude"
            echo "list in this workflow with a short reason."
            echo ""
            echo "Mirror of the regex set lives in the runtime's bundled"
            echo "pre-commit hook (molecule-ai-workspace-runtime:"
            echo "molecule_runtime/scripts/pre-commit-checks.sh) — keep aligned."
            exit 1
          fi

          echo "✓ No credential-shaped strings in this change."
|
||||
@ -1,81 +0,0 @@
|
||||
# sop-tier-check — canonical Gitea Actions workflow for §SOP-6 enforcement.
#
# Logic lives in `.gitea/scripts/sop-tier-check.sh` (extracted 2026-05-09
# from the previous inline-bash version). The script is the single source
# of truth; this workflow file just sets env + invokes it.
#
# Copy BOTH files (`.gitea/workflows/sop-tier-check.yml` +
# `.gitea/scripts/sop-tier-check.sh`) into any repo that wants the
# §SOP-6 PR gate enforced. Pair with branch protection on the protected
# branch:
#   required_status_checks: ["sop-tier-check / tier-check (pull_request)"]
#   required_approving_reviews: 1
#   approving_review_teams: ["ceo", "managers", "engineers"]
#
# Tier → eligible-team mapping (mirror of dev-sop §SOP-6):
#   tier:low    → engineers, managers, ceo
#   tier:medium → managers, ceo
#   tier:high   → ceo
#
# Force-merge: Owners-team override remains available out-of-band via
# the Gitea merge API; force-merge writes `incident.force_merge` to
# `structure_events` per §Persistent structured logging gate (Phase 3).
#
# Set `SOP_DEBUG: '1'` in the env block to enable per-API-call diagnostic
# lines — useful when diagnosing token-scope or team-id-resolution
# issues. Default off.

name: sop-tier-check

# SECURITY: triggers MUST use `pull_request_target`, not `pull_request`.
# `pull_request_target` loads the workflow definition from the BASE
# branch (i.e. `main`), not the PR's HEAD. With `pull_request`, anyone
# with write access to a feature branch could rewrite this file in
# their PR to dump SOP_TIER_CHECK_TOKEN (org-read scope) to logs and
# exfiltrate it. Verified 2026-05-09 against Gitea 1.22.6 —
# `pull_request_target` (added in Gitea 1.21 via go-gitea/gitea#25229)
# is the documented mitigation.
#
# This workflow does NOT call `actions/checkout` of PR HEAD code, so no
# untrusted code is ever executed in the runner — we only HTTP-call the
# Gitea API. If a future change adds a checkout step, it MUST pin to
# `${{ github.event.pull_request.base.sha }}` (NOT `head.sha`) to keep
# the trust boundary.
on:
  pull_request_target:
    types: [opened, edited, synchronize, reopened, labeled, unlabeled]
  pull_request_review:
    types: [submitted, dismissed, edited]

jobs:
  tier-check:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
    steps:
      - name: Check out base branch (for the script)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # Pin to base.sha — pull_request_target's protection only
          # works if we never check out PR HEAD. Same SHA the workflow
          # itself was loaded from.
          ref: ${{ github.event.pull_request.base.sha }}
      - name: Verify tier label + reviewer team membership
        env:
          # SOP_TIER_CHECK_TOKEN is the org-level secret for the
          # sop-tier-bot PAT (read:organization,read:user,read:issue,
          # read:repository). Stored at the org level
          # (/api/v1/orgs/molecule-ai/actions/secrets) so per-repo
          # configuration is unnecessary — every repo in the org
          # picks it up automatically.
          # Falls back to GITHUB_TOKEN with a clear error if missing.
          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
          GITEA_HOST: git.moleculesai.app
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
          # Set to '1' for diagnostic per-API-call output. Off by default
          # so production logs aren't noisy.
          SOP_DEBUG: '0'
        run: bash .gitea/scripts/sop-tier-check.sh
|
||||
467
.github/workflows/auto-promote-on-e2e.yml
vendored
Normal file
467
.github/workflows/auto-promote-on-e2e.yml
vendored
Normal file
@ -0,0 +1,467 @@
|
||||
name: Auto-promote :latest after main image build

# Retags `ghcr.io/molecule-ai/{platform,platform-tenant}:staging-<sha>`
# → `:latest` after either the image build or E2E completes on a `main`
# push, gated on E2E Staging SaaS not being red for that SHA.
#
# Why two triggers: `publish-workspace-server-image` and
# `e2e-staging-saas` are both paths-filtered, with DIFFERENT path sets —
# the E2E set is a strict SUBSET of the publish set. canvas/**, cmd/**,
# sweep/** changes fire publish but not E2E. The previous version
# triggered ONLY on E2E completion, so non-E2E-path changes rebuilt the
# image but never advanced `:latest` (as of 2026-04-28 it had run zero
# times since merge; `:latest` was ~7h / 9 PRs behind main — see
# `molecule-core` Slack 2026-04-28). Adding publish as a second trigger
# closes the gap: any image rebuild on main eligibly advances `:latest`.
#
# Why E2E remains a kill-switch (not the trigger): when E2E DID run for
# this SHA and ended red, we abort — `:latest` stays on the prior
# known-good digest. When E2E didn't run (paths filtered out), we
# proceed: pre-merge gates (CI + E2E Canvas + E2E API + CodeQL via
# auto-promote-staging) already validated this SHA on staging.
#
# Why `main` only: `:latest` is what prod tenants pull. Triggering on
# staging would let a staging-only revert advance `:latest` to a SHA
# that never reaches main, breaking the "production runs what's on
# main" invariant.
#
# Idempotency: when a SHA matches BOTH path sets, both upstreams fire
# and both completions trigger this workflow. Both retag the same
# digest; crane tag is idempotent and the concurrency group serializes
# the two runs anyway.

on:
  workflow_run:
    workflows:
      - 'E2E Staging SaaS (full lifecycle)'
      - 'publish-workspace-server-image'
    types: [completed]
    branches: [main]
  workflow_dispatch:
    inputs:
      sha:
        description: 'Short sha to promote (override; defaults to upstream workflow_run head_sha)'
        required: false
        type: string

permissions:
  contents: read
  packages: write

concurrency:
  # Serialize promotes per-SHA so the publish+E2E both-fired race lands
  # cleanly. Different SHAs can promote in parallel.
  group: auto-promote-latest-${{ github.event.workflow_run.head_sha || github.event.inputs.sha || github.sha }}
  cancel-in-progress: false

env:
  IMAGE_NAME: ghcr.io/molecule-ai/platform
  TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant

jobs:
  promote:
    # Proceed if upstream succeeded OR manual dispatch. Upstream-failure
    # paths are filtered here; the E2E-was-red kill-switch lives in the
    # gate-check step below (covers upstream-is-publish-success while
    # E2E for the same SHA failed).
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
    runs-on: ubuntu-latest
    steps:
      - name: Compute short sha
        id: sha
        run: |
          set -euo pipefail
          if [ -n "${{ github.event.inputs.sha }}" ]; then
            FULL="${{ github.event.inputs.sha }}"
          else
            FULL="${{ github.event.workflow_run.head_sha }}"
          fi
          # FIX: a manual dispatch without the `sha` input has no
          # workflow_run payload, so FULL was empty and the retag steps
          # below would have targeted the literal tag ":staging-".
          # Fall back to the commit the dispatch ran on — same fallback
          # chain the concurrency group already uses.
          FULL="${FULL:-${{ github.sha }}}"
          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
          echo "full=${FULL}" >> "$GITHUB_OUTPUT"

      - name: Gate — E2E Staging SaaS state for this SHA
        # When upstream IS E2E success, it's green by the job-level `if`.
        # When upstream is publish, look up E2E state for the same SHA.
        # Buckets:
        #   completed/success → E2E confirmed safe → proceed
        #   completed/failure|cancelled|timed_out → regression → ABORT
        #     (exit 1), `:latest` stays put
        #   in_progress/queued/… → E2E is RACING with publish (publish
        #     typically completes ~5-10 min earlier). Promoting now would
        #     make a later E2E failure unrecoverable, so DEFER: set
        #     proceed=false and exit 0; E2E's own completion event
        #     re-fires this workflow via the upstream-is-E2E path.
        #   none/none → E2E was paths-filtered out for this SHA;
        #     pre-merge staging gates already validated it → proceed.
        # Manual dispatch skips this check — operator override.
        id: gate
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          SHA: ${{ steps.sha.outputs.full }}
          UPSTREAM_NAME: ${{ github.event.workflow_run.name }}
          EVENT_NAME: ${{ github.event_name }}
        run: |
          set -euo pipefail

          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            echo "proceed=true" >> "$GITHUB_OUTPUT"
            echo "::notice::Manual dispatch — skipping E2E gate (operator override)"
            exit 0
          fi

          if [ "$UPSTREAM_NAME" = "E2E Staging SaaS (full lifecycle)" ]; then
            echo "proceed=true" >> "$GITHUB_OUTPUT"
            echo "::notice::Upstream is E2E itself (success per job-level if) — gate trivially satisfied"
            exit 0
          fi

          # Upstream is publish-workspace-server-image. Check E2E state
          # for the same SHA via Gitea's commit-status API.
          #
          # Gitea has NO workflow-runs API (`/actions/runs` → 404,
          # verified 2026-05-07, issue #75), but Gitea Actions emits one
          # commit status per workflow job with
          # `context = "<Workflow Name> / <Job Name> (<event>)"`, so the
          # per-SHA status rows encode the E2E run's outcome.
          #
          # Mapping:
          #   0 matched contexts        → "none/none"         (E2E paths-filtered out)
          #   any context = pending     → "in_progress/none"  (defer)
          #   any context = error|failure → "completed/failure" (abort)
          #   all contexts = success    → "completed/success" (proceed)
          #
          # Gitea statuses are success/failure/error/pending/warning —
          # no cancelled/timed_out analogs. Per-SHA concurrency
          # cancellation surfaces as `error`, which maps to the abort
          # bucket rather than a soft defer. Tradeoff accepted: the
          # staleness alarm (auto-promote-stale-alarm.yml) catches a
          # stuck :latest within 4h, and legitimate cancels are rare;
          # revisit if cancel frequency exceeds ~1/week.
          #
          # Network or auth blips collapse to "none/none" via the curl
          # and jq fallbacks, matching the pre-Gitea behaviour where an
          # empty run list also degenerated to none/none.
          #
          # FIX: use plain `--fail` (not `--fail-with-body`). With
          # --fail-with-body, an HTTP-error response body was written to
          # stdout and captured into STATUSES_JSON ahead of the appended
          # "[]", feeding jq garbage instead of a clean empty list.
          GITEA_API_URL="${GITHUB_SERVER_URL:-https://git.moleculesai.app}/api/v1"
          STATUSES_JSON=$(curl --fail -sS \
            -H "Authorization: token ${GH_TOKEN}" \
            -H "Accept: application/json" \
            "${GITEA_API_URL}/repos/${REPO}/commits/${SHA}/statuses?limit=100" \
            2>/dev/null || echo "[]")
          RESULT=$(printf '%s' "$STATUSES_JSON" | jq -r '
            # Filter to E2E Staging SaaS (full lifecycle) statuses by
            # leading workflow-name prefix so the "<job> (<event>)" tail
            # is irrelevant. Gitea emits the workflow name verbatim from
            # the YAML `name:` field.
            [.[] | select(.context | startswith("E2E Staging SaaS (full lifecycle) /"))] as $rows
            | if ($rows | length) == 0 then
                "none/none"
              elif any($rows[]; .status == "pending") then
                "in_progress/none"
              elif any($rows[]; .status == "failure" or .status == "error") then
                "completed/failure"
              elif all($rows[]; .status == "success") then
                "completed/success"
              else
                # Mixed / unknown — fall through to *) bucket below.
                "completed/" + ($rows[0].status // "unknown")
              end
          ' 2>/dev/null || echo "none/none")

          echo "E2E Staging SaaS for ${SHA:0:7}: $RESULT"

          case "$RESULT" in
            completed/success)
              echo "proceed=true" >> "$GITHUB_OUTPUT"
              echo "::notice::E2E green for this SHA — proceeding with promote"
              ;;
            completed/failure|completed/timed_out)
              echo "proceed=false" >> "$GITHUB_OUTPUT"
              {
                echo "## ❌ Auto-promote aborted — E2E Staging SaaS failed"
                echo
                echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\`"
                echo "\`:latest\` stays on the prior known-good digest."
                echo
                echo "If the failure was a flake, manually dispatch this workflow with the same sha to override."
              } >> "$GITHUB_STEP_SUMMARY"
              exit 1
              ;;
            completed/cancelled)
              # GitHub-era only: Gitea never emits a "cancelled" status
              # (cancellation surfaces as failure/error and hits the arm
              # above). Kept for dual-host operation; unreachable under
              # the post-#75 flow.
              echo "proceed=false" >> "$GITHUB_OUTPUT"
              {
                echo "## ⏭ Auto-promote deferred — E2E Staging SaaS was cancelled"
                echo
                echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\`"
                echo "Likely per-SHA concurrency (newer push superseded this E2E run)."
                echo "The newer SHA's E2E will fire its own promote when it lands."
                echo "If you need this specific SHA promoted, manually dispatch."
              } >> "$GITHUB_STEP_SUMMARY"
              ;;
            in_progress/*|queued/*|requested/*|waiting/*|pending/*)
              echo "proceed=false" >> "$GITHUB_OUTPUT"
              {
                echo "## ⏳ Auto-promote deferred — E2E Staging SaaS still running"
                echo
                echo "Publish completed before E2E for \`${SHA:0:7}\` (state: \`$RESULT\`)."
                echo "Skipping retag here — E2E's own completion event will re-fire this workflow."
                echo "If E2E ends green, that run promotes \`:latest\`. If red, it aborts."
              } >> "$GITHUB_STEP_SUMMARY"
              ;;
            none/none)
              echo "proceed=true" >> "$GITHUB_OUTPUT"
              echo "::notice::E2E paths-filtered out for this SHA — pre-merge staging gates carry"
              ;;
            *)
              echo "proceed=false" >> "$GITHUB_OUTPUT"
              {
                echo "## ❓ Auto-promote aborted — unexpected E2E state"
                echo
                echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\` (unhandled)"
                echo "Manual investigation needed; re-dispatch with the same sha once resolved."
              } >> "$GITHUB_STEP_SUMMARY"
              exit 1
              ;;
          esac

      - if: steps.gate.outputs.proceed == 'true'
        uses: imjasonh/setup-crane@6da1ae018866400525525ce74ff892880c099987 # v0.5

      - name: GHCR login
        if: steps.gate.outputs.proceed == 'true'
        run: |
          echo "${{ secrets.GITHUB_TOKEN }}" | \
            crane auth login ghcr.io -u "${{ github.actor }}" --password-stdin

      - name: Verify :staging-<sha> exists for both images
        # Better to fail fast with a clear message than to half-tag
        # (platform retagged but platform-tenant missing → tenants pull
        # a stale image).
        if: steps.gate.outputs.proceed == 'true'
        run: |
          set -euo pipefail
          for img in "${IMAGE_NAME}" "${TENANT_IMAGE_NAME}"; do
            tag="${img}:staging-${{ steps.sha.outputs.short }}"
            if ! crane manifest "$tag" >/dev/null 2>&1; then
              echo "::error::Missing tag: $tag"
              echo "::error::publish-workspace-server-image must complete on this SHA before auto-promote can retag :latest."
              exit 1
            fi
            echo " ok: $tag exists"
          done

      - name: Ancestry check — refuse to promote :latest backwards
        # #2244: workflow_run completions arrive in arbitrary order. If
        # SHA-A and SHA-B both reach main within ~10 min and SHA-B's E2E
        # completes before SHA-A's, this workflow can fire for SHA-A
        # AFTER it already promoted SHA-B → :latest goes backwards, and
        # nothing auto-corrects it until the next main push.
        #
        # Detection: read current :latest's
        # `org.opencontainers.image.revision` label (set by
        # publish-workspace-server-image.yml at build time) and ask the
        # compare API whether the candidate SHA is ahead / identical /
        # behind / diverged relative to it. Hard-fail on `behind` and
        # `diverged` per the approved design — workflow goes red, oncall
        # sees it, operator decides how to recover.
        #
        # Manual dispatch skips this check — operator override semantics
        # match the gate-check step above.
        #
        # Backward-compat: when current :latest carries no revision
        # label (legacy pre-label image), skip-with-warning. All :latest
        # images on main are post-label as of 2026-04-29; this branch is
        # dead within 90 days — remove then.
        if: steps.gate.outputs.proceed == 'true' && github.event_name != 'workflow_dispatch'
        id: ancestry
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          TARGET_SHA: ${{ steps.sha.outputs.full }}
        run: |
          set -euo pipefail

          # `crane config` returns the OCI image config blob (labels
          # live under `.config.Labels`). `// empty` makes jq return ""
          # rather than the literal "null" so the -z test below works.
          CURRENT_REVISION=$(crane config "${IMAGE_NAME}:latest" 2>/dev/null \
            | jq -r '.config.Labels["org.opencontainers.image.revision"] // empty' \
            || true)

          if [ -z "$CURRENT_REVISION" ]; then
            echo "decision=skip-no-label" >> "$GITHUB_OUTPUT"
            {
              echo "## ⚠ Ancestry check skipped — current :latest has no revision label"
              echo
              echo "Likely a legacy image built before \`org.opencontainers.image.revision\` was set."
              echo "Falling through to retag. After all \`:latest\` images are post-label (TODO 90 days), this branch is dead and should be removed."
            } >> "$GITHUB_STEP_SUMMARY"
            echo "::warning::Current :latest carries no revision label — skipping ancestry check (legacy image)"
            exit 0
          fi

          if [ "$CURRENT_REVISION" = "$TARGET_SHA" ]; then
            echo "decision=identical" >> "$GITHUB_OUTPUT"
            echo "::notice:::latest already at ${TARGET_SHA:0:7} — retag will be a no-op"
            exit 0
          fi

          # Ask which side of the merge graph TARGET_SHA sits on
          # relative to CURRENT_REVISION: ahead | identical | behind |
          # diverged. Network or auth errors collapse to "error" via the
          # explicit fallback so the case below always matches.
          STATUS=$(gh api \
            "repos/${REPO}/compare/${CURRENT_REVISION}...${TARGET_SHA}" \
            --jq '.status' 2>/dev/null || echo "error")

          echo "ancestry compare ${CURRENT_REVISION:0:7} → ${TARGET_SHA:0:7}: $STATUS"

          case "$STATUS" in
            ahead)
              echo "decision=ahead" >> "$GITHUB_OUTPUT"
              echo "::notice::Target ${TARGET_SHA:0:7} is ahead of current :latest (${CURRENT_REVISION:0:7}) — proceeding with retag"
              ;;
            identical)
              echo "decision=identical" >> "$GITHUB_OUTPUT"
              echo "::notice::Target identical to :latest — retag will be a no-op"
              ;;
            behind)
              echo "decision=behind" >> "$GITHUB_OUTPUT"
              {
                echo "## ❌ Auto-promote refused — target is BEHIND current :latest"
                echo
                echo "| Field | Value |"
                echo "|---|---|"
                echo "| Target SHA | \`$TARGET_SHA\` |"
                echo "| Current :latest revision | \`$CURRENT_REVISION\` |"
                echo "| GitHub compare status | \`behind\` |"
                echo
                echo "This guard catches the workflow_run-completion-order race (#2244):"
                echo "two rapid main pushes whose E2Es complete out-of-order can otherwise"
                echo "promote \`:latest\` backwards. \`:latest\` stays on \`${CURRENT_REVISION:0:7}\`."
                echo
                echo "**Recovery:** if this is a legitimate revert that should land on \`:latest\`,"
                echo "manually dispatch this workflow with the target sha as input — the manual-dispatch"
                echo "path skips the ancestry check (operator override)."
              } >> "$GITHUB_STEP_SUMMARY"
              exit 1
              ;;
            diverged)
              echo "decision=diverged" >> "$GITHUB_OUTPUT"
              {
                echo "## ❓ Auto-promote refused — history diverged"
                echo
                echo "| Field | Value |"
                echo "|---|---|"
                echo "| Target SHA | \`$TARGET_SHA\` |"
                echo "| Current :latest revision | \`$CURRENT_REVISION\` |"
                echo "| GitHub compare status | \`diverged\` |"
                echo
                echo "Likely cause: force-push rewrote main's history, leaving the previous"
                echo "\`:latest\` revision orphaned. Needs human review before \`:latest\` advances."
              } >> "$GITHUB_STEP_SUMMARY"
              exit 1
              ;;
            *)
              # Includes the explicit "error" fallback from the API call
              # above (the former `error|*` alternation was redundant —
              # `*` already matches it).
              echo "decision=error" >> "$GITHUB_OUTPUT"
              {
                echo "## ❌ Auto-promote aborted — ancestry-check API error"
                echo
                echo "\`gh api repos/${REPO}/compare/${CURRENT_REVISION}...${TARGET_SHA}\` returned unexpected status: \`$STATUS\`"
                echo
                echo "Manual dispatch with the target sha bypasses this check."
              } >> "$GITHUB_STEP_SUMMARY"
              exit 1
              ;;
          esac

      - name: Retag platform :staging-<sha> → :latest
        if: steps.gate.outputs.proceed == 'true'
        run: |
          crane tag "${IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest

      - name: Retag tenant :staging-<sha> → :latest
        if: steps.gate.outputs.proceed == 'true'
        run: |
          crane tag "${TENANT_IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest

      - name: Summary
        if: steps.gate.outputs.proceed == 'true'
        run: |
          {
            echo "## :latest promoted to ${{ steps.sha.outputs.short }}"
            echo
            if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
              echo "- Trigger: manual dispatch"
            else
              echo "- Upstream: \`${{ github.event.workflow_run.name }}\` ([run](${{ github.event.workflow_run.html_url }}))"
            fi
            echo "- platform:staging-${{ steps.sha.outputs.short }} → :latest"
            echo "- platform-tenant:staging-${{ steps.sha.outputs.short }} → :latest"
            echo
            echo "Tenant fleet auto-pulls within 5 min via IMAGE_AUTO_REFRESH=true."
            echo "Force immediate fanout: dispatch redeploy-tenants-on-main.yml."
          } >> "$GITHUB_STEP_SUMMARY"
|
||||
492
.github/workflows/auto-promote-staging.yml
vendored
Normal file
492
.github/workflows/auto-promote-staging.yml
vendored
Normal file
@ -0,0 +1,492 @@
|
||||
name: Auto-promote staging → main
|
||||
|
||||
# Fires after any of the staging-branch quality gates complete. When ALL
|
||||
# required gates are green on the same staging SHA, opens (or re-uses)
|
||||
# a PR `staging → main` and schedules Gitea auto-merge so the PR lands
|
||||
# automatically once approval + status checks are satisfied.
|
||||
#
|
||||
# ============================================================
|
||||
# What this workflow does
|
||||
# ============================================================
|
||||
#
|
||||
# 1. On a workflow_run completion event for one of the staging gate
|
||||
# workflows (CI, E2E Staging Canvas, E2E API Smoke, CodeQL),
|
||||
# checks if the combined status on the staging head SHA is green.
|
||||
# 2. If green, opens (or re-uses) a PR `head: staging → base: main`
|
||||
# via Gitea REST `POST /api/v1/repos/.../pulls`.
|
||||
# 3. Schedules auto-merge via `POST /api/v1/repos/.../pulls/{index}/merge`
|
||||
# with `merge_when_checks_succeed: true`. Gitea waits for the
|
||||
# approval requirement on `main` (`required_approvals: 1`) and
|
||||
# the status-check gates, then merges.
|
||||
# 4. The merge commit lands on `main` and fires
|
||||
# `publish-workspace-server-image.yml` naturally via its
|
||||
# `on: push: branches: [main]` trigger — no explicit dispatch
|
||||
# needed (see "Why no workflow_dispatch tail" below).
|
||||
#
|
||||
# `auto-sync-main-to-staging.yml` is the reverse-direction
|
||||
# counterpart (main → staging, fast-forward push). Together they
|
||||
# keep the staging-superset-of-main invariant tight.
|
||||
#
|
||||
# ============================================================
|
||||
# Why Gitea REST (and not `gh pr create`)
|
||||
# ============================================================
|
||||
#
|
||||
# Pre-2026-05-06 this workflow used `gh pr create`, `gh pr merge --auto`,
|
||||
# `gh run list`, and `gh workflow run` against GitHub. After the
|
||||
# GitHub→Gitea cutover those calls fail because:
|
||||
#
|
||||
# - `gh pr create / merge / view / list` route to GitHub GraphQL
|
||||
# (`/api/graphql`). Gitea does not expose a GraphQL endpoint;
|
||||
# every call returns `HTTP 405 Method Not Allowed` — same root
|
||||
# cause as #65 (auto-sync) which PR #66 fixed by dropping `gh`
|
||||
# entirely.
|
||||
# - `gh run list --workflow=...` GitHub-shape; Gitea has the
|
||||
# simpler `GET /repos/.../commits/{ref}/status` combined-status
|
||||
# endpoint instead.
|
||||
# - `gh workflow run X.yml` calls `POST /repos/.../actions/workflows/{id}/dispatches`,
|
||||
# which does NOT exist on Gitea 1.22.6 (verified via swagger.v1.json).
|
||||
#
|
||||
# So this workflow uses direct `curl` calls to Gitea REST. No `gh`
|
||||
# CLI dependency, no GraphQL, no missing-endpoint footgun.
|
||||
#
|
||||
# ============================================================
|
||||
# Why no workflow_dispatch tail (was load-bearing on GitHub, dead on Gitea)
|
||||
# ============================================================
|
||||
#
|
||||
# The GitHub-era version had a 60-line polling step that waited for
|
||||
# the promote PR to merge, then explicitly dispatched
|
||||
# `publish-workspace-server-image.yml` on `--ref main`. That step
|
||||
# existed because GitHub's GITHUB_TOKEN-initiated merges suppress
|
||||
# downstream `on: push` workflows (the documented "no recursion" rule
|
||||
# — https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
|
||||
# The explicit dispatch was the workaround.
|
||||
#
|
||||
# Gitea Actions does NOT have this no-recursion rule. PR #66's auto-
|
||||
# sync merge to main fired `auto-promote-staging` on the next push
|
||||
# trigger naturally. So the cascade fires on the natural push event;
|
||||
# the explicit dispatch is dead code. (And even if we wanted to
|
||||
# preserve it, Gitea has no `workflow_dispatch` REST endpoint.)
|
||||
#
|
||||
# Removed in this rewrite. If we ever observe the cascade misfire,
|
||||
# operator can push an empty commit to `main` to wake it.
|
||||
#
|
||||
# ============================================================
|
||||
# Why open a PR (and not direct push)
|
||||
# ============================================================
|
||||
#
|
||||
# `main` branch protection has `enable_push: false` with NO
|
||||
# `push_whitelist_usernames`. Direct push is impossible for any
|
||||
# persona, including admins. PR-mediated merge is the only path,
|
||||
# which is intentional: prod state mutations (and staging→main IS a
|
||||
# prod mutation, since the next deploy fans out to tenants) require
|
||||
# Hongming's approval per `feedback_prod_apply_needs_hongming_chat_go`.
|
||||
#
|
||||
# The auto-merge schedule preserves this gate: `merge_when_checks_succeed`
|
||||
# does NOT bypass `required_approvals: 1`. Gitea waits for BOTH
|
||||
# approval AND green checks before merging. Hongming reviews via the
|
||||
# canvas/chat-handle of the PR notification, approves, and Gitea
|
||||
# auto-merges within seconds.
|
||||
#
|
||||
# ============================================================
|
||||
# Identity + token (anti-bot-ring per saved-memory
|
||||
# `feedback_per_agent_gitea_identity_default`)
|
||||
# ============================================================
|
||||
#
|
||||
# This workflow uses `secrets.AUTO_SYNC_TOKEN` — a personal access
|
||||
# token issued to the `devops-engineer` Gitea persona. NOT the
|
||||
# founder PAT. The bot-ring fingerprint that triggered the GitHub
|
||||
# org suspension on 2026-05-06 was characterised by founder PAT
|
||||
# acting as CI at machine speed.
|
||||
#
|
||||
# Token scope: `push: true` (read+write) on this repo. The persona
|
||||
# can: open PRs, comment on PRs, schedule auto-merge. The persona
|
||||
# CANNOT bypass main's branch protection (`required_approvals: 1`
|
||||
# still applies — only Hongming's review unblocks merge).
|
||||
#
|
||||
# Authorship: the PR is opened by `devops-engineer`; the merge
|
||||
# commit credits Hongming-as-approver and `devops-engineer` as
|
||||
# the merger.
|
||||
#
|
||||
# ============================================================
|
||||
# Failure modes & operational notes
|
||||
# ============================================================
|
||||
#
|
||||
# A — staging gates not all green at trigger time:
|
||||
# - The combined-status check returns `state: pending|failure`.
|
||||
# Workflow exits 0 with a step-summary "not all green; staying
|
||||
# on current main". Re-fires on the next gate completion.
|
||||
#
|
||||
# B — Gitea PR-create returns non-201 (e.g. 422 already-exists):
|
||||
# - Idempotent: the workflow first GETs the existing open
|
||||
# staging→main PR. If found, reuse it; if not, POST a new one.
|
||||
# 422 should never surface; if it does (race), step summary
|
||||
# captures the body and the next workflow_run picks up.
|
||||
#
|
||||
# C — `merge_when_checks_succeed` schedule fails:
|
||||
# - 422 with "Pull request is not mergeable" if there are
|
||||
# conflicts or stale base. Step summary surfaces it; operator
|
||||
# (or `auto-sync-main-to-staging`) needs to bring staging up
|
||||
# to date with main first. Workflow exits 1 to surface red.
|
||||
#
|
||||
# D — `AUTO_SYNC_TOKEN` rotated / wrong scope:
|
||||
# - 401/403 on first REST call. Step summary surfaces it.
|
||||
# Re-issue the token from `~/.molecule-ai/personas/` on the
|
||||
# operator host and update the repo Actions secret.
|
||||
#
|
||||
# ============================================================
|
||||
# Loop safety
|
||||
# ============================================================
|
||||
#
|
||||
# When the promote PR merges to main, `auto-sync-main-to-staging.yml`
|
||||
# fires (on:push:main) and pushes the merge commit back to staging.
|
||||
# That push to staging is by `devops-engineer`, NOT this workflow's
|
||||
# token, and triggers the staging gate workflows. When they all
|
||||
# complete, we end up back here — but the tree-diff guard catches
|
||||
# it: staging tree == main tree (the merge commit changes nothing),
|
||||
# so we skip and the cycle terminates.
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows:
|
||||
- CI
|
||||
- E2E Staging Canvas (Playwright)
|
||||
- E2E API Smoke Test
|
||||
- CodeQL
|
||||
types: [completed]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
force:
|
||||
description: "Force promote even when AUTO_PROMOTE_ENABLED is unset (manual override)"
|
||||
required: false
|
||||
default: "false"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
# Serialize auto-promote runs. Multiple staging gate completions can land
|
||||
# in quick succession (CI + E2E + CodeQL all finish within seconds of
|
||||
# each other on a green PR) — without this, two parallel runs both:
|
||||
# 1. Would race the GET-or-POST PR step.
|
||||
# 2. Would both call merge-schedule (idempotent — fine on Gitea).
|
||||
# cancel-in-progress: false because the second run on a fresh staging
|
||||
# tip should NOT kill the first which has already opened the PR.
|
||||
concurrency:
|
||||
group: auto-promote-staging
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
check-all-gates-green:
|
||||
# Only consider staging pushes. PRs into staging don't promote.
|
||||
if: >
|
||||
(github.event_name == 'workflow_run' &&
|
||||
github.event.workflow_run.head_branch == 'staging' &&
|
||||
github.event.workflow_run.event == 'push')
|
||||
|| github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
all_green: ${{ steps.gates.outputs.all_green }}
|
||||
head_sha: ${{ steps.gates.outputs.head_sha }}
|
||||
steps:
|
||||
# Skip empty-tree promotes (the perpetual auto-promote↔auto-sync
|
||||
# cycle observed pre-cutover on GitHub). On Gitea the cycle shape
|
||||
# is different (auto-sync uses fast-forward, no merge commit),
|
||||
# but the tree-diff guard is cheap insurance and protects against
|
||||
# any future merge-style regression.
|
||||
- name: Checkout for tree-diff check
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: staging
|
||||
|
||||
- name: Skip if staging tree == main tree (cycle-break safety)
|
||||
id: tree-diff
|
||||
env:
|
||||
HEAD_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
|
||||
run: |
|
||||
set -eu
|
||||
git fetch origin main --depth=50 || { echo "::warning::git fetch main failed — proceeding (fail-open)"; exit 0; }
|
||||
if git diff --quiet origin/main "$HEAD_SHA" -- 2>/dev/null; then
|
||||
{
|
||||
echo "## Skipped — no code to promote"
|
||||
echo
|
||||
echo "staging tip (\`${HEAD_SHA:0:8}\`) and \`main\` have identical trees."
|
||||
echo "Skipping to avoid opening an empty promote PR."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "::notice::auto-promote: staging tree == main tree — no code to promote, skipping"
|
||||
echo "skip=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "skip=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Check combined status on staging head
|
||||
if: steps.tree-diff.outputs.skip != 'true'
|
||||
id: gates
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
|
||||
HEAD_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
|
||||
REPO: ${{ github.repository }}
|
||||
GITEA_HOST: ${{ vars.GITEA_HOST || 'https://git.moleculesai.app' }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Gitea-native combined-status endpoint aggregates every
|
||||
# check context attached to a SHA. This is structurally
|
||||
# cleaner than the GitHub-era per-workflow `gh run list`
|
||||
# loop because:
|
||||
#
|
||||
# 1. There's no risk of "workflow name collision" (the
|
||||
# GitHub-era code had to switch from `--workflow=NAME`
|
||||
# to `--workflow=FILE.YML` to disambiguate "CodeQL"
|
||||
# between the explicit workflow and GitHub's UI-
|
||||
# configured default setup; Gitea has no such
|
||||
# duplicate-name surface).
|
||||
# 2. Gitea's combined state already encodes the AND
|
||||
# across all contexts: success only if EVERY context
|
||||
# is success. Pending or failure on any context
|
||||
# produces non-success state.
|
||||
#
|
||||
# See https://docs.gitea.com/api/1.22 for the schema —
|
||||
# `state` is one of: success, pending, failure, error.
|
||||
|
||||
echo "head_sha=${HEAD_SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "Checking combined status on SHA ${HEAD_SHA}"
|
||||
|
||||
# `set +o pipefail` for the http-code capture pattern; restore
|
||||
# immediately. Pattern hardened per `feedback_curl_status_capture_pollution`.
|
||||
BODY_FILE=$(mktemp)
|
||||
set +e
|
||||
STATUS=$(curl -sS \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-H "Accept: application/json" \
|
||||
-o "${BODY_FILE}" \
|
||||
-w "%{http_code}" \
|
||||
"${GITEA_HOST}/api/v1/repos/${REPO}/commits/${HEAD_SHA}/status")
|
||||
CURL_RC=$?
|
||||
set -e
|
||||
|
||||
if [ "${CURL_RC}" -ne 0 ] || [ "${STATUS}" != "200" ]; then
|
||||
echo "::error::combined-status fetch failed: curl=${CURL_RC} http=${STATUS}"
|
||||
cat "${BODY_FILE}" | head -c 500 || true
|
||||
rm -f "${BODY_FILE}"
|
||||
echo "all_green=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
STATE=$(jq -r '.state // "missing"' < "${BODY_FILE}")
|
||||
TOTAL=$(jq -r '.total_count // 0' < "${BODY_FILE}")
|
||||
rm -f "${BODY_FILE}"
|
||||
|
||||
echo "Combined status: state=${STATE} total_count=${TOTAL}"
|
||||
|
||||
if [ "${STATE}" = "success" ] && [ "${TOTAL}" -gt 0 ]; then
|
||||
echo "all_green=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::All gates green on ${HEAD_SHA} (${TOTAL} contexts)"
|
||||
else
|
||||
echo "all_green=false" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo "## Not promoting — combined status not green"
|
||||
echo
|
||||
echo "- SHA: \`${HEAD_SHA:0:8}\`"
|
||||
echo "- Combined state: \`${STATE}\`"
|
||||
echo "- Context count: ${TOTAL}"
|
||||
echo
|
||||
echo "Will re-fire on the next gate completion. Investigate any red gate via the Actions UI."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "::notice::auto-promote: combined status is ${STATE} on ${HEAD_SHA} — staying on current main"
|
||||
fi
|
||||
|
||||
promote:
|
||||
needs: check-all-gates-green
|
||||
if: needs.check-all-gates-green.outputs.all_green == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check rollout gate
|
||||
env:
|
||||
AUTO_PROMOTE_ENABLED: ${{ vars.AUTO_PROMOTE_ENABLED }}
|
||||
FORCE_INPUT: ${{ github.event.inputs.force }}
|
||||
run: |
|
||||
set -eu
|
||||
# Repo variable AUTO_PROMOTE_ENABLED=true flips this on. While
|
||||
# it's unset, the workflow dry-runs (logs what it would have
|
||||
# done) but doesn't open the promote PR. Set the variable in
|
||||
# Settings → Actions → Variables.
|
||||
if [ "${AUTO_PROMOTE_ENABLED:-}" != "true" ] && [ "${FORCE_INPUT:-false}" != "true" ]; then
|
||||
{
|
||||
echo "## Auto-promote disabled"
|
||||
echo
|
||||
echo "Repo variable \`AUTO_PROMOTE_ENABLED\` is not set to \`true\`."
|
||||
echo "All gates are green on staging; would have opened a promote PR to \`main\`."
|
||||
echo
|
||||
echo "To enable: Settings → Actions → Variables → \`AUTO_PROMOTE_ENABLED=true\`."
|
||||
echo "To test once manually: workflow_dispatch with \`force=true\`."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "::notice::auto-promote disabled — dry run only"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
- name: Open or reuse promote PR + schedule auto-merge
|
||||
if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
|
||||
REPO: ${{ github.repository }}
|
||||
TARGET_SHA: ${{ needs.check-all-gates-green.outputs.head_sha }}
|
||||
GITEA_HOST: ${{ vars.GITEA_HOST || 'https://git.moleculesai.app' }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
API="${GITEA_HOST}/api/v1/repos/${REPO}"
|
||||
AUTH=(-H "Authorization: token ${GITEA_TOKEN}" -H "Accept: application/json")
|
||||
|
||||
# http_status_get RESULT_VAR URL
|
||||
# Sets RESULT_VAR to "<http_code>:<body_file>". Curl status
|
||||
# capture pattern per `feedback_curl_status_capture_pollution`:
|
||||
# http_code goes to its own tempfile-equivalent (-w), body to
|
||||
# another tempfile, set +e/-e bracket protects pipeline state.
|
||||
http_get() {
|
||||
local body_file="$1"; shift
|
||||
local url="$1"; shift
|
||||
set +e
|
||||
local code
|
||||
code=$(curl -sS "${AUTH[@]}" -o "${body_file}" -w "%{http_code}" "${url}")
|
||||
local rc=$?
|
||||
set -e
|
||||
if [ "${rc}" -ne 0 ]; then
|
||||
echo "::error::curl GET failed (rc=${rc}) on ${url}"
|
||||
return 99
|
||||
fi
|
||||
echo "${code}"
|
||||
}
|
||||
http_post_json() {
|
||||
local body_file="$1"; shift
|
||||
local data="$1"; shift
|
||||
local url="$1"; shift
|
||||
set +e
|
||||
local code
|
||||
code=$(curl -sS "${AUTH[@]}" -H "Content-Type: application/json" \
|
||||
-X POST -d "${data}" -o "${body_file}" -w "%{http_code}" "${url}")
|
||||
local rc=$?
|
||||
set -e
|
||||
if [ "${rc}" -ne 0 ]; then
|
||||
echo "::error::curl POST failed (rc=${rc}) on ${url}"
|
||||
return 99
|
||||
fi
|
||||
echo "${code}"
|
||||
}
|
||||
|
||||
# Step 1: look for an existing open staging→main promote PR
|
||||
# (idempotent on workflow re-run). Gitea doesn't have a
|
||||
# head/base filter on the list endpoint that's as ergonomic
|
||||
# as gh's, but the dedicated `/pulls/{base}/{head}` lookup
|
||||
# works.
|
||||
BODY=$(mktemp)
|
||||
STATUS=$(http_get "${BODY}" "${API}/pulls/main/staging") || true
|
||||
|
||||
PR_NUM=""
|
||||
if [ "${STATUS}" = "200" ]; then
|
||||
STATE=$(jq -r '.state // "missing"' < "${BODY}")
|
||||
if [ "${STATE}" = "open" ]; then
|
||||
PR_NUM=$(jq -r '.number // ""' < "${BODY}")
|
||||
echo "::notice::Re-using existing open promote PR #${PR_NUM}"
|
||||
fi
|
||||
fi
|
||||
rm -f "${BODY}"
|
||||
|
||||
# Step 2: if no open PR, create one.
|
||||
if [ -z "${PR_NUM}" ]; then
|
||||
TITLE="staging → main: auto-promote ${TARGET_SHA:0:7}"
|
||||
BODY_TEXT=$(cat <<EOFBODY
|
||||
Automated promotion of \`staging\` (\`${TARGET_SHA:0:8}\`) to \`main\`. All required staging gates are green at this SHA (combined status reported success).
|
||||
|
||||
This PR is auto-generated by \`.github/workflows/auto-promote-staging.yml\` whenever every required gate completes green on the same staging SHA.
|
||||
|
||||
**Approval gate:** \`main\` branch protection requires 1 approval before this can land. Once approved, Gitea will auto-merge (the workflow scheduled \`merge_when_checks_succeed: true\` immediately after open).
|
||||
|
||||
The reverse-direction sync (the merge commit on \`main\` → \`staging\`) is handled automatically by \`auto-sync-main-to-staging.yml\` after this PR lands.
|
||||
|
||||
---
|
||||
- Source: staging at \`${TARGET_SHA}\`
|
||||
- Opened by: \`devops-engineer\` persona (anti-bot-ring; never founder PAT)
|
||||
- Refs: #65, #73, #195
|
||||
EOFBODY
|
||||
)
|
||||
REQ=$(jq -n \
|
||||
--arg title "${TITLE}" \
|
||||
--arg body "${BODY_TEXT}" \
|
||||
--arg base "main" \
|
||||
--arg head "staging" \
|
||||
'{title:$title, body:$body, base:$base, head:$head}')
|
||||
|
||||
BODY=$(mktemp)
|
||||
STATUS=$(http_post_json "${BODY}" "${REQ}" "${API}/pulls")
|
||||
|
||||
if [ "${STATUS}" = "201" ]; then
|
||||
PR_NUM=$(jq -r '.number // ""' < "${BODY}")
|
||||
echo "::notice::Opened promote PR #${PR_NUM}"
|
||||
else
|
||||
echo "::error::Failed to create promote PR: HTTP ${STATUS}"
|
||||
jq -r '.message // .' < "${BODY}" | head -c 500
|
||||
rm -f "${BODY}"
|
||||
exit 1
|
||||
fi
|
||||
rm -f "${BODY}"
|
||||
fi
|
||||
|
||||
# Step 3: schedule auto-merge. merge_when_checks_succeed
|
||||
# tells Gitea to wait for both:
|
||||
# - all required status checks to pass
|
||||
# - the required-approvals gate (1 approval on main)
|
||||
# before merging. On approval+green, Gitea merges within
|
||||
# seconds. On any check failing or approval being denied,
|
||||
# the schedule stays armed but doesn't fire.
|
||||
#
|
||||
# Idempotent: re-arming on an already-armed PR is a no-op.
|
||||
REQ=$(jq -n '{Do:"merge", merge_when_checks_succeed:true}')
|
||||
BODY=$(mktemp)
|
||||
STATUS=$(http_post_json "${BODY}" "${REQ}" "${API}/pulls/${PR_NUM}/merge")
|
||||
|
||||
# Gitea returns:
|
||||
# - 200/204 on successful immediate merge (gates already green AND approved)
|
||||
# - 405 "Please try again later" when scheduled successfully but waiting
|
||||
# - 422 on "Pull request is not mergeable" (conflict, stale base, etc.)
|
||||
#
|
||||
# 405 here is benign — Gitea's way of saying "scheduled, not merging now".
|
||||
# We treat 200/204/405 as success, anything else as failure.
|
||||
case "${STATUS}" in
|
||||
200|204)
|
||||
MERGE_OUTCOME="merged-immediately"
|
||||
echo "::notice::Promote PR #${PR_NUM} merged immediately (gates+approval already green)"
|
||||
;;
|
||||
405)
|
||||
MERGE_OUTCOME="auto-merge-scheduled"
|
||||
echo "::notice::Promote PR #${PR_NUM}: auto-merge scheduled (Gitea will land on approval+green)"
|
||||
;;
|
||||
422)
|
||||
MERGE_OUTCOME="not-mergeable"
|
||||
echo "::warning::Promote PR #${PR_NUM}: not mergeable (conflict, stale base, or already merging)."
|
||||
jq -r '.message // .' < "${BODY}" | head -c 500
|
||||
;;
|
||||
*)
|
||||
echo "::error::Unexpected status ${STATUS} on merge schedule"
|
||||
jq -r '.message // .' < "${BODY}" | head -c 500
|
||||
rm -f "${BODY}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
rm -f "${BODY}"
|
||||
|
||||
{
|
||||
echo "## Auto-promote PR opened"
|
||||
echo
|
||||
echo "- Source: staging at \`${TARGET_SHA:0:8}\`"
|
||||
echo "- PR: #${PR_NUM}"
|
||||
echo "- Outcome: \`${MERGE_OUTCOME}\`"
|
||||
echo
|
||||
if [ "${MERGE_OUTCOME}" = "auto-merge-scheduled" ]; then
|
||||
echo "Gitea will auto-merge once Hongming approves and all checks are green. No human action needed beyond approval."
|
||||
elif [ "${MERGE_OUTCOME}" = "merged-immediately" ]; then
|
||||
echo "Merged immediately. \`publish-workspace-server-image.yml\` will fire naturally on the resulting \`main\` push."
|
||||
else
|
||||
echo "PR is not auto-merging. Operator may need to bring staging up to date with main, then re-trigger this workflow via workflow_dispatch."
|
||||
fi
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
83
.github/workflows/auto-promote-stale-alarm.yml
vendored
Normal file
83
.github/workflows/auto-promote-stale-alarm.yml
vendored
Normal file
@ -0,0 +1,83 @@
|
||||
name: auto-promote-stale-alarm
|
||||
|
||||
# Hourly cron + on-demand alarm for the silent-block failure mode that
|
||||
# motivated issue #2975:
|
||||
# - The auto-promote-staging.yml workflow opened a PR + armed
|
||||
# auto-merge, but main's branch protection requires a human review
|
||||
# (reviewDecision=REVIEW_REQUIRED). The PR sat BLOCKED with no
|
||||
# surface-up-the-stack for 12+ hours, holding 25 commits hostage
|
||||
# including the Memory v2 redesign and a reno-stars data-loss fix.
|
||||
#
|
||||
# This workflow runs `scripts/check-stale-promote-pr.sh` against the
|
||||
# repo's open auto-promote PRs (base=main head=staging). When a PR has
|
||||
# been BLOCKED on REVIEW_REQUIRED for >4h, it:
|
||||
# 1. Emits a workflow-level warning (visible in run summary + the
|
||||
# Actions UI feed).
|
||||
# 2. Posts a comment on the PR (idempotent — one alarm per PR).
|
||||
#
|
||||
# The detection logic lives in scripts/check-stale-promote-pr.sh so
|
||||
# it's unit-testable with stubbed `gh` (see test-check-stale-promote-pr.sh).
|
||||
# This file is the schedule + invocation surface only — SSOT for the
|
||||
# detector itself.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Hourly. Cheap (one `gh pr list` + jq), and 1h granularity is
|
||||
# plenty for a 4h staleness threshold — operators see the alarm
|
||||
# within at most 1h of crossing the threshold.
|
||||
- cron: "27 * * * *" # at :27 to dodge the cron herd at :00
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
stale_hours:
|
||||
description: "Hours after which a BLOCKED+REVIEW_REQUIRED PR is stale (default 4)"
|
||||
required: false
|
||||
default: "4"
|
||||
post_comment:
|
||||
description: "Post a comment on stale PRs (default true)"
|
||||
required: false
|
||||
default: "true"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write # post comments on stale PRs
|
||||
|
||||
# Serialize so the on-demand and scheduled runs don't double-comment
|
||||
# the same PR. cancel-in-progress=false because the script is idempotent
|
||||
# (existing comment marker prevents dupes), but a scheduled run firing
|
||||
# while a manual one runs would just re-list the same PR set.
|
||||
concurrency:
|
||||
group: auto-promote-stale-alarm
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout (need scripts/ only)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
sparse-checkout: |
|
||||
scripts/check-stale-promote-pr.sh
|
||||
sparse-checkout-cone-mode: false
|
||||
- name: Run stale-PR detector
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
STALE_HOURS: ${{ inputs.stale_hours || '4' }}
|
||||
POST_COMMENT: ${{ inputs.post_comment || 'true' }}
|
||||
run: |
|
||||
# The script's exit code reflects the count of stale PRs.
|
||||
# We don't want a stale finding to fail the workflow run —
|
||||
# the warning + comment are the signal, the green/red is
|
||||
# noise. So convert any non-zero exit to a workflow notice
|
||||
# and exit 0.
|
||||
set +e
|
||||
bash scripts/check-stale-promote-pr.sh
|
||||
rc=$?
|
||||
set -e
|
||||
if [ "$rc" -ne 0 ]; then
|
||||
echo "::notice::Stale PR detector found $rc PR(s) needing attention. See warnings above + comments on the PRs."
|
||||
fi
|
||||
# Always succeed — operator-facing surface is the warning,
|
||||
# not the workflow status.
|
||||
exit 0
|
||||
404
.github/workflows/auto-sync-canary.yml
vendored
Normal file
404
.github/workflows/auto-sync-canary.yml
vendored
Normal file
@ -0,0 +1,404 @@
|
||||
name: Auto-sync canary — AUTO_SYNC_TOKEN rotation drift
|
||||
|
||||
# Synthetic health check for the AUTO_SYNC_TOKEN secret consumed by
|
||||
# auto-sync-main-to-staging.yml (PR #66) and publish-workspace-server-image.yml.
|
||||
#
|
||||
# ============================================================
|
||||
# Why this workflow exists
|
||||
# ============================================================
|
||||
#
|
||||
# PR #66 fixed auto-sync (replaced GitHub-era `gh pr create` — which
|
||||
# 405s on Gitea's GraphQL endpoint — with a direct git push from the
|
||||
# `devops-engineer` persona's `AUTO_SYNC_TOKEN`). Hostile self-review
|
||||
# weakest spot #3 of that PR:
|
||||
#
|
||||
# "Token rotation silently breaks auto-sync. If AUTO_SYNC_TOKEN is
|
||||
# rotated without updating the repo secret, every push to main
|
||||
# fails red on the auto-sync push step. The workflow surfaces the
|
||||
# failure mode in the step summary (failure mode B in the header),
|
||||
# but there's no proactive monitoring."
|
||||
#
|
||||
# Detection latency under the status quo: rotation is only caught on
|
||||
# the next push to `main`. During quiet periods (no main push for
|
||||
# many hours) the staging-superset-of-main invariant silently breaks.
|
||||
#
|
||||
# This workflow closes the gap: every 6 hours, it fires the auth
|
||||
# surface that auto-sync depends on and emits a red workflow status
|
||||
# if AUTO_SYNC_TOKEN has drifted out of validity.
|
||||
#
|
||||
# ============================================================
|
||||
# What this checks (Option B — read-only verify)
|
||||
# ============================================================
|
||||
#
|
||||
# 1. `GET /api/v1/user` against Gitea with the token → validates the
|
||||
# token authenticates AND resolves to `devops-engineer` (catches
|
||||
# the case where the token was regenerated under a different
|
||||
# persona by mistake).
|
||||
# 2. `GET /api/v1/repos/molecule-ai/molecule-core` with the token →
|
||||
# validates the token has `read:repository` scope on this repo
|
||||
# (the v2 scope contract — see saved memory
|
||||
# `reference_persona_token_v2_scope`).
|
||||
# 3. `git push --dry-run` of the current staging SHA back to
|
||||
# `refs/heads/staging` via `https://oauth2:<token>@<gitea>/...`
|
||||
# → validates the EXACT HTTPS basic-auth path that
|
||||
# `actions/checkout` + `git push origin staging` use inside
|
||||
# auto-sync-main-to-staging.yml. NOP by construction (push the
|
||||
# current tip to itself = "Everything up-to-date"); auth is
|
||||
# checked at the smart-protocol handshake BEFORE the empty-diff
|
||||
# computation, so bad token → exit 128 with "Authentication
|
||||
# failed". `git ls-remote` is NOT used here because Gitea
|
||||
# falls back to anonymous read on public repos and would
|
||||
# silently green-light a rotated token.
|
||||
#
|
||||
# Each step exits non-zero with an actionable error message if it
|
||||
# fails. The workflow status itself is the operator-facing surface.
|
||||
#
|
||||
# ============================================================
|
||||
# What this does NOT check (intentional)
|
||||
# ============================================================
|
||||
#
|
||||
# - **Branch-protection authz** (failure mode C in auto-sync header):
|
||||
# would require an actual write to staging. Already monitored by
|
||||
# `branch-protection-drift.yml` daily. Don't duplicate.
|
||||
# - **Conflict resolution** (failure mode A): a real conflict is data-
|
||||
# driven, not auth-driven; can't synthesise it without polluting
|
||||
# staging. Already surfaces immediately on the next main push.
|
||||
# - **Concurrency** (failure mode D): handled by workflow concurrency
|
||||
# group on auto-sync, not a credential issue.
|
||||
#
|
||||
# ============================================================
|
||||
# Why Option B (read-only) and not the alternatives
|
||||
# ============================================================
|
||||
#
|
||||
# Considered + rejected (see issue #72 for full write-up):
|
||||
#
|
||||
# - **Option A — full auto-sync on schedule**: every run creates a
|
||||
# no-op merge commit on staging when main hasn't advanced. 4 noise
|
||||
# commits/day. And races the real `push:` trigger when main has
|
||||
# advanced. Rejected.
|
||||
#
|
||||
# - **Option C — push to dedicated `auto-sync-canary` branch**: would
|
||||
# exercise authz too, but adds branch noise on Gitea AND requires
|
||||
# maintaining a second branch protection (or expanding staging's
|
||||
# whitelist to a junk branch). Authz already covered by
|
||||
# `branch-protection-drift.yml`. Rejected.
|
||||
#
|
||||
# Prior art for the chosen Option B shape:
|
||||
# - Cloudflare's `/user/tokens/verify` endpoint (read-only auth
|
||||
# probe explicitly designed for credential canaries).
|
||||
# - AWS Secrets Manager rotation Lambda's `testSecret` step (auth
|
||||
# probe before promoting AWSPENDING → AWSCURRENT).
|
||||
# - HashiCorp Vault's `vault token lookup` for renewal canaries.
|
||||
#
|
||||
# ============================================================
|
||||
# Operator runbook — what to do when this workflow goes RED
|
||||
# ============================================================
|
||||
#
|
||||
# 1. **Identify which step failed**:
|
||||
# - Step "Verify token authenticates as devops-engineer" red →
|
||||
# token is invalid OR resolves to wrong persona.
|
||||
# - Step "Verify token has repo read scope" red → token valid but
|
||||
# stripped of `read:repository` scope (or repo perms changed).
|
||||
# - Step "Verify git HTTPS auth path via no-op dry-run push to
|
||||
# staging" red → token rotated/revoked OR Gitea git-HTTPS
|
||||
# surface is broken (rare). Auth check happens on the
|
||||
# smart-protocol handshake, separate from the API path.
|
||||
#
|
||||
# 2. **Re-issue the token** on the operator host:
|
||||
# ```
|
||||
# ssh root@5.78.80.188 'docker exec --user git molecule-gitea-1 \
|
||||
# gitea admin user generate-access-token \
|
||||
# --username devops-engineer \
|
||||
# --token-name persona-devops-engineer-vN \
|
||||
# --scopes "read:repository,write:repository,read:user,read:organization,read:issue,write:issue,read:notification,read:misc"'
|
||||
# ```
|
||||
# Update `/etc/molecule-bootstrap/agent-secrets.env` in place
|
||||
# (per `feedback_unified_credentials_file`). The previous token
|
||||
# file lands at `.bak.<date>`.
|
||||
#
|
||||
# 3. **Update the repo Actions secret** at:
|
||||
# Settings → Secrets and variables → Actions → AUTO_SYNC_TOKEN
|
||||
# Paste the new token. (Don't echo it in chat — but per
|
||||
# `feedback_passwords_in_chat_are_burned`, a paste in a 1:1
|
||||
# Claude session is within trust boundary.)
|
||||
#
|
||||
# 4. **Re-run this canary** via workflow_dispatch. Confirm GREEN.
|
||||
#
|
||||
# 5. **Backfill any missed main → staging syncs** by re-running
|
||||
# `auto-sync-main-to-staging.yml` from its workflow_dispatch
|
||||
# surface, OR by pushing an empty commit to main (if you'd
|
||||
# rather force a real trigger).
|
||||
#
|
||||
# ============================================================
|
||||
# Security notes
|
||||
# ============================================================
|
||||
#
|
||||
# - Token usage: read-only (`GET /api/v1/user`, `GET /api/v1/repos/...`,
|
||||
# `git ls-remote`). No write paths. Same blast-radius profile as
|
||||
# `actions/checkout` on a public repo.
|
||||
# - The token NEVER appears in logs: every `curl` uses a header
|
||||
# variable, never inline; the `git ls-remote` URL builds the
|
||||
# `oauth2:$TOKEN@host` form into a single env var that's not
|
||||
# echoed. GitHub Actions secret-masking covers anything that does
|
||||
# slip through.
|
||||
# - No new token introduced — same `AUTO_SYNC_TOKEN` the workflow
|
||||
# under monitor uses. Per least-privilege we deliberately do NOT
|
||||
# broaden scope for the canary.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Every 6 hours at :17 (offsets the cron herd at :00). Justification
|
||||
# from issue #72: cheap to run (~5s wall-clock, no quota), 3h average
|
||||
# detection latency, 6h max. 1h would be 24× the runs for marginal
|
||||
# benefit; daily would be 6× longer latency and worse than status
|
||||
# quo on a quiet-main day.
|
||||
- cron: '17 */6 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
# No concurrency group needed — the canary is read-only and idempotent.
|
||||
# Two parallel runs (e.g. operator dispatch during a scheduled tick) are
|
||||
# harmless: same result, doubled HTTPS calls, no shared state.
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
verify-token:
|
||||
name: Verify AUTO_SYNC_TOKEN validity
|
||||
runs-on: ubuntu-latest
|
||||
# 2 min surfaces hangs (Gitea API stall, DNS issue) within one
|
||||
# cron interval. Realistic worst case is ~10s: 2 curls + 1 git
|
||||
# ls-remote, each capped by the explicit timeouts below.
|
||||
timeout-minutes: 2
|
||||
|
||||
env:
|
||||
# Pinned in env so individual steps can read it without
|
||||
# repeating the secret reference. GitHub masks the value in
|
||||
# logs automatically.
|
||||
AUTO_SYNC_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
|
||||
# MUST stay in sync with auto-sync-main-to-staging.yml's
|
||||
# `git config user.name "devops-engineer"` line. Renaming the
|
||||
# devops-engineer persona requires updating both files (and
|
||||
# the staging branch protection's `push_whitelist_usernames`).
|
||||
EXPECTED_PERSONA: devops-engineer
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO_PATH: molecule-ai/molecule-core
|
||||
|
||||
steps:
|
||||
- name: Verify AUTO_SYNC_TOKEN secret is configured
|
||||
# Schedule-vs-dispatch behaviour split, per
|
||||
# `feedback_schedule_vs_dispatch_secrets_hardening`:
|
||||
#
|
||||
# - schedule: hard-fail when the secret is missing. The
|
||||
# whole point of the canary is to surface drift; soft-
|
||||
# skipping on missing-secret would make the canary
|
||||
# itself drift-invisible (sweep-cf-orphans #2088 lesson).
|
||||
# - workflow_dispatch: hard-fail too — there's no scenario
|
||||
# where an operator wants this canary to silently no-op.
|
||||
# The workflow has no other ad-hoc utility; if you ran
|
||||
# it, you wanted the answer.
|
||||
run: |
|
||||
if [ -z "${AUTO_SYNC_TOKEN}" ]; then
|
||||
echo "::error::AUTO_SYNC_TOKEN secret is not set on this repo." >&2
|
||||
echo "::error::Set it at Settings → Secrets and variables → Actions." >&2
|
||||
echo "::error::Without it, auto-sync-main-to-staging.yml will fail every push to main." >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "AUTO_SYNC_TOKEN is configured (value masked)."
|
||||
|
||||
- name: Verify token authenticates as ${{ env.EXPECTED_PERSONA }}
|
||||
# Calls Gitea's `/api/v1/user` — the canonical
|
||||
# auth-probe-with-no-side-effects endpoint (mirrors
|
||||
# Cloudflare's /user/tokens/verify).
|
||||
#
|
||||
# Failure surfaces:
|
||||
# - HTTP 401: token invalid (rotated, revoked, or never
|
||||
# correctly registered).
|
||||
# - HTTP 200 but username != devops-engineer: token was
|
||||
# regenerated under the wrong persona — this would let
|
||||
# auth pass but commit attribution would be wrong, and
|
||||
# branch-protection authz would fail because only
|
||||
# `devops-engineer` is whitelisted.
|
||||
run: |
|
||||
set -euo pipefail
|
||||
response_file="$(mktemp)"
|
||||
code_file="$(mktemp)"
|
||||
# `--max-time 30`: full call ceiling. `--connect-timeout 10`:
|
||||
# DNS + TCP. `-w "%{http_code}"` routed to a tempfile so curl's
|
||||
# exit code can't pollute the captured status — see
|
||||
# feedback_curl_status_capture_pollution + the
|
||||
# `lint-curl-status-capture.yml` gate that rejects the unsafe
|
||||
# `$(curl ... || echo "000")` shape.
|
||||
set +e
|
||||
curl -sS -o "$response_file" \
|
||||
--max-time 30 --connect-timeout 10 \
|
||||
-w "%{http_code}" \
|
||||
-H "Authorization: token ${AUTO_SYNC_TOKEN}" \
|
||||
-H "Accept: application/json" \
|
||||
"https://${GITEA_HOST}/api/v1/user" >"$code_file" 2>/dev/null
|
||||
set -e
|
||||
status=$(cat "$code_file" 2>/dev/null || true)
|
||||
[ -z "$status" ] && status="000"
|
||||
|
||||
if [ "$status" != "200" ]; then
|
||||
echo "::error::Token rotation suspected: GET /api/v1/user returned HTTP $status (expected 200)." >&2
|
||||
echo "::error::Likely cause: AUTO_SYNC_TOKEN has been rotated/revoked on Gitea but the repo Actions secret was not updated." >&2
|
||||
echo "::error::Runbook: see header comment of this workflow file." >&2
|
||||
# Print response body but redact anything that looks like a token.
|
||||
sed -E 's/[A-Fa-f0-9]{32,}/<redacted>/g' "$response_file" >&2 || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
username=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('login',''))" "$response_file")
|
||||
if [ "$username" != "${EXPECTED_PERSONA}" ]; then
|
||||
echo "::error::Token resolves to user '$username', expected '${EXPECTED_PERSONA}'." >&2
|
||||
echo "::error::AUTO_SYNC_TOKEN must be the devops-engineer persona PAT (not founder PAT, not another persona)." >&2
|
||||
echo "::error::Auto-sync push will fail because only 'devops-engineer' is whitelisted on staging branch protection." >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "Token authenticates as: $username ✓"
|
||||
|
||||
- name: Verify token has repo read scope
|
||||
# `GET /api/v1/repos/<owner>/<repo>` requires `read:repository`
|
||||
# on the persona's v2 scope contract. If the scope was
|
||||
# narrowed/dropped on rotation we catch it here, before the
|
||||
# next main push reveals it via a checkout failure.
|
||||
run: |
|
||||
set -euo pipefail
|
||||
response_file="$(mktemp)"
|
||||
code_file="$(mktemp)"
|
||||
# See first probe step for the rationale on the tempfile-routed
|
||||
# `-w "%{http_code}"` pattern — the unsafe `|| echo "000"` shape
|
||||
# is rejected by lint-curl-status-capture.yml.
|
||||
set +e
|
||||
curl -sS -o "$response_file" \
|
||||
--max-time 30 --connect-timeout 10 \
|
||||
-w "%{http_code}" \
|
||||
-H "Authorization: token ${AUTO_SYNC_TOKEN}" \
|
||||
-H "Accept: application/json" \
|
||||
"https://${GITEA_HOST}/api/v1/repos/${REPO_PATH}" >"$code_file" 2>/dev/null
|
||||
set -e
|
||||
status=$(cat "$code_file" 2>/dev/null || true)
|
||||
[ -z "$status" ] && status="000"
|
||||
|
||||
if [ "$status" != "200" ]; then
|
||||
echo "::error::Token lacks read:repository scope on ${REPO_PATH}: HTTP $status." >&2
|
||||
echo "::error::Auto-sync's actions/checkout step will fail with this token." >&2
|
||||
echo "::error::Re-issue with v2 scope contract: read:repository,write:repository,read:user,read:organization,read:issue,write:issue,read:notification,read:misc" >&2
|
||||
sed -E 's/[A-Fa-f0-9]{32,}/<redacted>/g' "$response_file" >&2 || true
|
||||
exit 1
|
||||
fi
|
||||
echo "Token has read:repository on ${REPO_PATH} ✓"
|
||||
|
||||
- name: Verify git HTTPS auth path via no-op dry-run push to staging
|
||||
# Final probe: exercise the EXACT auth path that
|
||||
# `actions/checkout` + `git push origin staging` use in
|
||||
# auto-sync-main-to-staging.yml. Gitea's API and git-HTTPS
|
||||
# surfaces share the token-lookup code path internally but
|
||||
# the wire-level error shapes differ — historically (#173)
|
||||
# the API path was healthy while git-HTTPS rejected, so
|
||||
# checking only the API would have given false-green.
|
||||
#
|
||||
# IMPORTANT: `git ls-remote` on a public repo (which
|
||||
# molecule-core is) succeeds even with a junk token because
|
||||
# Gitea falls back to anonymous-read. `ls-remote` therefore
|
||||
# CANNOT validate auth on this surface. We use
|
||||
# `git push --dry-run` instead — push is auth-gated even on
|
||||
# public repos.
|
||||
#
|
||||
# NOP shape: read the current staging SHA via authenticated
|
||||
# ls-remote (the SHA itself is public; auth is incidental
|
||||
# here, used only to colocate the discovery in one step), then
|
||||
# `git push --dry-run <SHA>:refs/heads/staging`. Pushing the
|
||||
# current tip back to itself is "Everything up-to-date" with
|
||||
# exit 0 when auth succeeds. With a bad token Gitea returns
|
||||
# HTTP 401 in the smart-protocol handshake and git exits 128
|
||||
# with "Authentication failed".
|
||||
#
|
||||
# The dry-run never reaches Gitea's pre-receive hook (which
|
||||
# is where branch-protection authz runs), so this probe does
|
||||
# not validate failure mode C. That's intentional —
|
||||
# branch-protection-drift.yml owns authz monitoring; this
|
||||
# canary owns auth.
|
||||
env:
|
||||
# Don't hang waiting for password prompt if auth fails on a
|
||||
# terminal-attached run. (In Actions there's no terminal,
|
||||
# but the env-var hardens against an interactive runner
|
||||
# config.)
|
||||
GIT_TERMINAL_PROMPT: "0"
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Token is in $AUTO_SYNC_TOKEN (job-level env). Compose the
|
||||
# URL as a local var that's never echoed.
|
||||
url="https://oauth2:${AUTO_SYNC_TOKEN}@${GITEA_HOST}/${REPO_PATH}"
|
||||
|
||||
# Step a: read current staging SHA. ~1KB; auth-gated only
|
||||
# on private repos but always works on public — used here
|
||||
# only to discover the SHA, not to validate auth.
|
||||
staging_ref=$(timeout 30s git ls-remote --refs "$url" refs/heads/staging 2>&1) || {
|
||||
redacted=$(echo "$staging_ref" | sed -E "s|oauth2:[^@]+@|oauth2:<redacted>@|g")
|
||||
echo "::error::ls-remote against staging failed (network/DNS issue):" >&2
|
||||
echo "$redacted" >&2
|
||||
exit 1
|
||||
}
|
||||
if ! echo "$staging_ref" | grep -qE '^[0-9a-f]{40}[[:space:]]+refs/heads/staging$'; then
|
||||
echo "::error::ls-remote returned unexpected shape:" >&2
|
||||
echo "$staging_ref" | sed -E "s|oauth2:[^@]+@|oauth2:<redacted>@|g" >&2
|
||||
exit 1
|
||||
fi
|
||||
staging_sha=$(echo "$staging_ref" | awk '{print $1}')
|
||||
|
||||
# Step b: spin up an ephemeral local repo. `git push` always
|
||||
# requires a local repo even when pushing a remote SHA that
|
||||
# isn't in the local object DB (the protocol negotiates and
|
||||
# discovers we don't need to send any objects). We don't use
|
||||
# `actions/checkout` for this — it would clone the whole
|
||||
# repo (~hundreds of MB) for what's essentially `git init`.
|
||||
tmp_repo="$(mktemp -d)"
|
||||
trap 'rm -rf "$tmp_repo"' EXIT
|
||||
git -C "$tmp_repo" init -q
|
||||
# Author config required for any git operation; values are
|
||||
# arbitrary because nothing gets committed here.
|
||||
git -C "$tmp_repo" config user.email canary@auto-sync.local
|
||||
git -C "$tmp_repo" config user.name auto-sync-canary
|
||||
|
||||
# Step c: dry-run push the current staging SHA back to
|
||||
# staging. NOP by construction — the remote tip equals the
|
||||
# SHA we're pushing, so "Everything up-to-date" is the
|
||||
# success path.
|
||||
#
|
||||
# Authentication is checked at the smart-protocol handshake,
|
||||
# BEFORE the dry-run can compute an empty diff. Bad token
|
||||
# → "Authentication failed", exit 128. Good token → exit 0.
|
||||
set +e
|
||||
push_out=$(timeout 30s git -C "$tmp_repo" push --dry-run "$url" "${staging_sha}:refs/heads/staging" 2>&1)
|
||||
push_rc=$?
|
||||
set -e
|
||||
|
||||
if [ "$push_rc" -ne 0 ]; then
|
||||
redacted=$(echo "$push_out" | sed -E "s|oauth2:[^@]+@|oauth2:<redacted>@|g")
|
||||
echo "::error::Token rotation suspected: git push --dry-run against staging failed via the AUTO_SYNC_TOKEN HTTPS auth path (exit $push_rc)." >&2
|
||||
echo "::error::This is the EXACT auth path that actions/checkout + git push use in auto-sync-main-to-staging.yml." >&2
|
||||
echo "::error::Likely cause: AUTO_SYNC_TOKEN was rotated/revoked on Gitea but the repo Actions secret was not updated. Runbook: see header." >&2
|
||||
echo "$redacted" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "git HTTPS auth path: NOP push --dry-run to staging → ${staging_sha:0:8} ✓"
|
||||
|
||||
- name: Summarise canary result
|
||||
# Everything passed — surface a green summary. (Failures
|
||||
# already wrote ::error:: lines and exited above; if we got
|
||||
# here, all three probes passed.)
|
||||
run: |
|
||||
{
|
||||
echo "## Auto-sync canary: GREEN"
|
||||
echo ""
|
||||
echo "AUTO_SYNC_TOKEN is healthy:"
|
||||
echo "- Authenticates as \`${EXPECTED_PERSONA}\` ✓"
|
||||
echo "- Has \`read:repository\` scope on \`${REPO_PATH}\` ✓"
|
||||
echo "- Git HTTPS auth path: no-op dry-run push to \`refs/heads/staging\` succeeds ✓"
|
||||
echo ""
|
||||
echo "Auto-sync main → staging will succeed on the next push to main."
|
||||
echo "If this canary ever goes RED, see the runbook in this workflow's header."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
255
.github/workflows/auto-sync-main-to-staging.yml
vendored
Normal file
255
.github/workflows/auto-sync-main-to-staging.yml
vendored
Normal file
@ -0,0 +1,255 @@
|
||||
name: Auto-sync main → staging
|
||||
|
||||
# Reflects every push to `main` back onto `staging` so the
|
||||
# staging-as-superset-of-main invariant holds.
|
||||
#
|
||||
# ============================================================
|
||||
# What this workflow does
|
||||
# ============================================================
|
||||
#
|
||||
# On every push to `main`:
|
||||
# 1. Checks if staging already contains main → no-op.
|
||||
# 2. Fetches both branches, merges main into staging in the
|
||||
# runner workspace (fast-forward if possible, else
|
||||
# `--no-ff` merge commit).
|
||||
# 3. Pushes staging directly to origin via the
|
||||
# `devops-engineer` persona's `AUTO_SYNC_TOKEN`.
|
||||
#
|
||||
# Authoritative path: a single `git push origin staging` from
|
||||
# inside this workflow is the SSOT for advancing staging after
|
||||
# a main push. No PR, no merge queue, no human approval —
|
||||
# staging is mechanically maintained as a superset of main.
|
||||
#
|
||||
# `auto-promote-staging.yml` is the reverse-direction
|
||||
# counterpart (staging → main, gated on green CI). Together
|
||||
# they keep the staging-superset-of-main invariant tight.
|
||||
#
|
||||
# ============================================================
|
||||
# Why direct push (and not "open a PR")
|
||||
# ============================================================
|
||||
#
|
||||
# Pre-2026-05-06 the canonical SCM was GitHub.com, where:
|
||||
# - The `staging` branch had a `merge_queue` ruleset that
|
||||
# blocked ALL direct pushes (no bypass even for org
|
||||
# admins or the GitHub Actions integration).
|
||||
# - Therefore this workflow opened a PR via `gh pr create`
|
||||
# and let auto-merge land it through the queue.
|
||||
#
|
||||
# Post-2026-05-06 the canonical SCM is Gitea
|
||||
# (`git.moleculesai.app/molecule-ai/molecule-core`). Gitea:
|
||||
# - Has no `merge_queue` concept.
|
||||
# - Allows direct push to protected branches via per-user
|
||||
# `push_whitelist_usernames` on the branch protection.
|
||||
# - Does not expose a GraphQL endpoint, so `gh pr create`
|
||||
# returns `HTTP 405 Method Not Allowed
|
||||
# (https://git.moleculesai.app/api/graphql)` — the
|
||||
# pre-suspension architecture cannot work on Gitea.
|
||||
#
|
||||
# The molecule-ai/molecule-core staging branch protection
|
||||
# (verified via `GET /api/v1/repos/.../branch_protections`)
|
||||
# whitelists `devops-engineer` for direct push. So the
|
||||
# correct Gitea-shape architecture is: authenticate as
|
||||
# `devops-engineer`, merge locally, push staging directly.
|
||||
#
|
||||
# This is structurally simpler than the GitHub-era PR dance
|
||||
# and removes the dependence on `gh` CLI / GraphQL entirely.
|
||||
#
|
||||
# ============================================================
|
||||
# Identity + token (anti-bot-ring per saved-memory
|
||||
# `feedback_per_agent_gitea_identity_default`)
|
||||
# ============================================================
|
||||
#
|
||||
# This workflow uses `secrets.AUTO_SYNC_TOKEN`, which is a
|
||||
# personal access token issued to the `devops-engineer`
|
||||
# persona on Gitea — NOT the founder PAT. The bot-ring
|
||||
# fingerprint that triggered the GitHub org suspension on
|
||||
# 2026-05-06 was characterised by founder PAT acting as CI
|
||||
# at machine speed; per-persona identities split the
|
||||
# attribution honestly.
|
||||
#
|
||||
# Token scope on Gitea: repo write. Push target restricted
|
||||
# to `staging` (this workflow is the only writer; main is
|
||||
# untouched). Compromise blast radius: bounded to staging
|
||||
# branch + this repo's read surface.
|
||||
#
|
||||
# Commits are authored by the persona email
|
||||
# `devops-engineer@agents.moleculesai.app` so commit history
|
||||
# reflects which automation produced the merge.
|
||||
#
|
||||
# ============================================================
|
||||
# Failure modes & operational notes
|
||||
# ============================================================
|
||||
#
|
||||
# A — staging has commits main doesn't, and the merge
|
||||
# conflicts:
|
||||
# - The `--no-ff` merge step exits non-zero. Workflow
|
||||
# fails red. Operator (devops-engineer or human)
|
||||
# resolves manually:
|
||||
# git fetch origin
|
||||
# git checkout staging
|
||||
# git merge --no-ff origin/main
|
||||
# # resolve conflicts
|
||||
# git push origin staging
|
||||
# - Step summary surfaces the conflict so the failed run
|
||||
# is self-explanatory.
|
||||
#
|
||||
# B — `AUTO_SYNC_TOKEN` rotated / wrong scope:
|
||||
# - `git push` step exits non-zero with `HTTP 401` /
|
||||
# `403`. Step summary surfaces the failed push.
|
||||
# - Re-issue the token from `~/.molecule-ai/personas/`
|
||||
# on the operator host and update the repo Actions
|
||||
# secret. Re-run the workflow.
|
||||
#
|
||||
# C — staging branch protection no longer whitelists
|
||||
# `devops-engineer`:
|
||||
# - `git push` exits non-zero with a Gitea protected-
|
||||
# branch rejection. Step summary surfaces it.
|
||||
# - Re-add `devops-engineer` to
|
||||
# `push_whitelist_usernames` on the staging
|
||||
# protection (Settings → Branches → staging).
|
||||
#
|
||||
# D — concurrent push to main while a sync is in flight:
|
||||
# - The `concurrency` group below serialises runs.
|
||||
# The second waits for the first; if main advances
|
||||
# again while we're syncing, the second run picks
|
||||
# up the new tip on its own fetch.
|
||||
#
|
||||
# ============================================================
|
||||
# Loop safety
|
||||
# ============================================================
|
||||
#
|
||||
# The push to staging from this workflow does NOT itself
|
||||
# fire a `push: branches: [main]` event (different branch),
|
||||
# so there's no risk of self-recursion. `auto-promote-staging.yml`
|
||||
# fires on `workflow_run` of CI etc. — it sees the new
|
||||
# staging tip on its next gate-completion event, NOT on this
|
||||
# push directly. No loop.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
# workflow_dispatch lets operators manually backfill a
|
||||
# missed sync (e.g. if AUTO_SYNC_TOKEN was rotated and a
|
||||
# main push slipped through while the secret was stale).
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
concurrency:
|
||||
group: auto-sync-main-to-staging
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
sync-staging:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout staging (with devops-engineer push token)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: staging
|
||||
# AUTO_SYNC_TOKEN authenticates as the
|
||||
# `devops-engineer` Gitea persona — the only
|
||||
# identity whitelisted for direct push to
|
||||
# staging. See header comment for context.
|
||||
token: ${{ secrets.AUTO_SYNC_TOKEN }}
|
||||
|
||||
- name: Configure git author
|
||||
run: |
|
||||
# Per-persona identity, NOT founder PAT.
|
||||
# `feedback_per_agent_gitea_identity_default`.
|
||||
git config user.name "devops-engineer"
|
||||
git config user.email "devops-engineer@agents.moleculesai.app"
|
||||
|
||||
- name: Check if staging already contains main
|
||||
id: check
|
||||
run: |
|
||||
set -euo pipefail
|
||||
git fetch origin main
|
||||
if git merge-base --is-ancestor origin/main HEAD; then
|
||||
echo "needs_sync=false" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo "## No-op"
|
||||
echo
|
||||
echo "staging already contains \`origin/main\` ($(git rev-parse --short=8 origin/main))."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
else
|
||||
echo "needs_sync=true" >> "$GITHUB_OUTPUT"
|
||||
MAIN_SHORT=$(git rev-parse --short=8 origin/main)
|
||||
echo "main_short=${MAIN_SHORT}" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::staging is missing main's tip (${MAIN_SHORT}) — merging in-runner and pushing"
|
||||
fi
|
||||
|
||||
- name: Merge main into staging (in-runner)
|
||||
if: steps.check.outputs.needs_sync == 'true'
|
||||
id: merge
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Already on staging from checkout. Try fast-forward
|
||||
# first (cleanest history); fall back to merge commit
|
||||
# if staging has commits main doesn't.
|
||||
if git merge --ff-only origin/main; then
|
||||
echo "did_ff=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::Fast-forwarded staging to origin/main"
|
||||
else
|
||||
echo "did_ff=false" >> "$GITHUB_OUTPUT"
|
||||
if ! git merge --no-ff origin/main \
|
||||
-m "chore: sync main → staging (auto, ${{ steps.check.outputs.main_short }})"; then
|
||||
# Hygiene: leave the work tree clean before failing.
|
||||
git merge --abort || true
|
||||
{
|
||||
echo "## Conflict"
|
||||
echo
|
||||
echo "Auto-merge \`main → staging\` failed with conflicts."
|
||||
echo "A human (or devops-engineer persona) needs to resolve manually:"
|
||||
echo
|
||||
echo '```'
|
||||
echo "git fetch origin"
|
||||
echo "git checkout staging"
|
||||
echo "git merge --no-ff origin/main"
|
||||
echo "# resolve conflicts"
|
||||
echo "git push origin staging"
|
||||
echo '```'
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Push staging to origin
|
||||
if: steps.check.outputs.needs_sync == 'true'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Direct push to staging. devops-engineer persona is
|
||||
# whitelisted for direct push on the staging branch
|
||||
# protection (Settings → Branches → staging).
|
||||
#
|
||||
# No --force / --force-with-lease: a fast-forward or
|
||||
# legitimate merge commit on top of current staging
|
||||
# is the only thing we'd ever push. If origin/staging
|
||||
# advanced under us (concurrent merge), the push
|
||||
# legitimately rejects and the next run picks up the
|
||||
# new state.
|
||||
if ! git push origin staging; then
|
||||
{
|
||||
echo "## Push rejected"
|
||||
echo
|
||||
echo "Direct push to \`staging\` failed. Likely causes:"
|
||||
echo "- \`AUTO_SYNC_TOKEN\` rotated / wrong scope (HTTP 401/403)"
|
||||
echo "- \`devops-engineer\` no longer in"
|
||||
echo " \`push_whitelist_usernames\` on the staging"
|
||||
echo " branch protection (HTTP 422)"
|
||||
echo "- staging advanced concurrently — re-running this"
|
||||
echo " workflow on the new main tip will pick it up"
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
{
|
||||
echo "## Auto-sync succeeded"
|
||||
echo
|
||||
echo "- staging advanced to: \`$(git rev-parse --short=8 HEAD)\`"
|
||||
echo "- main tip: \`${{ steps.check.outputs.main_short }}\`"
|
||||
echo "- Strategy: $([ "${{ steps.merge.outputs.did_ff }}" = "true" ] && echo "fast-forward" || echo "merge commit")"
|
||||
echo "- Pushed by: \`devops-engineer\` (per-agent persona, anti-bot-ring)"
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
82
.github/workflows/canary-staging.yml
vendored
82
.github/workflows/canary-staging.yml
vendored
@ -20,19 +20,6 @@ on:
|
||||
# a few minutes under load — that's fine for a canary.
|
||||
- cron: '*/30 * * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
keep_on_failure:
|
||||
description: >-
|
||||
Skip teardown when the canary fails (debugging only). The
|
||||
tenant org + EC2 + CF tunnel + DNS stay alive so an operator
|
||||
can SSM into the workspace EC2 and capture docker logs of the
|
||||
failing claude-code container. REMEMBER to manually delete
|
||||
via DELETE /cp/admin/tenants/<slug> when done so the org
|
||||
doesn't accumulate cost. Only honored on workflow_dispatch;
|
||||
cron runs always tear down (we don't want unattended cron
|
||||
to leak resources).
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
# Serialise with the full-SaaS workflow so they don't contend for the
|
||||
# same org-create quota on staging. Different group key from
|
||||
@ -93,14 +80,6 @@ jobs:
|
||||
# is "Token Plan only" but cheap-per-token and fast.
|
||||
E2E_MODEL_SLUG: MiniMax-M2.7-highspeed
|
||||
E2E_RUN_ID: "canary-${{ github.run_id }}"
|
||||
# Debug-only: when an operator dispatches with keep_on_failure=true,
|
||||
# the canary script's E2E_KEEP_ORG=1 path skips teardown so the
|
||||
# tenant org + EC2 stay alive for SSM-based log capture. Cron runs
|
||||
# never set this (the input only exists on workflow_dispatch) so
|
||||
# unattended cron always tears down. See molecule-core#129
|
||||
# failure mode #1 — capturing the actual exception requires
|
||||
# docker logs from the live container.
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_on_failure == 'true' && '1' || '0' }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@ -158,28 +137,27 @@ jobs:
|
||||
id: canary
|
||||
run: bash tests/e2e/test_staging_full_saas.sh
|
||||
|
||||
# Alerting: open a sticky issue on the FIRST failure; comment on
|
||||
# subsequent failures; auto-close on next green. Comment-on-existing
|
||||
# de-duplicates so a single open issue accumulates the streak —
|
||||
# ops sees one issue with N comments rather than N issues.
|
||||
# Alerting: open an issue only after THREE consecutive failures so
|
||||
# transient flakes (Cloudflare DNS hiccup, AWS API blip) don't spam
|
||||
# the issue list. If an issue is already open, we still comment on
|
||||
# every failure so ops sees the streak. Auto-close on next green.
|
||||
#
|
||||
# Why no consecutive-failures threshold (e.g., wait 3 runs before
|
||||
# filing): the prior threshold check used
|
||||
# `github.rest.actions.listWorkflowRuns()` which Gitea 1.22.6 does
|
||||
# not expose (returns 404). On Gitea Actions the threshold call
|
||||
# ALWAYS failed, breaking the entire alerting step and going days
|
||||
# silent on real regressions (38h+ chronic red on 2026-05-07/08
|
||||
# before this fix; tracked in molecule-core#129). Filing on first
|
||||
# failure is also better UX — we want to know about the first red,
|
||||
# not wait 90 min for it to "count." Real flakes get one issue +
|
||||
# a quick close-on-green; persistent reds accumulate comments.
|
||||
# Threshold rationale: canary fires every 30 min, so 3 failures =
|
||||
# ~90 min of consecutive red — well past any single-run flake but
|
||||
# still tight enough that a real outage gets surfaced before the
|
||||
# next deploy window.
|
||||
- name: Open issue on failure
|
||||
if: failure()
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
|
||||
env:
|
||||
# Inject the workflow path explicitly — context.workflow is
|
||||
# the *name*, not the file path the actions API needs.
|
||||
WORKFLOW_PATH: '.github/workflows/canary-staging.yml'
|
||||
CONSECUTIVE_THRESHOLD: '3'
|
||||
with:
|
||||
script: |
|
||||
const title = '🔴 Canary failing: staging SaaS smoke';
|
||||
const runURL = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
|
||||
const runURL = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
|
||||
|
||||
// Find an existing open canary issue (stable title match).
|
||||
// If one exists, this isn't a "first failure" — comment and exit.
|
||||
@ -199,12 +177,32 @@ jobs:
|
||||
return;
|
||||
}
|
||||
|
||||
// No open issue yet — file one on this first failure. The
|
||||
// comment-on-existing branch above means subsequent failures
|
||||
// accumulate as comments on this same issue, so we don't
|
||||
// spam new issues per run.
|
||||
// No open issue yet — check the last N-1 runs' conclusions.
|
||||
// We open the issue only if the last (THRESHOLD-1) runs ALSO
|
||||
// failed (so this is the 3rd consecutive red).
|
||||
const threshold = parseInt(process.env.CONSECUTIVE_THRESHOLD, 10);
|
||||
const { data: runs } = await github.rest.actions.listWorkflowRuns({
|
||||
owner: context.repo.owner, repo: context.repo.repo,
|
||||
workflow_id: process.env.WORKFLOW_PATH,
|
||||
status: 'completed',
|
||||
per_page: threshold,
|
||||
// Skip the current in-progress run; it isn't 'completed' yet.
|
||||
});
|
||||
// listWorkflowRuns returns recent first. We need (threshold-1)
|
||||
// prior failures (current run is the threshold-th).
|
||||
const priorFailures = (runs.workflow_runs || [])
|
||||
.slice(0, threshold - 1)
|
||||
.filter(r => r.id !== context.runId)
|
||||
.filter(r => r.conclusion === 'failure')
|
||||
.length;
|
||||
if (priorFailures < threshold - 1) {
|
||||
core.info(`Below threshold: ${priorFailures + 1}/${threshold} consecutive failures — not filing yet`);
|
||||
return;
|
||||
}
|
||||
|
||||
const body =
|
||||
`Canary run failed at ${new Date().toISOString()}.\n\n` +
|
||||
`Canary run failed at ${new Date().toISOString()}, ` +
|
||||
`${threshold} consecutive runs red.\n\n` +
|
||||
`Run: ${runURL}\n\n` +
|
||||
`This issue auto-closes on the next green canary run. ` +
|
||||
`Consecutive failures add a comment here rather than a new issue.`;
|
||||
@ -213,7 +211,7 @@ jobs:
|
||||
title, body,
|
||||
labels: ['canary-staging', 'bug'],
|
||||
});
|
||||
core.info('Opened canary failure issue (first red)');
|
||||
core.info(`Opened canary failure issue (${threshold} consecutive reds)`);
|
||||
|
||||
- name: Auto-close canary issue on success
|
||||
if: success()
|
||||
|
||||
4
.github/workflows/e2e-staging-canvas.yml
vendored
4
.github/workflows/e2e-staging-canvas.yml
vendored
@ -22,9 +22,9 @@ on:
|
||||
# spending CI cycles. See e2e-api.yml for the rationale on why this
|
||||
# is a single job rather than two-jobs-sharing-name.
|
||||
push:
|
||||
branches: [main]
|
||||
branches: [main, staging]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
branches: [main, staging]
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly on Sunday 08:00 UTC — catches Chrome / Playwright / Next.js
|
||||
|
||||
4
.github/workflows/e2e-staging-external.yml
vendored
4
.github/workflows/e2e-staging-external.yml
vendored
@ -32,7 +32,7 @@ name: E2E Staging External Runtime
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
branches: [staging, main]
|
||||
paths:
|
||||
- 'workspace-server/internal/handlers/workspace.go'
|
||||
- 'workspace-server/internal/handlers/registry.go'
|
||||
@ -44,7 +44,7 @@ on:
|
||||
- 'tests/e2e/test_staging_external_runtime.sh'
|
||||
- '.github/workflows/e2e-staging-external.yml'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
branches: [staging, main]
|
||||
paths:
|
||||
- 'workspace-server/internal/handlers/workspace.go'
|
||||
- 'workspace-server/internal/handlers/registry.go'
|
||||
|
||||
13
.github/workflows/e2e-staging-saas.yml
vendored
13
.github/workflows/e2e-staging-saas.yml
vendored
@ -20,12 +20,13 @@ name: E2E Staging SaaS (full lifecycle)
|
||||
# via the same paths watcher that e2e-api.yml uses)
|
||||
|
||||
on:
|
||||
# Trunk-based (Phase 3 of internal#81): main is the only branch.
|
||||
# Previously this fired on staging push too because staging was a
|
||||
# superset of main and ran the gate ahead of auto-promote; with no
|
||||
# staging branch, main is where E2E gates the deploy.
|
||||
# Fire on staging push too — previously this only ran on main, which
|
||||
# meant the most thorough end-to-end test caught regressions AFTER
|
||||
# they shipped to staging (and then to the auto-promote PR). Running
|
||||
# on staging push catches them BEFORE the staging→main promotion
|
||||
# opens, so a green canary into auto-promote is more meaningful.
|
||||
push:
|
||||
branches: [main]
|
||||
branches: [staging, main]
|
||||
paths:
|
||||
- 'workspace-server/internal/handlers/registry.go'
|
||||
- 'workspace-server/internal/handlers/workspace_provision.go'
|
||||
@ -35,7 +36,7 @@ on:
|
||||
- 'tests/e2e/test_staging_full_saas.sh'
|
||||
- '.github/workflows/e2e-staging-saas.yml'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
branches: [staging, main]
|
||||
paths:
|
||||
- 'workspace-server/internal/handlers/registry.go'
|
||||
- 'workspace-server/internal/handlers/workspace_provision.go'
|
||||
|
||||
11
.github/workflows/harness-replays.yml
vendored
11
.github/workflows/harness-replays.yml
vendored
@ -119,17 +119,6 @@ jobs:
|
||||
# symptom, different root cause: staging still has the in-image
|
||||
# clone path, hits the auth error directly).
|
||||
#
|
||||
# 2026-05-08 sub-finding (#192): the clone step ALSO fails when
|
||||
# any referenced workspace-template repo is private and the
|
||||
# AUTO_SYNC_TOKEN bearer (devops-engineer persona) lacks read
|
||||
# access. Root cause: 5 of 9 workspace-template repos
|
||||
# (openclaw, codex, crewai, deepagents, gemini-cli) had been
|
||||
# marked private with no team grant. Resolution: flipped them
|
||||
# to public per `feedback_oss_first_repo_visibility_default`
|
||||
# (the OSS surface should be public). Layer-3 (customer-private +
|
||||
# marketplace third-party repos) tracked separately in
|
||||
# internal#102.
|
||||
#
|
||||
# Token shape matches publish-workspace-server-image.yml: AUTO_SYNC_TOKEN
|
||||
# is the devops-engineer persona PAT, NOT the founder PAT (per
|
||||
# `feedback_per_agent_gitea_identity_default`). clone-manifest.sh
|
||||
|
||||
@ -36,7 +36,7 @@ on:
|
||||
workflow_run:
|
||||
workflows: ['publish-workspace-server-image']
|
||||
types: [completed]
|
||||
branches: [main]
|
||||
branches: [staging]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
target_tag:
|
||||
|
||||
276
.github/workflows/retarget-main-to-staging.yml
vendored
Normal file
276
.github/workflows/retarget-main-to-staging.yml
vendored
Normal file
@ -0,0 +1,276 @@
|
||||
name: Retarget main PRs to staging
|
||||
|
||||
# Mechanical enforcement of SHARED_RULES rule 8 ("Staging-first
|
||||
# workflow, no exceptions"). When a bot opens a PR against `main`,
|
||||
# retarget it to `staging` automatically and leave an explanatory
|
||||
# comment. Human / CEO-authored PRs (the staging→main promotion
|
||||
# PRs, etc.) are left alone — they're the authorised exception
|
||||
# to the rule.
|
||||
#
|
||||
# ============================================================
|
||||
# What this workflow does
|
||||
# ============================================================
|
||||
#
|
||||
# On `pull_request_target` opened/reopened against `main`:
|
||||
# 1. If the PR head is `staging`, skip (the auto-promote PRs
|
||||
# MUST stay base=main).
|
||||
# 2. If the PR author is a bot, retarget the PR base to
|
||||
# `staging` via Gitea REST `PATCH /pulls/{N}` body
|
||||
# `{"base":"staging"}`.
|
||||
# 3. If the retarget returns 422 "pull request already exists
|
||||
# for base branch 'staging'" (issue #1884 case: another PR
|
||||
# on the same head already targets staging), close the
|
||||
# now-redundant main-PR via Gitea REST instead of failing
|
||||
# red.
|
||||
# 4. Post an explainer comment on the retargeted PR via
|
||||
# Gitea REST `POST /issues/{N}/comments`.
|
||||
#
|
||||
# ============================================================
|
||||
# Why Gitea REST (and not `gh api / gh pr close / gh pr comment`)
|
||||
# ============================================================
|
||||
#
|
||||
# Pre-2026-05-06 this workflow used `gh api -X PATCH "repos/{owner}/{repo}/pulls/{N}" -f base=staging`
|
||||
# plus `gh pr close` and `gh pr comment`. After the GitHub→Gitea
|
||||
# cutover those calls fail because:
|
||||
#
|
||||
# - `gh` CLI defaults to `api.github.com`. Even with `GH_HOST`
|
||||
# pointing at Gitea, `gh pr close / comment` route through
|
||||
# GraphQL (`/api/graphql`) which Gitea does not expose.
|
||||
# Empirical: every `gh pr *` call returns
|
||||
# `HTTP 405 Method Not Allowed (https://git.moleculesai.app/api/graphql)`
|
||||
# — same root cause as #65 (auto-sync, fixed in PR #66) and
|
||||
# #73/#195 (auto-promote, fixed in PR #78).
|
||||
# - `gh api -X PATCH /pulls/{N}` happens to use a REST path
|
||||
# that Gitea also has, but the `gh` host-resolution layer
|
||||
# and pagination/retry logic don't always hit Gitea cleanly,
|
||||
# and the cost of switching to direct `curl` is one extra
|
||||
# line of code.
|
||||
#
|
||||
# So this workflow uses direct `curl` calls to Gitea REST. No
|
||||
# `gh` CLI dependency, no GraphQL, no flaky host-resolution.
|
||||
#
|
||||
# ============================================================
|
||||
# Identity + token (anti-bot-ring per saved-memory
|
||||
# `feedback_per_agent_gitea_identity_default`)
|
||||
# ============================================================
|
||||
#
|
||||
# Pre-fix this workflow used the per-job ephemeral
|
||||
# `secrets.GITHUB_TOKEN`. On Gitea Actions that token has
|
||||
# narrow scope and unpredictable cross-PR write capability.
|
||||
#
|
||||
# Post-fix: `secrets.AUTO_SYNC_TOKEN` (the `devops-engineer`
|
||||
# Gitea persona). Same persona used by `auto-sync-main-to-staging.yml`
|
||||
# (PR #66) and `auto-promote-staging.yml` (PR #78). Token scope:
|
||||
# `push: true` repo write, sufficient for PR-edit + close + comment.
|
||||
#
|
||||
# Why this token does NOT need branch-protection bypass:
|
||||
# patching a PR's base ref is a PR-level operation that does not
|
||||
# require push perms on either branch (the PR's own commits stay
|
||||
# put; only the metadata changes).
|
||||
#
|
||||
# ============================================================
|
||||
# Failure modes & operational notes
|
||||
# ============================================================
|
||||
#
|
||||
# A — PATCH base→staging returns 422 "pull request already exists"
|
||||
# (issue #1884 case):
|
||||
# - Detected by string-match on response body. Workflow
|
||||
# falls through to closing the now-redundant main-PR
|
||||
# (Gitea REST `PATCH /pulls/{N}` with `state: closed`)
|
||||
# and posts an explanation comment. Step summary surfaces.
|
||||
#
|
||||
# B — `AUTO_SYNC_TOKEN` rotated / wrong scope:
|
||||
# - First REST call returns 401/403. Step summary surfaces.
|
||||
# Re-issue token from `~/.molecule-ai/personas/` on the
|
||||
# operator host and update repo Actions secret.
|
||||
#
|
||||
# C — PR was deleted between trigger and run:
|
||||
# - REST call returns 404. Workflow exits 0 with a notice
|
||||
# (the rule was already enforced or the PR is gone).
|
||||
#
|
||||
# D — author is not actually a bot but the filter mis-fires:
|
||||
# - Filter is conservative: only triggers on
|
||||
# `user.type == 'Bot'`, `login` ends with `[bot]`, or
|
||||
# known bot logins (`molecule-ai[bot]`, `app/molecule-ai`).
|
||||
# Human PRs slip through unaffected. If a NEW bot login
|
||||
# starts shipping main-PRs, add it to the filter.
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, reopened]
|
||||
branches: [main]
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
retarget:
|
||||
name: Retarget to staging
|
||||
runs-on: ubuntu-latest
|
||||
# Only fire for bot-authored PRs. Human CEO PRs (staging→main
|
||||
# promotion) are intentional and pass through.
|
||||
#
|
||||
# Head-ref guard: never retarget a PR whose head IS `staging`
|
||||
# — those are the auto-promote staging→main PRs (opened by
|
||||
# `devops-engineer` since PR #78 / #195 fix). Retargeting
|
||||
# head=staging onto base=staging fails with HTTP 422 "no new
|
||||
# commits between base 'staging' and head 'staging'", which
|
||||
# would surface as a noisy red workflow run on every
|
||||
# auto-promote (caught 2026-05-03 on the GitHub-era PR #2588).
|
||||
if: >-
|
||||
github.event.pull_request.head.ref != 'staging'
|
||||
&& (
|
||||
github.event.pull_request.user.type == 'Bot'
|
||||
|| endsWith(github.event.pull_request.user.login, '[bot]')
|
||||
|| github.event.pull_request.user.login == 'app/molecule-ai'
|
||||
|| github.event.pull_request.user.login == 'molecule-ai[bot]'
|
||||
|| github.event.pull_request.user.login == 'devops-engineer'
|
||||
)
|
||||
steps:
|
||||
- name: Retarget PR base to staging via Gitea REST
|
||||
id: retarget
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
|
||||
GITEA_HOST: ${{ vars.GITEA_HOST || 'https://git.moleculesai.app' }}
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
|
||||
# Issue #1884 case: when the bot opens a PR against main
|
||||
# and there's already another PR on the same head branch
|
||||
# targeting staging, Gitea's PATCH returns 422 with a
|
||||
# body mentioning "pull request already exists for base
|
||||
# branch 'staging'" (the Gitea message wording is
|
||||
# slightly different from GitHub's; the substring match
|
||||
# below covers both for forward/back compat).
|
||||
# The retarget can't proceed — but the right response is
|
||||
# to close the now-redundant main-PR, not to fail the
|
||||
# workflow noisily. Detect that specific 422 and close
|
||||
# instead.
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
API="${GITEA_HOST}/api/v1/repos/${REPO}"
|
||||
AUTH=(-H "Authorization: token ${GITEA_TOKEN}" -H "Accept: application/json")
|
||||
|
||||
echo "Retargeting PR #${PR_NUMBER} (author: ${PR_AUTHOR}) from main → staging"
|
||||
|
||||
# Curl-status-capture pattern per `feedback_curl_status_capture_pollution`:
|
||||
# http_code via -w to its own scalar, body to a tempfile, set +e/-e
|
||||
# bracket so curl's non-zero-on-4xx doesn't pollute the script's exit chain.
|
||||
BODY_FILE=$(mktemp)
|
||||
REQ='{"base":"staging"}'
|
||||
|
||||
set +e
|
||||
STATUS=$(curl -sS "${AUTH[@]}" -H "Content-Type: application/json" \
|
||||
-X PATCH -d "${REQ}" \
|
||||
-o "${BODY_FILE}" -w "%{http_code}" \
|
||||
"${API}/pulls/${PR_NUMBER}")
|
||||
CURL_RC=$?
|
||||
set -e
|
||||
|
||||
if [ "${CURL_RC}" -ne 0 ]; then
|
||||
echo "::error::curl PATCH failed (rc=${CURL_RC})"
|
||||
rm -f "${BODY_FILE}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "${STATUS}" = "201" ] || [ "${STATUS}" = "200" ]; then
|
||||
NEW_BASE=$(jq -r '.base.ref // "?"' < "${BODY_FILE}")
|
||||
rm -f "${BODY_FILE}"
|
||||
if [ "${NEW_BASE}" = "staging" ]; then
|
||||
echo "::notice::Retargeted PR #${PR_NUMBER} → staging"
|
||||
echo "outcome=retargeted" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
echo "::error::PATCH returned ${STATUS} but base.ref is '${NEW_BASE}', not 'staging'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Specifically match the 422 duplicate-base/head error so
|
||||
# any OTHER PATCH failure (auth, deleted PR, etc.) still
|
||||
# surfaces as a real workflow failure.
|
||||
BODY=$(cat "${BODY_FILE}" || true)
|
||||
rm -f "${BODY_FILE}"
|
||||
|
||||
if [ "${STATUS}" = "422" ] && echo "${BODY}" | grep -qE "(pull request already exists for base branch 'staging'|already exists.*base.*staging)"; then
|
||||
echo "::notice::PR #${PR_NUMBER}: duplicate target-staging PR exists on same head — closing this main-PR as redundant."
|
||||
|
||||
# Close the now-redundant main-PR via Gitea REST
|
||||
# (PATCH state=closed). Post comment explaining
|
||||
# rationale BEFORE close so the comment lands on the
|
||||
# PR (commenting on a closed PR works on Gitea, but
|
||||
# historically caused notification ordering surprises).
|
||||
|
||||
CLOSE_BODY_FILE=$(mktemp)
|
||||
CMT_REQ=$(jq -n '{body:"[retarget-bot] Closing — another PR on the same head branch already targets `staging`. This PR is redundant. See issue #1884 for the rationale."}')
|
||||
set +e
|
||||
CMT_STATUS=$(curl -sS "${AUTH[@]}" -H "Content-Type: application/json" \
|
||||
-X POST -d "${CMT_REQ}" \
|
||||
-o "${CLOSE_BODY_FILE}" -w "%{http_code}" \
|
||||
"${API}/issues/${PR_NUMBER}/comments")
|
||||
set -e
|
||||
if [ "${CMT_STATUS}" != "201" ]; then
|
||||
echo "::warning::dup-close comment POST returned ${CMT_STATUS}; continuing to close anyway"
|
||||
cat "${CLOSE_BODY_FILE}" | head -c 300 || true
|
||||
fi
|
||||
rm -f "${CLOSE_BODY_FILE}"
|
||||
|
||||
CLOSE_REQ='{"state":"closed"}'
|
||||
CLOSE_RESP=$(mktemp)
|
||||
set +e
|
||||
CL_STATUS=$(curl -sS "${AUTH[@]}" -H "Content-Type: application/json" \
|
||||
-X PATCH -d "${CLOSE_REQ}" \
|
||||
-o "${CLOSE_RESP}" -w "%{http_code}" \
|
||||
"${API}/pulls/${PR_NUMBER}")
|
||||
set -e
|
||||
if [ "${CL_STATUS}" = "201" ] || [ "${CL_STATUS}" = "200" ]; then
|
||||
echo "::notice::Closed PR #${PR_NUMBER} as redundant"
|
||||
echo "outcome=closed-as-duplicate" >> "$GITHUB_OUTPUT"
|
||||
rm -f "${CLOSE_RESP}"
|
||||
exit 0
|
||||
fi
|
||||
echo "::error::Failed to close redundant PR: HTTP ${CL_STATUS}"
|
||||
cat "${CLOSE_RESP}" | head -c 300 || true
|
||||
rm -f "${CLOSE_RESP}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "::error::Retarget PATCH failed and was NOT a duplicate-base error: HTTP ${STATUS}"
|
||||
echo "${BODY}" | head -c 500 >&2
|
||||
exit 1
|
||||
|
||||
- name: Post explainer comment
|
||||
if: steps.retarget.outputs.outcome == 'retargeted'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
|
||||
GITEA_HOST: ${{ vars.GITEA_HOST || 'https://git.moleculesai.app' }}
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
API="${GITEA_HOST}/api/v1/repos/${REPO}"
|
||||
AUTH=(-H "Authorization: token ${GITEA_TOKEN}" -H "Accept: application/json")
|
||||
|
||||
# PR comments live on the issue endpoint in Gitea
|
||||
# (PRs ARE issues — same endpoint, different sub-resources
|
||||
# for diffs/files/etc.). The body uses jq to safely
|
||||
# encode the multi-line markdown without shell-quote
|
||||
# nightmares.
|
||||
REQ=$(jq -n '{body:"[retarget-bot] This PR was opened against `main` and has been retargeted to `staging` automatically.\n\n**Why:** per [SHARED_RULES rule 8](https://git.moleculesai.app/molecule-ai/molecule-ai-org-template-molecule-dev/src/branch/main/SHARED_RULES.md), all feature work targets `staging` first; the CEO promotes `staging → main` separately.\n\n**What changed:** just the base branch — no code change. CI will re-run against `staging`. If you get merge conflicts, rebase on `staging`.\n\n**If this PR is the CEO`s staging→main promotion:** the Action skipped you (only bot-authored PRs are retargeted, head=staging is also exempted). If you see this comment on your CEO PR, that`s a bug — please tag @hongmingwang."}')
|
||||
|
||||
BODY_FILE=$(mktemp)
|
||||
set +e
|
||||
STATUS=$(curl -sS "${AUTH[@]}" -H "Content-Type: application/json" \
|
||||
-X POST -d "${REQ}" \
|
||||
-o "${BODY_FILE}" -w "%{http_code}" \
|
||||
"${API}/issues/${PR_NUMBER}/comments")
|
||||
set -e
|
||||
|
||||
if [ "${STATUS}" = "201" ]; then
|
||||
echo "::notice::Posted explainer comment on PR #${PR_NUMBER}"
|
||||
else
|
||||
echo "::warning::Failed to post explainer (HTTP ${STATUS}) — retarget itself succeeded"
|
||||
cat "${BODY_FILE}" | head -c 300 || true
|
||||
fi
|
||||
rm -f "${BODY_FILE}"
|
||||
28
Makefile
28
Makefile
@ -1,28 +0,0 @@
|
||||
# Top-level Makefile — convenience wrappers around docker compose.
|
||||
#
|
||||
# Most molecule-core dev work happens via these shortcuts. CI doesn't
|
||||
# use this Makefile; CI calls docker compose / go test directly so the
|
||||
# Makefile can evolve without breaking the build.
|
||||
|
||||
.PHONY: help dev up down logs build test
|
||||
|
||||
help: ## Show this help.
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## ' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-12s\033[0m %s\n", $$1, $$2}'
|
||||
|
||||
dev: ## Start the full stack with air hot-reload for the platform service.
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up
|
||||
|
||||
up: ## Start the full stack in production-shape mode (no air, normal Dockerfile).
|
||||
docker compose up
|
||||
|
||||
down: ## Stop the stack and remove containers (volumes preserved).
|
||||
docker compose down
|
||||
|
||||
logs: ## Tail logs from all services (Ctrl-C to detach).
|
||||
docker compose logs -f
|
||||
|
||||
build: ## Force a fresh build of the platform image (no cache).
|
||||
docker compose build --no-cache platform
|
||||
|
||||
test: ## Run Go unit tests in workspace-server/.
|
||||
cd workspace-server && go test -race ./...
|
||||
@ -1,10 +0,0 @@
|
||||
# Excluded from `docker build` context. Without this, the COPY . . step in
|
||||
# canvas/Dockerfile clobbers the freshly-installed node_modules with the
|
||||
# host's (potentially broken / wrong-arch) copy — the @tailwindcss/oxide
|
||||
# native binary disagreed and broke `next build`.
|
||||
node_modules
|
||||
.next
|
||||
.git
|
||||
*.log
|
||||
.env*
|
||||
!.env.example
|
||||
@ -1,11 +1,7 @@
|
||||
FROM node:22-alpine AS builder
|
||||
WORKDIR /app
|
||||
COPY package.json package-lock.json* ./
|
||||
# `npm ci` (not `install`) for lockfile-exact reproducibility.
|
||||
# `--include=optional` ensures the platform-specific @tailwindcss/oxide
|
||||
# native binary lands — without it, postcss fails with "Cannot read
|
||||
# properties of undefined (reading 'All')" at build time.
|
||||
RUN npm ci --include=optional
|
||||
RUN npm install
|
||||
COPY . .
|
||||
ARG NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080
|
||||
ARG NEXT_PUBLIC_WS_URL=ws://localhost:8080/ws
|
||||
|
||||
@ -17,24 +17,6 @@ import { dirname, join } from "node:path";
|
||||
// update one heuristic. Production is unaffected: `output: "standalone"`
|
||||
// bakes resolved env into the build, and the marker file isn't shipped.
|
||||
loadMonorepoEnv();
|
||||
// Boot-time matched-pair guard for ADMIN_TOKEN / NEXT_PUBLIC_ADMIN_TOKEN.
|
||||
// When ADMIN_TOKEN is set on the workspace-server (server-side bearer
|
||||
// gate, wsauth_middleware.go ~L245), the canvas MUST send the matching
|
||||
// NEXT_PUBLIC_ADMIN_TOKEN as `Authorization: Bearer ...` on every API
|
||||
// call. If only one is set, every workspace API call 401s silently —
|
||||
// the canvas hydrates with empty data and the user sees a broken page
|
||||
// with no console hint about the auth-config mismatch.
|
||||
//
|
||||
// Pre-fix the matched-pair contract was descriptive only (a comment in
|
||||
// .env): future devs/agents could re-misconfigure with one of the two
|
||||
// unset and silently 401. Closes the post-PR-#174 self-review gap.
|
||||
//
|
||||
// Warn-only (not exit) — production canvas Docker images bake these
|
||||
// vars into the build at image-build time, and a missed pair there
|
||||
// would still emit the warning at runtime via the standalone server's
|
||||
// startup. Killing the process on misconfiguration would turn a
|
||||
// recoverable auth issue into a hard crashloop.
|
||||
checkAdminTokenPair();
|
||||
|
||||
const nextConfig: NextConfig = {
|
||||
output: "standalone",
|
||||
@ -75,43 +57,6 @@ function loadMonorepoEnv() {
|
||||
);
|
||||
}
|
||||
|
||||
// Boot-time matched-pair guard. Runs after .env has been loaded so the
|
||||
// check sees the post-load state. The two env vars must be set or
|
||||
// unset together; one-without-the-other is the silent-401 footgun.
|
||||
//
|
||||
// Treats empty string ("") as unset. An explicitly-empty `KEY=` in
|
||||
// .env counts as set-to-empty in `process.env`, but for auth purposes
|
||||
// an empty bearer token is equivalent to no token — so both
|
||||
// `ADMIN_TOKEN=` and an unset ADMIN_TOKEN are equivalent relative to
|
||||
// the matched-pair invariant.
|
||||
//
|
||||
// Returns void; side effect is the console.error warning. Kept as a
|
||||
// separate function (exported) so a future test can reset env, call
|
||||
// this, and assert on captured stderr.
|
||||
export function checkAdminTokenPair(): void {
|
||||
const serverSet = !!process.env.ADMIN_TOKEN;
|
||||
const clientSet = !!process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
if (serverSet === clientSet) return;
|
||||
// Distinct messages so the operator can tell which half is missing
|
||||
// — the fix is symmetric (set the other one) but the diagnostic
|
||||
// mentions which side is currently set so they don't have to grep.
|
||||
if (serverSet && !clientSet) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(
|
||||
"[next.config] ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not — " +
|
||||
"canvas will 401 against workspace-server because the bearer header " +
|
||||
"is never attached. Set both to the same value, or unset both.",
|
||||
);
|
||||
} else {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(
|
||||
"[next.config] NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not — " +
|
||||
"workspace-server will reject the bearer because no AdminAuth gate " +
|
||||
"is configured. Set both to the same value, or unset both.",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function findMonorepoRoot(start: string): string | null {
|
||||
let dir = start;
|
||||
for (let i = 0; i < 6; i++) {
|
||||
|
||||
@ -9,7 +9,6 @@
|
||||
// AttachmentLightbox).
|
||||
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
import { platformAuthHeaders } from "@/lib/api";
|
||||
import type { ChatAttachment } from "./types";
|
||||
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
||||
import { AttachmentChip } from "./AttachmentViews";
|
||||
@ -44,8 +43,13 @@ export function AttachmentAudio({ workspaceId, attachment, onDownload, tone }: P
|
||||
void (async () => {
|
||||
try {
|
||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||
const headers: Record<string, string> = {};
|
||||
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||
const slug = getTenantSlug();
|
||||
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||
const res = await fetch(href, {
|
||||
headers: platformAuthHeaders(),
|
||||
headers,
|
||||
credentials: "include",
|
||||
signal: AbortSignal.timeout(60_000),
|
||||
});
|
||||
@ -112,5 +116,9 @@ export function AttachmentAudio({ workspaceId, attachment, onDownload, tone }: P
|
||||
);
|
||||
}
|
||||
|
||||
// Local getTenantSlug() removed — auth-header construction now goes
|
||||
// through platformAuthHeaders() from @/lib/api (#178).
|
||||
function getTenantSlug(): string | null {
|
||||
if (typeof window === "undefined") return null;
|
||||
const host = window.location.hostname;
|
||||
const m = host.match(/^([^.]+)\.moleculesai\.app$/);
|
||||
return m ? m[1] : null;
|
||||
}
|
||||
|
||||
@ -35,7 +35,6 @@
|
||||
// downscale via canvas, but defer that to v2.
|
||||
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
import { platformAuthHeaders } from "@/lib/api";
|
||||
import type { ChatAttachment } from "./types";
|
||||
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
||||
import { AttachmentLightbox } from "./AttachmentLightbox";
|
||||
@ -76,14 +75,22 @@ export function AttachmentImage({ workspaceId, attachment, onDownload, tone }: P
|
||||
}
|
||||
|
||||
// Platform-auth path: identical to downloadChatFile but we keep
|
||||
// the blob (don't trigger a Save-As). Auth headers come from the
|
||||
// shared `platformAuthHeaders()` helper — one source of truth for
|
||||
// every authenticated raw fetch in the canvas (#178).
|
||||
// the blob (don't trigger a Save-As). Use the same headers it does
|
||||
// by going through it indirectly — no, downloadChatFile triggers a
|
||||
// Save-As. Need a separate fetch.
|
||||
void (async () => {
|
||||
try {
|
||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||
const headers: Record<string, string> = {};
|
||||
// Read the same env var downloadChatFile reads — single source
|
||||
// of truth would be cleaner; refactor opportunity for PR-2 if
|
||||
// we add the same path to AttachmentVideo.
|
||||
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||
const slug = getTenantSlug();
|
||||
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||
const res = await fetch(href, {
|
||||
headers: platformAuthHeaders(),
|
||||
headers,
|
||||
credentials: "include",
|
||||
signal: AbortSignal.timeout(30_000),
|
||||
});
|
||||
@ -177,7 +184,15 @@ export function AttachmentImage({ workspaceId, attachment, onDownload, tone }: P
|
||||
);
|
||||
}
|
||||
|
||||
// Local getTenantSlug() removed — auth-header construction now goes
|
||||
// through platformAuthHeaders() from @/lib/api which uses the canonical
|
||||
// getTenantSlug() from @/lib/tenant. This eliminates the duplicate
|
||||
// hostname-regex + the duplicate bearer-token-attach pattern (#178).
|
||||
// Internal helper — duplicated from uploads.ts (it's not exported
|
||||
// there). Kept local so this component doesn't reach into private
|
||||
// surface; if AttachmentVideo / AttachmentPDF in PR-2/PR-3 also need
|
||||
// it, lift to an exported helper at that point (the third-caller
|
||||
// rule).
|
||||
function getTenantSlug(): string | null {
|
||||
if (typeof window === "undefined") return null;
|
||||
const host = window.location.hostname;
|
||||
// Tenant subdomain shape: <slug>.moleculesai.app
|
||||
const m = host.match(/^([^.]+)\.moleculesai\.app$/);
|
||||
return m ? m[1] : null;
|
||||
}
|
||||
|
||||
@ -33,7 +33,6 @@
|
||||
// timeout, swap to chip. Implemented as a 3-second watchdog.
|
||||
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
import { platformAuthHeaders } from "@/lib/api";
|
||||
import type { ChatAttachment } from "./types";
|
||||
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
||||
import { AttachmentLightbox } from "./AttachmentLightbox";
|
||||
@ -70,8 +69,13 @@ export function AttachmentPDF({ workspaceId, attachment, onDownload, tone }: Pro
|
||||
void (async () => {
|
||||
try {
|
||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||
const headers: Record<string, string> = {};
|
||||
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||
const slug = getTenantSlug();
|
||||
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||
const res = await fetch(href, {
|
||||
headers: platformAuthHeaders(),
|
||||
headers,
|
||||
credentials: "include",
|
||||
signal: AbortSignal.timeout(60_000),
|
||||
});
|
||||
@ -185,5 +189,9 @@ function PdfGlyph() {
|
||||
);
|
||||
}
|
||||
|
||||
// Local getTenantSlug() removed — auth-header construction now goes
|
||||
// through platformAuthHeaders() from @/lib/api (#178).
|
||||
function getTenantSlug(): string | null {
|
||||
if (typeof window === "undefined") return null;
|
||||
const host = window.location.hostname;
|
||||
const m = host.match(/^([^.]+)\.moleculesai\.app$/);
|
||||
return m ? m[1] : null;
|
||||
}
|
||||
|
||||
@ -26,7 +26,6 @@
|
||||
// to download the full file.
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import { platformAuthHeaders } from "@/lib/api";
|
||||
import type { ChatAttachment } from "./types";
|
||||
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
||||
import { AttachmentChip } from "./AttachmentViews";
|
||||
@ -58,13 +57,13 @@ export function AttachmentTextPreview({ workspaceId, attachment, onDownload, ton
|
||||
void (async () => {
|
||||
try {
|
||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||
// Only attach platform auth headers for in-platform URIs —
|
||||
// off-platform URLs (HTTP/HTTPS attachments) MUST NOT receive
|
||||
// our bearer token (it would leak the admin token to a third
|
||||
// party). The branch is preserved with the new shared helper.
|
||||
const headers: Record<string, string> = isPlatformAttachment(attachment.uri)
|
||||
? platformAuthHeaders()
|
||||
: {};
|
||||
const headers: Record<string, string> = {};
|
||||
if (isPlatformAttachment(attachment.uri)) {
|
||||
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||
const slug = getTenantSlug();
|
||||
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||
}
|
||||
const res = await fetch(href, {
|
||||
headers,
|
||||
credentials: "include",
|
||||
@ -183,5 +182,9 @@ export function AttachmentTextPreview({ workspaceId, attachment, onDownload, ton
|
||||
);
|
||||
}
|
||||
|
||||
// Local getTenantSlug() removed — auth-header construction now goes
|
||||
// through platformAuthHeaders() from @/lib/api (#178).
|
||||
function getTenantSlug(): string | null {
|
||||
if (typeof window === "undefined") return null;
|
||||
const host = window.location.hostname;
|
||||
const m = host.match(/^([^.]+)\.moleculesai\.app$/);
|
||||
return m ? m[1] : null;
|
||||
}
|
||||
|
||||
@ -25,7 +25,6 @@
|
||||
// fetch via service worker. v2 if measured-needed.
|
||||
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
import { platformAuthHeaders } from "@/lib/api";
|
||||
import type { ChatAttachment } from "./types";
|
||||
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
||||
import { AttachmentChip } from "./AttachmentViews";
|
||||
@ -62,8 +61,13 @@ export function AttachmentVideo({ workspaceId, attachment, onDownload, tone }: P
|
||||
void (async () => {
|
||||
try {
|
||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||
const headers: Record<string, string> = {};
|
||||
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||
const slug = getTenantSlug();
|
||||
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||
const res = await fetch(href, {
|
||||
headers: platformAuthHeaders(),
|
||||
headers,
|
||||
credentials: "include",
|
||||
// Videos are larger than images on average; give the request
|
||||
// more headroom. The server's per-request body cap (50MB) is
|
||||
@ -143,5 +147,11 @@ export function AttachmentVideo({ workspaceId, attachment, onDownload, tone }: P
|
||||
);
|
||||
}
|
||||
|
||||
// Local getTenantSlug() removed — auth-header construction now goes
|
||||
// through platformAuthHeaders() from @/lib/api (#178).
|
||||
// Internal helper — same shape as AttachmentImage's. Lifted to a
|
||||
// shared util in PR-2.5 if a third caller needs it (PDF, audio).
|
||||
function getTenantSlug(): string | null {
|
||||
if (typeof window === "undefined") return null;
|
||||
const host = window.location.hostname;
|
||||
const m = host.match(/^([^.]+)\.moleculesai\.app$/);
|
||||
return m ? m[1] : null;
|
||||
}
|
||||
|
||||
@ -1,16 +1,12 @@
|
||||
import { PLATFORM_URL, platformAuthHeaders } from "@/lib/api";
|
||||
import { PLATFORM_URL } from "@/lib/api";
|
||||
import { getTenantSlug } from "@/lib/tenant";
|
||||
import type { ChatAttachment } from "./types";
|
||||
|
||||
/** Chat attachments are intentionally uploaded via a direct fetch()
|
||||
* instead of the `api.post` helper — `api.post` JSON-stringifies the
|
||||
* body, which would 500 on a Blob. Auth headers (tenant slug, admin
|
||||
* token, credentials) come from `platformAuthHeaders()` — the same
|
||||
* helper `request()` uses, so a missing bearer surfaces as a single
|
||||
* fix site instead of N copies. We deliberately do NOT set
|
||||
* Content-Type so the browser writes the multipart boundary into the
|
||||
* header; setting it manually would yield a multipart body the server
|
||||
* can't parse. See lib/api.ts platformAuthHeaders() for the full
|
||||
* rationale on why this pair must stay matched. */
|
||||
* body, which would 500 on a Blob. Mirrors the header plumbing
|
||||
* (tenant slug, admin token, credentials) so SaaS + self-hosted
|
||||
* callers work the same way. */
|
||||
export async function uploadChatFiles(
|
||||
workspaceId: string,
|
||||
files: File[],
|
||||
@ -20,12 +16,18 @@ export async function uploadChatFiles(
|
||||
const form = new FormData();
|
||||
for (const f of files) form.append("files", f, f.name);
|
||||
|
||||
const headers: Record<string, string> = {};
|
||||
const slug = getTenantSlug();
|
||||
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||
|
||||
// Uploads legitimately take a while on cold cache (tar write +
|
||||
// docker cp into the container). 60s is comfortable for the 25MB/
|
||||
// 50MB caps the server enforces.
|
||||
const res = await fetch(`${PLATFORM_URL}/workspaces/${workspaceId}/chat/uploads`, {
|
||||
method: "POST",
|
||||
headers: platformAuthHeaders(),
|
||||
headers,
|
||||
body: form,
|
||||
credentials: "include",
|
||||
signal: AbortSignal.timeout(60_000),
|
||||
@ -141,8 +143,14 @@ export async function downloadChatFile(
|
||||
return;
|
||||
}
|
||||
|
||||
const headers: Record<string, string> = {};
|
||||
const slug = getTenantSlug();
|
||||
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||
|
||||
const res = await fetch(href, {
|
||||
headers: platformAuthHeaders(),
|
||||
headers,
|
||||
credentials: "include",
|
||||
signal: AbortSignal.timeout(60_000),
|
||||
});
|
||||
|
||||
@ -1,130 +0,0 @@
|
||||
// @vitest-environment node
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
||||
|
||||
// Tests for the boot-time matched-pair guard added to next.config.ts.
|
||||
//
|
||||
// Why this lives in src/lib/__tests__ even though the function is in
|
||||
// canvas/next.config.ts:
|
||||
// - next.config.ts runs as ESM-but-also-CJS depending on which
|
||||
// consumer loads it (Next.js dev server vs Next.js build); we
|
||||
// want the test to be a plain ESM module Vitest already handles.
|
||||
// - Importing from "../../../next.config" pulls in the rest of the
|
||||
// file (loadMonorepoEnv, the default export, etc.) which has
|
||||
// side effects on module load (it runs loadMonorepoEnv()
|
||||
// immediately). To keep the test hermetic we don't import — we
|
||||
// duplicate the function under test.
|
||||
//
|
||||
// Sourcing the function from a shared module would be cleaner, but
|
||||
// next.config.ts is required to be a single self-contained file by
|
||||
// Next.js's loader on some host configurations. Pin invariant: the
|
||||
// duplicated function below MUST stay byte-identical to the one in
|
||||
// next.config.ts. If you change one, change the other and bump this
|
||||
// comment.
|
||||
|
||||
function checkAdminTokenPair(): void {
|
||||
const serverSet = !!process.env.ADMIN_TOKEN;
|
||||
const clientSet = !!process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
if (serverSet === clientSet) return;
|
||||
if (serverSet && !clientSet) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(
|
||||
"[next.config] ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not — " +
|
||||
"canvas will 401 against workspace-server because the bearer header " +
|
||||
"is never attached. Set both to the same value, or unset both.",
|
||||
);
|
||||
} else {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(
|
||||
"[next.config] NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not — " +
|
||||
"workspace-server will reject the bearer because no AdminAuth gate " +
|
||||
"is configured. Set both to the same value, or unset both.",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
describe("checkAdminTokenPair", () => {
|
||||
// Snapshot env so individual tests can stomp on it without leaking.
|
||||
// Rebuild from snapshot in afterEach so the next test sees a known
|
||||
// baseline regardless of mutation pattern.
|
||||
let originalEnv: Record<string, string | undefined>;
|
||||
let errorSpy: ReturnType<typeof vi.spyOn>;
|
||||
|
||||
beforeEach(() => {
|
||||
originalEnv = {
|
||||
ADMIN_TOKEN: process.env.ADMIN_TOKEN,
|
||||
NEXT_PUBLIC_ADMIN_TOKEN: process.env.NEXT_PUBLIC_ADMIN_TOKEN,
|
||||
};
|
||||
delete process.env.ADMIN_TOKEN;
|
||||
delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
errorSpy = vi.spyOn(console, "error").mockImplementation(() => {});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (originalEnv.ADMIN_TOKEN === undefined) delete process.env.ADMIN_TOKEN;
|
||||
else process.env.ADMIN_TOKEN = originalEnv.ADMIN_TOKEN;
|
||||
if (originalEnv.NEXT_PUBLIC_ADMIN_TOKEN === undefined) delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
else process.env.NEXT_PUBLIC_ADMIN_TOKEN = originalEnv.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
errorSpy.mockRestore();
|
||||
});
|
||||
|
||||
it("emits no warning when both are unset", () => {
|
||||
checkAdminTokenPair();
|
||||
expect(errorSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("emits no warning when both are set (matched pair, the happy path)", () => {
|
||||
process.env.ADMIN_TOKEN = "local-dev-admin";
|
||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "local-dev-admin";
|
||||
checkAdminTokenPair();
|
||||
expect(errorSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("warns when ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not", () => {
|
||||
process.env.ADMIN_TOKEN = "local-dev-admin";
|
||||
checkAdminTokenPair();
|
||||
expect(errorSpy).toHaveBeenCalledTimes(1);
|
||||
// Exact-string assertion — substring would also pass when the
|
||||
// function's branch logic is broken (e.g. emits both messages, or
|
||||
// emits the wrong one). Pin the exact message that operators will
|
||||
// see in their dev console so regressions are visible.
|
||||
expect(errorSpy).toHaveBeenCalledWith(
|
||||
"[next.config] ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not — " +
|
||||
"canvas will 401 against workspace-server because the bearer header " +
|
||||
"is never attached. Set both to the same value, or unset both.",
|
||||
);
|
||||
});
|
||||
|
||||
it("warns when NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not", () => {
|
||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "local-dev-admin";
|
||||
checkAdminTokenPair();
|
||||
expect(errorSpy).toHaveBeenCalledTimes(1);
|
||||
expect(errorSpy).toHaveBeenCalledWith(
|
||||
"[next.config] NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not — " +
|
||||
"workspace-server will reject the bearer because no AdminAuth gate " +
|
||||
"is configured. Set both to the same value, or unset both.",
|
||||
);
|
||||
});
|
||||
|
||||
// Empty string in process.env is the JS-side representation of `KEY=`
|
||||
// (no value) in a .env file. Treating "" as unset makes the pair
|
||||
// invariant symmetric: `KEY=` and `unset KEY` produce the same
|
||||
// verdict. Without this branch, an operator who comments out the
|
||||
// value but leaves the line would get a false-positive warning.
|
||||
it("treats empty string as unset (so KEY= and unset KEY are equivalent)", () => {
|
||||
process.env.ADMIN_TOKEN = "";
|
||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "";
|
||||
checkAdminTokenPair();
|
||||
expect(errorSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("warns when ADMIN_TOKEN is set and NEXT_PUBLIC_ADMIN_TOKEN is empty string", () => {
|
||||
process.env.ADMIN_TOKEN = "local-dev-admin";
|
||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "";
|
||||
checkAdminTokenPair();
|
||||
expect(errorSpy).toHaveBeenCalledTimes(1);
|
||||
// First branch — server set, client unset.
|
||||
expect(errorSpy).toHaveBeenCalledWith(
|
||||
expect.stringContaining("ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not"),
|
||||
);
|
||||
});
|
||||
});
|
||||
@ -1,97 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
||||
|
||||
// Tests for platformAuthHeaders — the shared helper extracted in #178
|
||||
// to consolidate the bearer-token-attach + tenant-slug-attach pattern
|
||||
// that was previously duplicated across 7 raw-fetch callsites in the
|
||||
// canvas (uploads + 5 Attachment* components + the api.ts request()
|
||||
// function).
|
||||
//
|
||||
// What we pin here:
|
||||
// - Returns a fresh object each call (so callers can mutate without
|
||||
// leaking into each other).
|
||||
// - Empty result on a non-tenant host with no admin token (the
|
||||
// localhost / self-hosted shape).
|
||||
// - Bearer attached when NEXT_PUBLIC_ADMIN_TOKEN is set.
|
||||
// - X-Molecule-Org-Slug attached when window.location.hostname is a
|
||||
// tenant subdomain (<slug>.moleculesai.app).
|
||||
// - Both attached when both apply (the production SaaS shape).
|
||||
//
|
||||
// Why jsdom: getTenantSlug() reads window.location.hostname. Node-only
|
||||
// environment yields no window and getTenantSlug returns null
|
||||
// unconditionally — wouldn't exercise the slug branch.
|
||||
|
||||
import { platformAuthHeaders } from "../api";
|
||||
|
||||
describe("platformAuthHeaders", () => {
|
||||
let originalAdminToken: string | undefined;
|
||||
|
||||
beforeEach(() => {
|
||||
originalAdminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (originalAdminToken === undefined) delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
else process.env.NEXT_PUBLIC_ADMIN_TOKEN = originalAdminToken;
|
||||
// jsdom resets hostname between tests via the @vitest-environment
|
||||
// pragma's per-test isolation. No explicit reset needed.
|
||||
});
|
||||
|
||||
it("returns an empty object on a non-tenant host with no admin token", () => {
|
||||
// jsdom default hostname is "localhost" — not a tenant slug, so
|
||||
// getTenantSlug() returns null and no X-Molecule-Org-Slug is added.
|
||||
const headers = platformAuthHeaders();
|
||||
expect(headers).toEqual({});
|
||||
});
|
||||
|
||||
it("attaches Authorization when NEXT_PUBLIC_ADMIN_TOKEN is set", () => {
|
||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "local-dev-admin";
|
||||
const headers = platformAuthHeaders();
|
||||
expect(headers).toEqual({ Authorization: "Bearer local-dev-admin" });
|
||||
});
|
||||
|
||||
it("does NOT attach Authorization when NEXT_PUBLIC_ADMIN_TOKEN is empty string", () => {
|
||||
// Empty-string env is the JS-side shape of `KEY=` in .env.
|
||||
// Treating it as unset matches the matched-pair guard in
|
||||
// next.config.ts (admin-token-pair.test.ts) — symmetric semantics.
|
||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "";
|
||||
const headers = platformAuthHeaders();
|
||||
expect(headers).toEqual({});
|
||||
});
|
||||
|
||||
it("attaches X-Molecule-Org-Slug on a tenant subdomain", () => {
|
||||
Object.defineProperty(window, "location", {
|
||||
value: { hostname: "reno-stars.moleculesai.app" },
|
||||
writable: true,
|
||||
});
|
||||
const headers = platformAuthHeaders();
|
||||
expect(headers).toEqual({ "X-Molecule-Org-Slug": "reno-stars" });
|
||||
});
|
||||
|
||||
it("attaches both when both apply (production SaaS shape)", () => {
|
||||
Object.defineProperty(window, "location", {
|
||||
value: { hostname: "reno-stars.moleculesai.app" },
|
||||
writable: true,
|
||||
});
|
||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "tenant-bearer";
|
||||
const headers = platformAuthHeaders();
|
||||
// Pin exact-equality on the full shape — substring/contains
|
||||
// assertions would also pass for an extra-header bug.
|
||||
expect(headers).toEqual({
|
||||
"X-Molecule-Org-Slug": "reno-stars",
|
||||
Authorization: "Bearer tenant-bearer",
|
||||
});
|
||||
});
|
||||
|
||||
it("returns a fresh object each call (callers can mutate safely)", () => {
|
||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "tok";
|
||||
const a = platformAuthHeaders();
|
||||
const b = platformAuthHeaders();
|
||||
expect(a).not.toBe(b); // distinct refs
|
||||
expect(a).toEqual(b); // same content
|
||||
a["Content-Type"] = "application/json";
|
||||
// Mutation on `a` does not leak into `b`.
|
||||
expect(b["Content-Type"]).toBeUndefined();
|
||||
});
|
||||
});
|
||||
@ -21,45 +21,6 @@ export interface RequestOptions {
|
||||
timeoutMs?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the platform auth header set used by every authenticated fetch
|
||||
* from the canvas. Returns a fresh object so callers can mutate (e.g.
|
||||
* append `Content-Type` for JSON requests, omit it for FormData).
|
||||
*
|
||||
* SaaS cross-origin shape:
|
||||
* - `X-Molecule-Org-Slug` — derived from `window.location.hostname`
|
||||
* by `getTenantSlug()`. Control plane uses it for fly-replay
|
||||
* routing. Empty on localhost / non-tenant hosts — safe to omit.
|
||||
* - `Authorization: Bearer <token>` — `NEXT_PUBLIC_ADMIN_TOKEN` baked
|
||||
* into the canvas build (see canvas/Dockerfile L8/L11). Required by
|
||||
* the workspace-server when `ADMIN_TOKEN` is set on the server side
|
||||
* (Tier-2b AdminAuth gate, wsauth_middleware.go ~L245). Empty when
|
||||
* no admin token was provisioned — the Tier-1 session-cookie path
|
||||
* handles that case via `credentials:"include"`.
|
||||
*
|
||||
* Why a shared helper: the two-line "read env, attach bearer; read
|
||||
* slug, attach header" pattern was duplicated across `request()` and
|
||||
* 7 raw-fetch callsites (chat uploads/download + 5 Attachment*
|
||||
* components) before this consolidation. A new poller or raw fetch
|
||||
* that forgets one of the two headers silently 401s against
|
||||
* workspace-server when ADMIN_TOKEN is set — the exact bug shape
|
||||
* called out in #178 / closes the post-#176 self-review gap.
|
||||
*
|
||||
* Callers that want JSON Content-Type should spread this and add it
|
||||
* themselves; FormData callers should NOT add Content-Type (the
|
||||
* browser sets the multipart boundary). Centralizing the auth pair
|
||||
* but leaving Content-Type up to the caller is the minimum viable
|
||||
* shared shape.
|
||||
*/
|
||||
export function platformAuthHeaders(): Record<string, string> {
|
||||
const headers: Record<string, string> = {};
|
||||
const slug = getTenantSlug();
|
||||
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||
return headers;
|
||||
}
|
||||
|
||||
async function request<T>(
|
||||
method: string,
|
||||
path: string,
|
||||
@ -67,16 +28,17 @@ async function request<T>(
|
||||
retryCount = 0,
|
||||
options?: RequestOptions,
|
||||
): Promise<T> {
|
||||
// JSON-bodied request — Content-Type is JSON. Auth pair comes from
|
||||
// the shared helper; see its doc comment for the SaaS-shape rationale.
|
||||
const headers: Record<string, string> = {
|
||||
"Content-Type": "application/json",
|
||||
...platformAuthHeaders(),
|
||||
};
|
||||
// Re-read slug locally for the 401 handler below — `headers` already
|
||||
// has it, but the 401 branch needs the bare value to gate the
|
||||
// session-probe + redirect logic on tenant context.
|
||||
// SaaS cross-origin shape:
|
||||
// - X-Molecule-Org-Slug: derived from window.location.hostname by
|
||||
// getTenantSlug(). Control plane uses it for fly-replay routing.
|
||||
// Empty on localhost / non-tenant hosts — safe to omit.
|
||||
// - credentials:"include": sends the session cookie cross-origin.
|
||||
// Cookie's Domain=.moleculesai.app attribute + cp's CORS allow this.
|
||||
const headers: Record<string, string> = { "Content-Type": "application/json" };
|
||||
const slug = getTenantSlug();
|
||||
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||
|
||||
const res = await fetch(`${PLATFORM_URL}${path}`, {
|
||||
method,
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# docker-compose.dev.yml — overlay over docker-compose.yml for local dev
|
||||
# with air-driven live reload of the platform (workspace-server) service.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker-compose.yml -f docker-compose.dev.yml up
|
||||
# (or `make dev` shorthand from repo root)
|
||||
#
|
||||
# What this overlay changes vs docker-compose.yml alone:
|
||||
# - Platform service uses workspace-server/Dockerfile.dev (air on top of
|
||||
# golang:1.25-alpine) instead of the multi-stage prod Dockerfile.
|
||||
# - Platform service bind-mounts the host's workspace-server/ source
|
||||
# into /app/workspace-server so air sees source edits live.
|
||||
# - Other services (postgres, redis, langfuse, etc.) inherit unchanged
|
||||
# from docker-compose.yml.
|
||||
#
|
||||
# What stays the same:
|
||||
# - All env vars, volumes, depends_on, healthchecks from docker-compose.yml.
|
||||
# - Network topology + ports.
|
||||
# - Postgres/Redis as service containers (no in-process replacements).
|
||||
|
||||
services:
|
||||
platform:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: workspace-server/Dockerfile.dev
|
||||
# Rebind source: edits under host's workspace-server/ propagate live.
|
||||
# The named volume on go-build-cache speeds up first build per container.
|
||||
volumes:
|
||||
- ./workspace-server:/app/workspace-server
|
||||
- go-build-cache:/root/.cache/go-build
|
||||
- go-mod-cache:/go/pkg/mod
|
||||
# Air signals the running binary on rebuild; ensure shell stops cleanly.
|
||||
init: true
|
||||
# Mark the service as dev-mode so the platform can short-circuit any
|
||||
# behavior that's incompatible with hot-reload (e.g. background
|
||||
# cron-style watchers that don't survive process restart). No-op
|
||||
# today; reserved for future flag use.
|
||||
environment:
|
||||
MOLECULE_DEV_HOT_RELOAD: "1"
|
||||
|
||||
volumes:
|
||||
go-build-cache:
|
||||
go-mod-cache:
|
||||
@ -13,7 +13,6 @@ services:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
networks:
|
||||
- molecule-monorepo-net
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-dev}"]
|
||||
interval: 2s
|
||||
@ -51,7 +50,6 @@ services:
|
||||
- redisdata:/data
|
||||
networks:
|
||||
- molecule-monorepo-net
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 2s
|
||||
@ -128,10 +126,6 @@ services:
|
||||
REDIS_URL: redis://redis:6379
|
||||
PORT: "${PLATFORM_PORT:-8080}"
|
||||
PLATFORM_URL: "http://platform:${PLATFORM_PORT:-8080}"
|
||||
# Container network namespace is already isolated; "all interfaces"
|
||||
# inside the container = the bridge interface only. The fail-open
|
||||
# default (127.0.0.1) would block host-to-container access.
|
||||
BIND_ADDR: "${BIND_ADDR:-0.0.0.0}"
|
||||
# Default MOLECULE_ENV=development so the WorkspaceAuth / AdminAuth
|
||||
# middleware fail-open path activates when ADMIN_TOKEN is unset —
|
||||
# otherwise the canvas (which runs without a bearer in pure local
|
||||
@ -201,28 +195,12 @@ services:
|
||||
# App private key — read-only bind-mount. The host-side path is
|
||||
# gitignored per .gitignore rules (/.secrets/ + *.pem).
|
||||
- ./.secrets/github-app.pem:/secrets/github-app.pem:ro
|
||||
# Per-role persona credentials (molecule-core#242 local surface).
|
||||
# Sourced at workspace creation time by org_import.go::loadPersonaEnvFile
|
||||
# when a workspace.yaml carries `role: <name>`. The host-side dir is
|
||||
# populated by the operator-host bootstrap kit (28 dev-tree personas);
|
||||
# /etc/molecule-bootstrap/personas is the in-container path the
|
||||
# platform expects (matches the prod tenant-EC2 path so the same code
|
||||
# works in both modes).
|
||||
#
|
||||
# Read-only mount — workspace-server only reads, never writes here.
|
||||
# If the host dir is empty/missing the platform's loadPersonaEnvFile
|
||||
# silently no-ops per its existing semantics, so this mount is safe
|
||||
# even on a fresh machine that hasn't run the bootstrap kit yet.
|
||||
- ${MOLECULE_PERSONA_ROOT_HOST:-${HOME}/.molecule-ai/personas}:/etc/molecule-bootstrap/personas:ro
|
||||
ports:
|
||||
- "${PLATFORM_PUBLISH_PORT:-8080}:${PLATFORM_PORT:-8080}"
|
||||
networks:
|
||||
- molecule-monorepo-net
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
# Plain GET — `--spider` would issue HEAD, which returns 404 because
|
||||
# /health is registered as GET only.
|
||||
test: ["CMD-SHELL", "wget -qO /dev/null --tries=1 http://localhost:${PLATFORM_PORT:-8080}/health || exit 1"]
|
||||
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:${PLATFORM_PORT:-8080}/health || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
@ -260,7 +238,7 @@ services:
|
||||
networks:
|
||||
- molecule-monorepo-net
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -qO /dev/null --tries=1 http://127.0.0.1:${CANVAS_PORT:-3000} || exit 1"]
|
||||
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://127.0.0.1:${CANVAS_PORT:-3000} || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
{
|
||||
"_comment": "OSS surface registry — every repo listed here MUST be public on git.moleculesai.app. Layer-3 customer/private templates are NOT registered here; they are handled at provision-time via the per-tenant credential resolver (see internal#102 RFC). 'main' refs are pinned to tags before broad rollout.",
|
||||
"_comment": "Pin refs to release tags for reproducible builds. 'main' is OK while all repos are internal.",
|
||||
"version": 1,
|
||||
"plugins": [
|
||||
{"name": "browser-automation", "repo": "molecule-ai/molecule-ai-plugin-browser-automation", "ref": "main"},
|
||||
@ -40,6 +40,7 @@
|
||||
{"name": "free-beats-all", "repo": "molecule-ai/molecule-ai-org-template-free-beats-all", "ref": "main"},
|
||||
{"name": "medo-smoke", "repo": "molecule-ai/molecule-ai-org-template-medo-smoke", "ref": "main"},
|
||||
{"name": "molecule-worker-gemini", "repo": "molecule-ai/molecule-ai-org-template-molecule-worker-gemini", "ref": "main"},
|
||||
{"name": "reno-stars", "repo": "molecule-ai/molecule-ai-org-template-reno-stars", "ref": "main"},
|
||||
{"name": "ux-ab-lab", "repo": "molecule-ai/molecule-ai-org-template-ux-ab-lab", "ref": "main"},
|
||||
{"name": "mock-bigorg", "repo": "molecule-ai/molecule-ai-org-template-mock-bigorg", "ref": "main"}
|
||||
]
|
||||
|
||||
@ -8,24 +8,27 @@
|
||||
# Requires: git, jq (lighter than python3 — ~2MB vs ~50MB in Alpine)
|
||||
#
|
||||
# Auth (optional):
|
||||
# Post-2026-05-08 (#192): every repo in manifest.json is public on
|
||||
# git.moleculesai.app. Anonymous clone works for the entire registered
|
||||
# set. The OSS-surface contract is recorded in manifest.json's _comment
|
||||
# — Layer-3 customer/private templates (e.g. reno-stars) are NOT in the
|
||||
# manifest; they are handled at provision-time via the per-tenant
|
||||
# credential resolver (internal#102 RFC).
|
||||
# When MOLECULE_GITEA_TOKEN is set, embed it as the basic-auth password so
|
||||
# private Gitea repos clone successfully. When unset, clone anonymously
|
||||
# (works only for repos that are public on git.moleculesai.app).
|
||||
#
|
||||
# MOLECULE_GITEA_TOKEN is therefore optional today. Kept supported for
|
||||
# two reasons: (a) historical CI configs that still inject
|
||||
# AUTO_SYNC_TOKEN remain harmless, (b) reserved for the case where a
|
||||
# private internal-only template is later registered via a ci-readonly
|
||||
# team grant — review must explicitly sign off on that, since it
|
||||
# violates the public-OSS-surface contract.
|
||||
# This is the path the publish-workspace-server-image.yml workflow uses:
|
||||
# it injects AUTO_SYNC_TOKEN (devops-engineer persona PAT, repo:read on
|
||||
# the molecule-ai org) so the in-CI pre-clone step succeeds for ALL
|
||||
# manifest entries — including the 5 private workspace-template-* repos
|
||||
# (codex, crewai, deepagents, gemini-cli, langgraph) and all 7
|
||||
# org-template-* repos.
|
||||
#
|
||||
# The token (when set) never enters the Docker image: this script runs
|
||||
# in the trusted CI context BEFORE `docker buildx build`, populates
|
||||
# The token never enters the Docker image: this script runs in the
|
||||
# trusted CI context BEFORE `docker buildx build`, populates
|
||||
# .tenant-bundle-deps/, then `Dockerfile.tenant` COPYs from there with
|
||||
# the .git directories already stripped (see line ~67 below).
|
||||
#
|
||||
# For backward compatibility — and so a fresh clone works without
|
||||
# secrets when (eventually) the workspace-template-* repos flip public —
|
||||
# the unset path remains a plain anonymous HTTPS clone. That path will
|
||||
# FAIL with "could not read Username" on private repos today; CI MUST
|
||||
# set MOLECULE_GITEA_TOKEN.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
|
||||
@ -1,49 +0,0 @@
|
||||
# air.toml — live-reload config for local docker-compose dev mode.
|
||||
#
|
||||
# Active when the platform service runs from workspace-server/Dockerfile.dev
|
||||
# (selected via docker-compose.dev.yml overlay). In production, the regular
|
||||
# Dockerfile builds a static binary; air is dev-only.
|
||||
#
|
||||
# Reference: https://github.com/air-verse/air
|
||||
|
||||
root = "."
|
||||
testdata_dir = "testdata"
|
||||
tmp_dir = "tmp"
|
||||
|
||||
[build]
|
||||
# Same build invocation as Dockerfile's builder stage minus the
|
||||
# CGO_ENABLED=0 toggle (CGO ok in dev for richer race detector output).
|
||||
cmd = "go build -o ./tmp/server ./cmd/server"
|
||||
bin = "tmp/server"
|
||||
full_bin = ""
|
||||
args_bin = []
|
||||
# Watch every .go and .yaml file under workspace-server/.
|
||||
include_ext = ["go", "yaml", "tmpl"]
|
||||
# Don't watch tests, build artifacts, vendored deps, or migration .sql
|
||||
# (migrations need a clean DB anyway — handled by docker-compose down/up).
|
||||
exclude_dir = ["assets", "tmp", "vendor", "testdata", "node_modules"]
|
||||
exclude_file = []
|
||||
# _test.go and *_mock.go shouldn't trigger a rebuild — saves cycles.
|
||||
exclude_regex = ["_test\\.go$", "_mock\\.go$"]
|
||||
exclude_unchanged = true
|
||||
follow_symlink = false
|
||||
log = "build-errors.log"
|
||||
# Kill running binary 1s before starting new one.
|
||||
kill_delay = "1s"
|
||||
send_interrupt = true
|
||||
stop_on_error = true
|
||||
# Debounce: wait this long after last change before triggering rebuild.
|
||||
delay = 500
|
||||
|
||||
[log]
|
||||
time = false
|
||||
|
||||
[color]
|
||||
main = "magenta"
|
||||
watcher = "cyan"
|
||||
build = "yellow"
|
||||
runner = "green"
|
||||
|
||||
[misc]
|
||||
# Don't keep the tmp/ dir around between runs.
|
||||
clean_on_exit = true
|
||||
3
workspace-server/.gitignore
vendored
3
workspace-server/.gitignore
vendored
@ -1,5 +1,2 @@
|
||||
# The compiled binary, not the cmd/server package.
|
||||
/server
|
||||
|
||||
# air live-reload build cache (Dockerfile.dev + docker-compose.dev.yml).
|
||||
/tmp/
|
||||
|
||||
@ -1,44 +0,0 @@
|
||||
# Dockerfile.dev — local-development image with air-driven live reload.
|
||||
#
|
||||
# Selected by docker-compose.dev.yml (overlay over docker-compose.yml).
|
||||
# Production stays on workspace-server/Dockerfile (static binary, no air).
|
||||
#
|
||||
# Workflow:
|
||||
# 1. docker compose -f docker-compose.yml -f docker-compose.dev.yml up
|
||||
# 2. Edit any .go file under workspace-server/
|
||||
# 3. air detects, rebuilds, kills old binary, starts new one (~3-5s)
|
||||
# 4. No `docker compose up --build` needed
|
||||
#
|
||||
# Templates + plugins are NOT pre-cloned here — air-mode assumes the
|
||||
# developer's filesystem has the workspace-configs-templates/ + plugins/
|
||||
# dirs available, mounted at runtime via docker-compose.dev.yml.
|
||||
|
||||
FROM golang:1.25-alpine
|
||||
|
||||
# air + git (for go mod) + ca-certs (for TLS) + tzdata (for time-zone DB)
|
||||
# + docker-cli + docker-cli-buildx so the platform binary can shell out to
|
||||
# /var/run/docker.sock (bind-mounted from host) for local-build provisioning.
|
||||
# docker-cli alone is insufficient: alpine's docker-cli enables BuildKit by
|
||||
# default but ships without buildx, producing
|
||||
# `ERROR: BuildKit is enabled but the buildx component is missing or broken`
|
||||
# on every `docker build`. docker-cli-buildx provides the buildx subcommand.
|
||||
RUN apk add --no-cache git ca-certificates tzdata wget docker-cli docker-cli-buildx \
|
||||
&& go install github.com/air-verse/air@latest
|
||||
|
||||
WORKDIR /app/workspace-server
|
||||
|
||||
# Pre-fetch deps so the first `air` rebuild on a fresh container is fast.
|
||||
# These are bind-mount-overridden at runtime, so the COPY here is just
|
||||
# to warm the module cache.
|
||||
COPY workspace-server/go.mod workspace-server/go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
# Source is bind-mounted at runtime (see docker-compose.dev.yml volumes
|
||||
# block) so the Dockerfile doesn't need to COPY it. air watches the
|
||||
# bind-mounted dir for changes.
|
||||
|
||||
ENV CGO_ENABLED=0
|
||||
ENV GOFLAGS="-buildvcs=false"
|
||||
|
||||
# Run air with the .air.toml in the bind-mounted source dir.
|
||||
CMD ["air", "-c", ".air.toml"]
|
||||
@ -26,14 +26,6 @@ func TestExtended_WorkspaceDelete(t *testing.T) {
|
||||
WithArgs(wsDelID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}))
|
||||
|
||||
// CascadeDelete walks descendants unconditionally (the 0-children
|
||||
// optimization in the old inline path was dropped during the
|
||||
// CascadeDelete extraction — descendant CTE returns 0 rows here,
|
||||
// same end state, one extra cheap query).
|
||||
mock.ExpectQuery("WITH RECURSIVE descendants").
|
||||
WithArgs(wsDelID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}))
|
||||
|
||||
// #73: batch UPDATE happens BEFORE any container teardown.
|
||||
// Uses ANY($1::uuid[]) even with a single ID for consistency.
|
||||
mock.ExpectExec("UPDATE workspaces SET status =").
|
||||
|
||||
@ -1,375 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Local E2E for the dev-department extraction (RFC internal#77).
|
||||
//
|
||||
// Pre-conditions: both repos cloned as siblings under
|
||||
// /tmp/local-e2e-deploy/{molecule-dev, molecule-dev-department}.
|
||||
// (Set up by the orchestrator before running this test.)
|
||||
//
|
||||
// What this proves end-to-end through real platform code:
|
||||
// 1. resolveYAMLIncludes follows the dev-lead symlink at the parent's
|
||||
// template root and pulls in the dev-department subtree.
|
||||
// 2. Recursive !include's inside the symlinked subtree resolve
|
||||
// correctly via the chain dev-lead/workspace.yaml →
|
||||
// ./core-lead/workspace.yaml → ./core-be/workspace.yaml etc.
|
||||
// 3. The resolved YAML unmarshals into a complete OrgTemplate with the
|
||||
// expected count of workspaces (parent's PM+Marketing+Research +
|
||||
// dev-department's atomized 28 workspaces).
|
||||
//
|
||||
// Skipped if the local-e2e-deploy fixture isn't present — won't block
|
||||
// CI on hosts that haven't set it up.
|
||||
func TestLocalE2E_DevDepartmentExtraction(t *testing.T) {
|
||||
parent := "/tmp/local-e2e-deploy/molecule-dev"
|
||||
if _, err := os.Stat(filepath.Join(parent, "org.yaml")); err != nil {
|
||||
t.Skipf("local-e2e fixture not present at %s: %v", parent, err)
|
||||
}
|
||||
|
||||
orgYAML, err := os.ReadFile(filepath.Join(parent, "org.yaml"))
|
||||
if err != nil {
|
||||
t.Fatalf("read org.yaml: %v", err)
|
||||
}
|
||||
|
||||
expanded, err := resolveYAMLIncludes(orgYAML, parent)
|
||||
if err != nil {
|
||||
t.Fatalf("resolveYAMLIncludes failed: %v", err)
|
||||
}
|
||||
|
||||
var tmpl OrgTemplate
|
||||
if err := yaml.Unmarshal(expanded, &tmpl); err != nil {
|
||||
t.Fatalf("unmarshal expanded OrgTemplate: %v", err)
|
||||
}
|
||||
|
||||
// Walk the full workspace tree, collect names.
|
||||
names := []string{}
|
||||
var walk func([]OrgWorkspace)
|
||||
walk = func(ws []OrgWorkspace) {
|
||||
for _, w := range ws {
|
||||
names = append(names, w.Name)
|
||||
walk(w.Children)
|
||||
}
|
||||
}
|
||||
walk(tmpl.Workspaces)
|
||||
|
||||
t.Logf("org name: %q", tmpl.Name)
|
||||
t.Logf("total workspaces (recursive): %d", len(names))
|
||||
for _, n := range names {
|
||||
t.Logf(" - %q", n)
|
||||
}
|
||||
|
||||
// Expected: PM + Marketing Lead + Dev Lead at top level, plus the
|
||||
// full sub-trees under each. After atomization, we expect:
|
||||
// - PM tree: PM + Research Lead + 3 research roles = 5
|
||||
// - Marketing tree: Marketing Lead + 5 marketing roles = 6
|
||||
// - Dev Lead tree: Dev Lead + (5 sub-team leads × ~6 each) +
|
||||
// 3 floaters + Triage Operator = ~32
|
||||
// Roughly ~43 total. Be liberal; just assert a floor.
|
||||
if len(names) < 30 {
|
||||
t.Errorf("workspace count too low (%d) — expected ~40+ (PM+Marketing+Dev tree)", len(names))
|
||||
}
|
||||
|
||||
// Specific sentinel names we expect to find:
|
||||
expected := []string{
|
||||
"PM",
|
||||
"Marketing Lead",
|
||||
"Dev Lead",
|
||||
"Core Platform Lead",
|
||||
"Controlplane Lead",
|
||||
"App & Docs Lead",
|
||||
"Infra Lead",
|
||||
"SDK Lead",
|
||||
"Documentation Specialist", // Q1 — should be under app-lead
|
||||
"Triage Operator", // Q2 — should be under dev-lead
|
||||
}
|
||||
found := map[string]bool{}
|
||||
for _, n := range names {
|
||||
found[n] = true
|
||||
}
|
||||
for _, want := range expected {
|
||||
if !found[want] {
|
||||
t.Errorf("missing expected workspace %q", want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stage-2 of the local e2e: prove every resolved workspace's `files_dir`
|
||||
// path actually consumes correctly through the rest of the import chain.
|
||||
// resolveYAMLIncludes returning a populated OrgTemplate is necessary but
|
||||
// not sufficient — `POST /org/import` then does:
|
||||
//
|
||||
// 1. resolveInsideRoot(orgBaseDir, ws.FilesDir) → must return a path
|
||||
// that exists and stat-resolves to a directory (org_import.go:313-317).
|
||||
// 2. CopyTemplateToContainer(ctx, containerID, templatePath) → walks
|
||||
// the dir with filepath.Walk and tars its contents into the
|
||||
// workspace's /configs/ mount (provisioner.go:766-820).
|
||||
//
|
||||
// This stage-2 test exercises both #1 and #2 against every workspace in
|
||||
// the resolved tree, mimicking what the platform does post-include-
|
||||
// resolution. Catches: files_dir paths that don't resolve through the
|
||||
// symlink, paths that exist but are empty (silently produces empty
|
||||
// /configs/), or filepath.Walk failing to descend through cross-repo
|
||||
// symlink boundaries.
|
||||
func TestLocalE2E_FilesDirConsumption(t *testing.T) {
|
||||
parent := "/tmp/local-e2e-deploy/molecule-dev"
|
||||
if _, err := os.Stat(filepath.Join(parent, "org.yaml")); err != nil {
|
||||
t.Skipf("local-e2e fixture not present at %s: %v", parent, err)
|
||||
}
|
||||
|
||||
orgYAML, err := os.ReadFile(filepath.Join(parent, "org.yaml"))
|
||||
if err != nil {
|
||||
t.Fatalf("read org.yaml: %v", err)
|
||||
}
|
||||
expanded, err := resolveYAMLIncludes(orgYAML, parent)
|
||||
if err != nil {
|
||||
t.Fatalf("resolveYAMLIncludes: %v", err)
|
||||
}
|
||||
var tmpl OrgTemplate
|
||||
if err := yaml.Unmarshal(expanded, &tmpl); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
|
||||
// Flatten every workspace — including children, grandchildren, etc.
|
||||
flat := []OrgWorkspace{}
|
||||
var walk func([]OrgWorkspace)
|
||||
walk = func(ws []OrgWorkspace) {
|
||||
for _, w := range ws {
|
||||
flat = append(flat, w)
|
||||
walk(w.Children)
|
||||
}
|
||||
}
|
||||
walk(tmpl.Workspaces)
|
||||
|
||||
checked := 0
|
||||
for _, w := range flat {
|
||||
if w.FilesDir == "" {
|
||||
continue // workspace declared inline (no files_dir) — skip
|
||||
}
|
||||
checked++
|
||||
t.Run(w.Name+"/"+w.FilesDir, func(t *testing.T) {
|
||||
// Step 1: resolveInsideRoot returns a path that's-inside-root.
|
||||
abs, err := resolveInsideRoot(parent, w.FilesDir)
|
||||
if err != nil {
|
||||
t.Fatalf("resolveInsideRoot(%q, %q): %v", parent, w.FilesDir, err)
|
||||
}
|
||||
info, err := os.Stat(abs)
|
||||
if err != nil {
|
||||
t.Fatalf("stat %q (resolved from files_dir %q): %v", abs, w.FilesDir, err)
|
||||
}
|
||||
if !info.IsDir() {
|
||||
t.Fatalf("files_dir %q resolved to %q which is not a directory", w.FilesDir, abs)
|
||||
}
|
||||
|
||||
// Step 2: walk the dir like CopyTemplateToContainer does.
|
||||
// Mirror the platform's symlink-resolution at the root —
|
||||
// filepath.Walk doesn't descend into a symlink leaf, so
|
||||
// CopyTemplateToContainer (provisioner.go) calls
|
||||
// EvalSymlinks on templatePath first. Replicate exactly.
|
||||
if resolved, err := filepath.EvalSymlinks(abs); err == nil {
|
||||
abs = resolved
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
tw := tar.NewWriter(&buf)
|
||||
fileCount := 0
|
||||
fileNames := []string{}
|
||||
err = filepath.Walk(abs, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rel, err := filepath.Rel(abs, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if rel == "." {
|
||||
return nil
|
||||
}
|
||||
header, _ := tar.FileInfoHeader(info, "")
|
||||
header.Name = rel
|
||||
if err := tw.WriteHeader(header); err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
fileCount++
|
||||
fileNames = append(fileNames, rel)
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
header.Size = int64(len(data))
|
||||
tw.Write(data)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("filepath.Walk %q (mimics CopyTemplateToContainer): %v", abs, err)
|
||||
}
|
||||
tw.Close()
|
||||
|
||||
if fileCount == 0 {
|
||||
t.Errorf("files_dir %q at %q is empty — CopyTemplateToContainer would produce empty /configs/",
|
||||
w.FilesDir, abs)
|
||||
}
|
||||
|
||||
// Sanity: every workspace folder should have AT LEAST one of
|
||||
// {workspace.yaml, system-prompt.md, initial-prompt.md} —
|
||||
// these are the markers a workspace folder is recognizable
|
||||
// as a workspace (mirrors validator's WORKSPACE_FOLDER_MARKERS).
|
||||
markers := []string{"workspace.yaml", "system-prompt.md", "initial-prompt.md"}
|
||||
hasMarker := false
|
||||
for _, name := range fileNames {
|
||||
for _, m := range markers {
|
||||
if name == m || strings.HasSuffix(name, "/"+m) {
|
||||
hasMarker = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if hasMarker {
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasMarker {
|
||||
t.Errorf("files_dir %q at %q has %d files but none of the workspace markers %v — found: %v",
|
||||
w.FilesDir, abs, fileCount, markers, fileNames)
|
||||
}
|
||||
})
|
||||
}
|
||||
t.Logf("checked %d workspaces with files_dir", checked)
|
||||
if checked < 25 {
|
||||
t.Errorf("expected ~28 workspaces with files_dir (post-atomization); only saw %d", checked)
|
||||
}
|
||||
}
|
||||
|
||||
// PR-C from the Phase 3a phasing (task #234): real-Gitea e2e for the
|
||||
// !external resolver against the LIVE molecule-ai/molecule-dev-department
|
||||
// repo. Verifies the production gitFetcher fetches the dev tree and the
|
||||
// resolver grafts it correctly into a parent template that has NO
|
||||
// symlink — composition is purely platform-side.
|
||||
//
|
||||
// Skipped if Gitea isn't reachable (offline / firewall / CI without
|
||||
// network). Requires `git` binary on PATH.
|
||||
func TestLocalE2E_ExternalDevDepartment(t *testing.T) {
|
||||
if _, err := exec.LookPath("git"); err != nil {
|
||||
t.Skipf("git binary not found: %v", err)
|
||||
}
|
||||
|
||||
// Skip if Gitea host isn't reachable (TCP probe). Avoids network-
|
||||
// dependent tests failing on offline runners.
|
||||
conn, err := net.DialTimeout("tcp", "git.moleculesai.app:443", 3*time.Second)
|
||||
if err != nil {
|
||||
t.Skipf("git.moleculesai.app:443 unreachable: %v", err)
|
||||
}
|
||||
conn.Close()
|
||||
|
||||
// Build a minimal parent template inline — no need for the
|
||||
// /tmp/local-e2e-deploy/ symlinked fixture. The whole point of
|
||||
// !external is that the parent template is self-contained;
|
||||
// composition resolves over the network at import time.
|
||||
parent := t.TempDir()
|
||||
|
||||
orgYAML := []byte(`name: External-Only Test Parent
|
||||
description: Parent template that pulls the entire dev tree via !external.
|
||||
defaults:
|
||||
runtime: claude-code
|
||||
tier: 2
|
||||
workspaces:
|
||||
- !external
|
||||
repo: molecule-ai/molecule-dev-department
|
||||
ref: main
|
||||
path: dev-lead/workspace.yaml
|
||||
`)
|
||||
if err := os.WriteFile(filepath.Join(parent, "org.yaml"), orgYAML, 0o644); err != nil {
|
||||
t.Fatalf("write org.yaml: %v", err)
|
||||
}
|
||||
|
||||
out, err := resolveYAMLIncludes(orgYAML, parent)
|
||||
if err != nil {
|
||||
t.Fatalf("resolveYAMLIncludes (!external against live Gitea): %v", err)
|
||||
}
|
||||
|
||||
var tmpl OrgTemplate
|
||||
if err := yaml.Unmarshal(out, &tmpl); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
|
||||
// Walk the workspace tree, collect names + check files_dir paths.
|
||||
flat := []OrgWorkspace{}
|
||||
var walk func([]OrgWorkspace)
|
||||
walk = func(ws []OrgWorkspace) {
|
||||
for _, w := range ws {
|
||||
flat = append(flat, w)
|
||||
walk(w.Children)
|
||||
}
|
||||
}
|
||||
walk(tmpl.Workspaces)
|
||||
|
||||
t.Logf("workspaces resolved through !external: %d", len(flat))
|
||||
if len(flat) < 25 {
|
||||
t.Errorf("expected ~28 dev-tree workspaces via !external; got %d", len(flat))
|
||||
}
|
||||
|
||||
// Sentinel checks — same as TestLocalE2E_DevDepartmentExtraction
|
||||
// (Q1+Q2 placements verified).
|
||||
expected := []string{
|
||||
"Dev Lead",
|
||||
"Core Platform Lead",
|
||||
"Controlplane Lead",
|
||||
"App & Docs Lead",
|
||||
"Documentation Specialist", // Q1
|
||||
"Triage Operator", // Q2
|
||||
}
|
||||
found := map[string]bool{}
|
||||
for _, w := range flat {
|
||||
found[w.Name] = true
|
||||
}
|
||||
for _, want := range expected {
|
||||
if !found[want] {
|
||||
t.Errorf("missing expected workspace %q", want)
|
||||
}
|
||||
}
|
||||
|
||||
// Every workspace's files_dir must be cache-prefixed (proves the
|
||||
// path-rewrite ran end-to-end).
|
||||
cachePrefix := ".external-cache"
|
||||
for _, w := range flat {
|
||||
if w.FilesDir == "" {
|
||||
continue
|
||||
}
|
||||
if !strings.HasPrefix(w.FilesDir, cachePrefix) {
|
||||
t.Errorf("workspace %q files_dir %q missing cache prefix %q", w.Name, w.FilesDir, cachePrefix)
|
||||
}
|
||||
}
|
||||
|
||||
// Verify the fetched cache exists and resolveInsideRoot accepts
|
||||
// every workspace's files_dir (would cause provisioning to fail
|
||||
// if not).
|
||||
for _, w := range flat {
|
||||
if w.FilesDir == "" {
|
||||
continue
|
||||
}
|
||||
abs, err := resolveInsideRoot(parent, w.FilesDir)
|
||||
if err != nil {
|
||||
t.Errorf("workspace %q files_dir %q: resolveInsideRoot: %v", w.Name, w.FilesDir, err)
|
||||
continue
|
||||
}
|
||||
info, err := os.Stat(abs)
|
||||
if err != nil {
|
||||
t.Errorf("workspace %q: stat %q: %v", w.Name, abs, err)
|
||||
continue
|
||||
}
|
||||
if !info.IsDir() {
|
||||
t.Errorf("workspace %q files_dir %q is not a directory", w.Name, w.FilesDir)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -13,15 +13,12 @@ import (
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/channels"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/lib/pq"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
@ -425,16 +422,6 @@ type OrgWorkspace struct {
|
||||
Tier int `yaml:"tier" json:"tier"`
|
||||
Template string `yaml:"template" json:"template"`
|
||||
FilesDir string `yaml:"files_dir" json:"files_dir"`
|
||||
// Spawning gates whether this workspace (AND its descendants) gets
|
||||
// provisioned during /org/import. Pointer so we can distinguish
|
||||
// "explicitly set to false" from "unset" (default = spawn). Use case:
|
||||
// the dev-tree org template declares the full team structure but a
|
||||
// developer's local machine only has RAM for a subset; setting
|
||||
// spawning: false on a leaf or a sub-tree root skips that branch
|
||||
// entirely without editing the canonical template structure.
|
||||
// Counted in countWorkspaces same as actual; subtree-skip happens
|
||||
// at provision time in createWorkspaceTree.
|
||||
Spawning *bool `yaml:"spawning,omitempty" json:"spawning,omitempty"`
|
||||
// SystemPrompt is an inline override. Normally each role's system-prompt.md
|
||||
// lives at `<files_dir>/system-prompt.md` and is copied via the files_dir
|
||||
// template-copy step; inline overrides that path for ad-hoc workspaces.
|
||||
@ -571,19 +558,6 @@ func (h *OrgHandler) Import(c *gin.Context) {
|
||||
var body struct {
|
||||
Dir string `json:"dir"` // org template directory name
|
||||
Template OrgTemplate `json:"template"` // or inline template
|
||||
// Mode controls cleanup behavior of pre-existing workspaces:
|
||||
// "" / "merge" — additive (default; current behavior).
|
||||
// Existing workspaces matched by
|
||||
// (parent_id, name) are skipped; nothing
|
||||
// outside the new tree is touched.
|
||||
// "reconcile" — additive + cleanup. After import, any
|
||||
// online workspace whose name matches an
|
||||
// imported workspace's name but whose id
|
||||
// isn't in the import result set is
|
||||
// cascade-deleted. Catches "previous
|
||||
// import survived a re-import" zombies
|
||||
// (the 20:13→21:17 dev-tree case).
|
||||
Mode string `json:"mode"`
|
||||
}
|
||||
if err := c.ShouldBindJSON(&body); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
|
||||
@ -629,19 +603,6 @@ func (h *OrgHandler) Import(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Emit started AFTER the YAML is loaded so payload.name carries the
|
||||
// resolved template name (was: empty when caller passed `dir` instead
|
||||
// of inline `template`). Pre-parse error paths above return without
|
||||
// emitting — semantically "we couldn't even start an import" — so
|
||||
// every started event is guaranteed a paired completed/failed below
|
||||
// (no orphan started rows in structure_events).
|
||||
importStart := time.Now()
|
||||
emitOrgEvent(c.Request.Context(), "org.import.started", map[string]any{
|
||||
"name": tmpl.Name,
|
||||
"dir": body.Dir,
|
||||
"mode": body.Mode,
|
||||
})
|
||||
|
||||
// Required-env preflight — refuses import when any required_env is
|
||||
// missing from global_secrets. No bypass: the prior `force: true`
|
||||
// escape hatch was removed (issue #2290) because it was the silent
|
||||
@ -747,171 +708,18 @@ func (h *OrgHandler) Import(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// Reconcile mode: prune workspaces present from a previous import that
|
||||
// share a name with the new tree but are NOT in the new result set.
|
||||
// Catches the additive-import bug where re-running /org/import with a
|
||||
// changed tree shape (different parent_id for the same role name) leaves
|
||||
// the prior workspace online — visible to the canvas, consuming
|
||||
// containers, and looking like a duplicate. Default mode "" / "merge"
|
||||
// preserves the old additive behavior.
|
||||
reconcileRemovedCount := 0
|
||||
reconcileSkipped := 0
|
||||
reconcileErrs := []string{}
|
||||
if body.Mode == "reconcile" && createErr == nil {
|
||||
ctx := c.Request.Context()
|
||||
importedNames := []string{}
|
||||
walkOrgWorkspaceNames(tmpl.Workspaces, &importedNames)
|
||||
|
||||
importedIDs := make([]string, 0, len(results))
|
||||
for _, r := range results {
|
||||
if id, ok := r["id"].(string); ok && id != "" {
|
||||
importedIDs = append(importedIDs, id)
|
||||
}
|
||||
}
|
||||
|
||||
// Empty-set guards: if the import didn't produce any names or any
|
||||
// IDs, skip — querying with empty arrays would either match
|
||||
// nothing (harmless) or, worse, match every workspace if a future
|
||||
// query rewrite drops the IN clause. Belt-and-suspenders.
|
||||
if len(importedNames) > 0 && len(importedIDs) > 0 {
|
||||
rows, err := db.DB.QueryContext(ctx, `
|
||||
SELECT id FROM workspaces
|
||||
WHERE name = ANY($1::text[])
|
||||
AND id != ALL($2::uuid[])
|
||||
AND status != 'removed'
|
||||
`, pq.Array(importedNames), pq.Array(importedIDs))
|
||||
if err != nil {
|
||||
log.Printf("Org import reconcile: orphan query failed: %v", err)
|
||||
reconcileErrs = append(reconcileErrs, fmt.Sprintf("orphan query: %v", err))
|
||||
} else {
|
||||
orphanIDs := []string{}
|
||||
for rows.Next() {
|
||||
var orphanID string
|
||||
if rows.Scan(&orphanID) == nil {
|
||||
orphanIDs = append(orphanIDs, orphanID)
|
||||
}
|
||||
}
|
||||
rows.Close()
|
||||
|
||||
for _, oid := range orphanIDs {
|
||||
descendantIDs, stopErrs, err := h.workspace.CascadeDelete(ctx, oid)
|
||||
if err != nil {
|
||||
log.Printf("Org import reconcile: CascadeDelete(%s) failed: %v", oid, err)
|
||||
reconcileErrs = append(reconcileErrs, fmt.Sprintf("delete %s: %v", oid, err))
|
||||
reconcileSkipped++
|
||||
continue
|
||||
}
|
||||
reconcileRemovedCount += 1 + len(descendantIDs)
|
||||
if len(stopErrs) > 0 {
|
||||
log.Printf("Org import reconcile: %s had %d stop errors (orphan sweeper will retry)", oid, len(stopErrs))
|
||||
}
|
||||
}
|
||||
log.Printf("Org import reconcile: %d orphans removed (%d cascade descendants), %d skipped", len(orphanIDs), reconcileRemovedCount-len(orphanIDs), reconcileSkipped)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
status := http.StatusCreated
|
||||
resp := gin.H{
|
||||
"org": tmpl.Name,
|
||||
"workspaces": results,
|
||||
"count": len(results),
|
||||
}
|
||||
if body.Mode == "reconcile" {
|
||||
resp["mode"] = "reconcile"
|
||||
resp["reconcile_removed_count"] = reconcileRemovedCount
|
||||
if len(reconcileErrs) > 0 {
|
||||
resp["reconcile_errors"] = reconcileErrs
|
||||
}
|
||||
}
|
||||
if createErr != nil {
|
||||
status = http.StatusMultiStatus
|
||||
resp["error"] = createErr.Error()
|
||||
}
|
||||
|
||||
// results contains both freshly-created AND lookupExistingChild skips
|
||||
// (entries with "skipped":true). Splitting the count here so the audit
|
||||
// row reflects "what changed" vs "what was already there" — telemetry
|
||||
// readers shouldn't need to grep stdout to tell an idempotent re-run
|
||||
// apart from a fresh-create.
|
||||
createdCount, skippedCount := 0, 0
|
||||
for _, r := range results {
|
||||
if skipped, _ := r["skipped"].(bool); skipped {
|
||||
skippedCount++
|
||||
} else {
|
||||
createdCount++
|
||||
}
|
||||
}
|
||||
log.Printf("Org import: %s — %d created, %d skipped, %d reconciled",
|
||||
tmpl.Name, createdCount, skippedCount, reconcileRemovedCount)
|
||||
emitOrgEvent(c.Request.Context(), "org.import.completed", map[string]any{
|
||||
"name": tmpl.Name,
|
||||
"dir": body.Dir,
|
||||
"mode": body.Mode,
|
||||
"created_count": createdCount,
|
||||
"skipped_count": skippedCount,
|
||||
"reconcile_removed_count": reconcileRemovedCount,
|
||||
"reconcile_errors": len(reconcileErrs),
|
||||
"duration_ms": time.Since(importStart).Milliseconds(),
|
||||
"create_error": errString(createErr),
|
||||
})
|
||||
log.Printf("Org import: %s — %d workspaces created", tmpl.Name, len(results))
|
||||
c.JSON(status, resp)
|
||||
}
|
||||
|
||||
// walkOrgWorkspaceNames collects every Name in the tree (in any order) into
|
||||
// names. Used by reconcile to detect orphan workspaces — workspaces with the
|
||||
// same role name as a freshly-imported one but a different id, surviving from
|
||||
// a prior import.
|
||||
func walkOrgWorkspaceNames(workspaces []OrgWorkspace, names *[]string) {
|
||||
for _, w := range workspaces {
|
||||
// spawning:false subtrees are still part of the imported tree
|
||||
// from a logical-tree perspective — DON'T skip the recursion,
|
||||
// or reconcile would orphan the rest of the subtree on every
|
||||
// re-import where spawning is toggled. Names of skipped
|
||||
// workspaces remain registered so reconcile won't double-create
|
||||
// them when spawning flips back to true.
|
||||
if w.Name != "" {
|
||||
*names = append(*names, w.Name)
|
||||
}
|
||||
walkOrgWorkspaceNames(w.Children, names)
|
||||
}
|
||||
}
|
||||
|
||||
// emitOrgEvent records an org-lifecycle event in structure_events so the
|
||||
// import history is queryable independent of stdout log retention. Errors
|
||||
// are logged and swallowed — never block the request path on telemetry.
|
||||
//
|
||||
// Event-type taxonomy (extend by appending; never rename):
|
||||
//
|
||||
// org.import.started — handler entered, request body parsed
|
||||
// org.import.completed — handler exiting (success or partial)
|
||||
// org.import.failed — handler exiting with an unrecoverable error
|
||||
//
|
||||
// payload fields are documented at each call site.
|
||||
func emitOrgEvent(ctx context.Context, eventType string, payload map[string]any) {
|
||||
if payload == nil {
|
||||
payload = map[string]any{}
|
||||
}
|
||||
payloadJSON, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
log.Printf("emitOrgEvent: marshal %s payload failed: %v", eventType, err)
|
||||
return
|
||||
}
|
||||
if _, err := db.DB.ExecContext(ctx, `
|
||||
INSERT INTO structure_events (event_type, payload, created_at)
|
||||
VALUES ($1, $2, now())
|
||||
`, eventType, payloadJSON); err != nil {
|
||||
log.Printf("emitOrgEvent: insert %s failed: %v", eventType, err)
|
||||
}
|
||||
}
|
||||
|
||||
// errString returns "" for a nil error, err.Error() otherwise. Lets us put
|
||||
// nullable error strings in event payloads without checking for nil at every
|
||||
// call site.
|
||||
func errString(err error) string {
|
||||
if err == nil {
|
||||
return ""
|
||||
}
|
||||
return err.Error()
|
||||
}
|
||||
|
||||
|
||||
@ -1,439 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// External-ref resolver — gitops-style cross-repo subtree composition.
|
||||
// Internal#77 RFC, Phase 3a (task #222). Prior art: Helm subcharts +
|
||||
// dependency cache, Kustomize remote bases, Terraform module sources.
|
||||
//
|
||||
// Schema (a `!external`-tagged mapping anywhere a workspace entry is
|
||||
// allowed — workspaces:, roots:, children:):
|
||||
//
|
||||
// - !external
|
||||
// repo: molecule-ai/molecule-dev-department
|
||||
// ref: main
|
||||
// path: dev-lead/workspace.yaml
|
||||
//
|
||||
// At resolve time, the platform fetches the repo at ref into a content-
|
||||
// addressable cache under <rootDir>/.external-cache/<repo>/<sha>/, loads
|
||||
// the yaml at <cacheDir>/<path>, rewrites every files_dir + relative
|
||||
// !include path to be cache-prefixed, then grafts the result in place of
|
||||
// the !external node. Downstream pipeline (resolveInsideRoot, plugin
|
||||
// merge, CopyTemplateToContainer) sees ordinary in-tree paths.
|
||||
|
||||
// ExternalRef is the deserialized form of an `!external`-tagged mapping.
|
||||
type ExternalRef struct {
|
||||
Repo string `yaml:"repo"`
|
||||
Ref string `yaml:"ref"`
|
||||
Path string `yaml:"path"`
|
||||
|
||||
// URL overrides the default Gitea host. Optional; defaults to
|
||||
// MOLECULE_EXTERNAL_GITEA_URL env or git.moleculesai.app.
|
||||
URL string `yaml:"url,omitempty"`
|
||||
}
|
||||
|
||||
const (
|
||||
// maxExternalDepth caps recursion through nested `!external`s. Lower
|
||||
// than maxIncludeDepth (16) because each level may issue a network
|
||||
// fetch. Composition that genuinely needs >4 layers is a smell.
|
||||
maxExternalDepth = 4
|
||||
|
||||
// externalCacheDirName is the per-template cache subdir under rootDir.
|
||||
// Content-addressable: keyed by (repo, sha). Operators add this to
|
||||
// .gitignore — cache is platform-mutated, not source-tracked.
|
||||
externalCacheDirName = ".external-cache"
|
||||
|
||||
// gitFetchTimeout caps a single clone operation. Conservative —
|
||||
// org template fetches are typically <100KB.
|
||||
gitFetchTimeout = 60 * time.Second
|
||||
)
|
||||
|
||||
// safeRefPattern restricts `ref` values to characters git itself accepts
|
||||
// for branch / tag / SHA. Belt-and-braces over git's own validation.
|
||||
var safeRefPattern = regexp.MustCompile(`^[a-zA-Z0-9_./-]+$`)
|
||||
|
||||
// allowlistedHostPath returns true if `<host>/<repo>` matches the
|
||||
// configured allowlist. Default allowlist: git.moleculesai.app/molecule-ai/.
|
||||
// Override via MOLECULE_EXTERNAL_REPO_ALLOWLIST env var (comma-separated
|
||||
// patterns). Patterns are matched as prefixes (with trailing-slash
|
||||
// semantics) or as exact matches. Trailing /* is treated as "any
|
||||
// descendants of this prefix".
|
||||
//
|
||||
// Examples:
|
||||
// - "git.moleculesai.app/molecule-ai/" → matches molecule-ai/* (any repo)
|
||||
// - "git.moleculesai.app/molecule-ai/*" → same; trailing /* normalized to /
|
||||
// - "git.moleculesai.app/molecule-ai/molecule-dev-department" → exact
|
||||
// - "git.moleculesai.app/" → matches everything on that host
|
||||
func allowlistedHostPath(host, repoPath string) bool {
|
||||
allow := os.Getenv("MOLECULE_EXTERNAL_REPO_ALLOWLIST")
|
||||
if allow == "" {
|
||||
allow = "git.moleculesai.app/molecule-ai/"
|
||||
}
|
||||
hp := host + "/" + repoPath
|
||||
for _, pat := range strings.Split(allow, ",") {
|
||||
pat = strings.TrimSpace(pat)
|
||||
if pat == "" {
|
||||
continue
|
||||
}
|
||||
// Normalize trailing /* → /
|
||||
pat = strings.TrimSuffix(pat, "*")
|
||||
if pat == hp {
|
||||
return true
|
||||
}
|
||||
if strings.HasSuffix(pat, "/") && strings.HasPrefix(hp+"/", pat) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// externalFetcher abstracts the git-clone-into-cache step. Production
|
||||
// uses gitFetcher (shells out to git); tests inject a fake that
|
||||
// pre-stages content in a temp dir.
|
||||
type externalFetcher interface {
|
||||
// Fetch ensures rootDir/.external-cache/<safe-repo>/<sha>/ contains
|
||||
// the repo content at the given ref. Returns the absolute cache
|
||||
// dir + the resolved SHA. Cache hit = no network. Cache miss =
|
||||
// clone.
|
||||
Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (cacheDir, sha string, err error)
|
||||
}
|
||||
|
||||
// defaultExternalFetcher is the package-level fetcher injection point.
|
||||
// Production code uses the git-shell fetcher; tests override via
|
||||
// SetExternalFetcherForTest.
|
||||
var defaultExternalFetcher externalFetcher = &gitFetcher{}
|
||||
|
||||
// SetExternalFetcherForTest swaps the fetcher for testing. Returns a
|
||||
// cleanup func that restores the previous fetcher.
|
||||
func SetExternalFetcherForTest(f externalFetcher) func() {
|
||||
prev := defaultExternalFetcher
|
||||
defaultExternalFetcher = f
|
||||
return func() { defaultExternalFetcher = prev }
|
||||
}
|
||||
|
||||
// resolveExternalMapping replaces an `!external`-tagged mapping node
|
||||
// with the loaded + path-rewritten yaml content from the fetched repo.
|
||||
//
|
||||
// `currentDir` and `rootDir` are inherited from expandNode's resolve
|
||||
// frame. `visited` tracks (repo, sha, path) tuples for cycle detection
|
||||
// across nested externals.
|
||||
func resolveExternalMapping(n *yaml.Node, currentDir, rootDir string, visited map[string]bool, depth int) error {
|
||||
if depth > maxExternalDepth {
|
||||
return fmt.Errorf("!external: max depth %d exceeded (possible cycle)", maxExternalDepth)
|
||||
}
|
||||
if rootDir == "" {
|
||||
return fmt.Errorf("!external at line %d requires a dir-based org template (no rootDir in inline-template mode)", n.Line)
|
||||
}
|
||||
|
||||
var ref ExternalRef
|
||||
if err := n.Decode(&ref); err != nil {
|
||||
return fmt.Errorf("!external at line %d: decode: %w", n.Line, err)
|
||||
}
|
||||
if ref.Repo == "" || ref.Ref == "" || ref.Path == "" {
|
||||
return fmt.Errorf("!external at line %d: repo, ref, path are all required (got %+v)", n.Line, ref)
|
||||
}
|
||||
if !safeRefPattern.MatchString(ref.Ref) {
|
||||
return fmt.Errorf("!external at line %d: ref %q contains disallowed characters", n.Line, ref.Ref)
|
||||
}
|
||||
// Defense-in-depth: even though git itself rejects refs containing
|
||||
// `..`, the regex above currently allows them. Reject explicitly.
|
||||
if strings.Contains(ref.Ref, "..") {
|
||||
return fmt.Errorf("!external at line %d: ref %q must not contain '..'", n.Line, ref.Ref)
|
||||
}
|
||||
if strings.Contains(ref.Path, "..") || strings.HasPrefix(ref.Path, "/") {
|
||||
return fmt.Errorf("!external at line %d: path %q must be relative-and-down-only", n.Line, ref.Path)
|
||||
}
|
||||
|
||||
host := ref.URL
|
||||
if host == "" {
|
||||
host = os.Getenv("MOLECULE_EXTERNAL_GITEA_URL")
|
||||
}
|
||||
if host == "" {
|
||||
host = "git.moleculesai.app"
|
||||
}
|
||||
host = strings.TrimPrefix(strings.TrimPrefix(host, "https://"), "http://")
|
||||
host = strings.TrimSuffix(host, "/")
|
||||
|
||||
if !allowlistedHostPath(host, ref.Repo) {
|
||||
return fmt.Errorf("!external at line %d: %s/%s not in MOLECULE_EXTERNAL_REPO_ALLOWLIST", n.Line, host, ref.Repo)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), gitFetchTimeout)
|
||||
defer cancel()
|
||||
|
||||
cacheDir, sha, err := defaultExternalFetcher.Fetch(ctx, rootDir, host, ref.Repo, ref.Ref)
|
||||
if err != nil {
|
||||
return fmt.Errorf("!external at line %d: fetch %s/%s@%s: %w", n.Line, host, ref.Repo, ref.Ref, err)
|
||||
}
|
||||
|
||||
// Cycle key: (repo, sha, path) — same external content reachable
|
||||
// via two paths is fine, but a self-referential cycle isn't.
|
||||
cycleKey := fmt.Sprintf("%s/%s@%s/%s", host, ref.Repo, sha, ref.Path)
|
||||
if visited[cycleKey] {
|
||||
return fmt.Errorf("!external cycle detected at %q (line %d)", cycleKey, n.Line)
|
||||
}
|
||||
|
||||
// Validate path resolves inside the cache dir (anti-traversal).
|
||||
yamlPathAbs, err := resolveInsideRoot(cacheDir, ref.Path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("!external at line %d: path %q: %w", n.Line, ref.Path, err)
|
||||
}
|
||||
if _, err := os.Stat(yamlPathAbs); err != nil {
|
||||
return fmt.Errorf("!external at line %d: %s/%s@%s does not contain %q: %w", n.Line, host, ref.Repo, sha, ref.Path, err)
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(yamlPathAbs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("!external at line %d: read %q: %w", n.Line, yamlPathAbs, err)
|
||||
}
|
||||
|
||||
var sub yaml.Node
|
||||
if err := yaml.Unmarshal(data, &sub); err != nil {
|
||||
return fmt.Errorf("!external at line %d: parse %q: %w", n.Line, yamlPathAbs, err)
|
||||
}
|
||||
root := &sub
|
||||
if root.Kind == yaml.DocumentNode && len(root.Content) == 1 {
|
||||
root = root.Content[0]
|
||||
}
|
||||
|
||||
// Recurse FIRST: load all nested !include / !external content into
|
||||
// the tree. Then rewrite ALL files_dir scalars in the fully-resolved
|
||||
// tree (top + nested) with the cache prefix in one pass. Doing
|
||||
// rewrite-before-recurse would leave nested-loaded files_dir paths
|
||||
// unprefixed.
|
||||
visited[cycleKey] = true
|
||||
defer delete(visited, cycleKey)
|
||||
|
||||
subDir := filepath.Dir(yamlPathAbs)
|
||||
if err := expandNode(root, subDir, rootDir, visited, depth+1); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Path rewrite: prefix every files_dir scalar in the fully-resolved
|
||||
// content with the cache-relative-from-rootDir prefix. After this
|
||||
// pass, fetched workspaces look like ordinary in-tree workspaces.
|
||||
cachePrefix, err := filepath.Rel(rootDir, cacheDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("!external at line %d: cannot compute cache prefix: %w", n.Line, err)
|
||||
}
|
||||
rewriteFilesDir(root, cachePrefix)
|
||||
|
||||
// Replace the !external mapping with the resolved content in-place.
|
||||
*n = *root
|
||||
if n.Tag == "!external" {
|
||||
n.Tag = ""
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// rewriteFilesDir walks the yaml node tree and prepends cachePrefix to
|
||||
// every files_dir scalar value. Idempotent: if a files_dir value already
|
||||
// starts with the prefix, no-op.
|
||||
//
|
||||
// !include paths are intentionally NOT rewritten. They resolve relative
|
||||
// to their containing file's directory (subDir in expandNode), and after
|
||||
// fetch that directory IS inside the cache, so relative !include paths
|
||||
// Just Work without any rewrite. Rewriting them would double-prefix on
|
||||
// recursive resolution.
|
||||
//
|
||||
// files_dir DOES need rewriting because it's consumed at workspace-
|
||||
// provisioning time relative to orgBaseDir (the parent template's root),
|
||||
// not relative to the workspace.yaml's containing dir.
|
||||
func rewriteFilesDir(n *yaml.Node, cachePrefix string) {
|
||||
if n == nil {
|
||||
return
|
||||
}
|
||||
if n.Kind == yaml.MappingNode {
|
||||
for i := 0; i+1 < len(n.Content); i += 2 {
|
||||
key, value := n.Content[i], n.Content[i+1]
|
||||
if key.Kind == yaml.ScalarNode && key.Value == "files_dir" && value.Kind == yaml.ScalarNode {
|
||||
if !strings.HasPrefix(value.Value, cachePrefix+string(filepath.Separator)) && value.Value != cachePrefix {
|
||||
value.Value = filepath.Join(cachePrefix, value.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, child := range n.Content {
|
||||
rewriteFilesDir(child, cachePrefix)
|
||||
}
|
||||
}
|
||||
|
||||
// safeRepoCacheDir converts a host + repo path like
// ("git.example.com", "molecule-ai/foo") into a single filesystem-safe
// segment "git.example.com__molecule-ai__foo". Flattening avoids nested
// cache dirs (which would complicate cleanup).
func safeRepoCacheDir(host, repoPath string) string {
	combined := host + "/" + repoPath
	// '/' would create subdirectories; ':' (e.g. host:port) is unsafe on
	// some filesystems.
	sanitizer := strings.NewReplacer("/", "__", ":", "_")
	return sanitizer.Replace(combined)
}
|
||||
|
||||
// gitFetcher is the production externalFetcher: shells out to `git` to
// clone the repo at ref into the cache dir. Cache key includes the
// resolved SHA, so different SHAs of the same ref get different cache
// dirs (no overwrite).
//
// Token handling — important for security. The auth token never enters
// the clone URL (and therefore never lands in the cloned repo's
// .git/config) and never appears in returned errors. We use git's
// `http.extraHeader` config option (passed via `-c`), which sends an
// Authorization header per-request without persisting it. The token is
// briefly visible in the `git` process's argv (so other local users
// with the same uid could see it via `ps`), which is the same exposure
// it has via the env var that supplied it.
//
// Cache validity uses a `.complete` marker written after a successful
// clone+rename. Cache-hit checks for the marker, not just the dir
// existence — a partially-written cache (clone failed mid-way, or a
// concurrent caller wrote a half-baked cache dir) is treated as cache
// miss and re-fetched cleanly.
//
// The type is stateless; all state lives on disk under the cache root.
type gitFetcher struct{}
|
||||
|
||||
// cacheCompleteMarker is the filename written after a successful clone.
// Cache-hit requires this marker; without it, the cache dir is treated
// as partially-written and re-fetched. The file's content (an RFC3339
// timestamp) is informational only — presence is what matters.
const cacheCompleteMarker = ".complete"
|
||||
|
||||
// Fetch resolves ref → SHA via `git ls-remote`, then `git clone --depth=1`
|
||||
// if the cache dir is missing or incomplete. Auth via MOLECULE_GITEA_TOKEN
|
||||
// injected via http.extraHeader (never via URL).
|
||||
func (g *gitFetcher) Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (string, string, error) {
|
||||
cacheRoot := filepath.Join(rootDir, externalCacheDirName, safeRepoCacheDir(host, repoPath))
|
||||
if err := os.MkdirAll(cacheRoot, 0o755); err != nil {
|
||||
return "", "", fmt.Errorf("mkdir cache root: %w", err)
|
||||
}
|
||||
|
||||
cloneURL := buildExternalCloneURL(host, repoPath)
|
||||
gitArgs := func(extra ...string) []string {
|
||||
args := authConfigArgs()
|
||||
return append(args, extra...)
|
||||
}
|
||||
|
||||
// 1. Resolve ref → SHA (so cache dir is content-addressable).
|
||||
sha, err := g.resolveRefToSHA(ctx, cloneURL, ref, gitArgs)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("ls-remote: %s", redactToken(err.Error()))
|
||||
}
|
||||
|
||||
cacheDir := filepath.Join(cacheRoot, sha)
|
||||
// Cache-hit requires the .complete marker AND the .git dir.
|
||||
// Without the marker, cache is partially-written → treat as miss.
|
||||
if isCacheComplete(cacheDir) {
|
||||
return cacheDir, sha, nil
|
||||
}
|
||||
|
||||
// Cache miss or partially-written — clean any stale cacheDir before
|
||||
// cloning (a previous broken attempt would otherwise block rename).
|
||||
os.RemoveAll(cacheDir)
|
||||
|
||||
// 2. Clone into a sibling tmp dir; atomic rename on success.
|
||||
tmpDir, err := os.MkdirTemp(cacheRoot, sha+".tmp.")
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("mkdir tmp: %w", err)
|
||||
}
|
||||
// MkdirTemp creates the dir; git clone refuses to clone into a
|
||||
// non-empty dir. Remove + recreate empty.
|
||||
os.RemoveAll(tmpDir)
|
||||
cloneAndConfig := append(gitArgs("clone", "--quiet", "--depth=1", "-b", ref, cloneURL, tmpDir))
|
||||
cmd := exec.CommandContext(ctx, "git", cloneAndConfig...)
|
||||
cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
os.RemoveAll(tmpDir)
|
||||
return "", "", fmt.Errorf("git clone: %w: %s", err, redactToken(strings.TrimSpace(string(out))))
|
||||
}
|
||||
|
||||
// Write the .complete marker BEFORE the rename. If rename succeeds,
|
||||
// the marker is in place. If rename loses the race (concurrent
|
||||
// fetcher won), our tmp gets cleaned up and we trust the winner.
|
||||
if err := os.WriteFile(filepath.Join(tmpDir, cacheCompleteMarker), []byte(time.Now().UTC().Format(time.RFC3339)), 0o644); err != nil {
|
||||
os.RemoveAll(tmpDir)
|
||||
return "", "", fmt.Errorf("write complete marker: %w", err)
|
||||
}
|
||||
|
||||
if err := os.Rename(tmpDir, cacheDir); err != nil {
|
||||
// Race: another import beat us. Validate THEIR cache, accept it.
|
||||
os.RemoveAll(tmpDir)
|
||||
if isCacheComplete(cacheDir) {
|
||||
return cacheDir, sha, nil
|
||||
}
|
||||
return "", "", fmt.Errorf("rename clone to cache (and winner's cache is incomplete): %w", err)
|
||||
}
|
||||
return cacheDir, sha, nil
|
||||
}
|
||||
|
||||
// isCacheComplete reports whether cacheDir contains both the cloned
|
||||
// repo (.git) and the .complete marker. Treats partial state as miss.
|
||||
func isCacheComplete(cacheDir string) bool {
|
||||
if _, err := os.Stat(filepath.Join(cacheDir, ".git")); err != nil {
|
||||
return false
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(cacheDir, cacheCompleteMarker)); err != nil {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (g *gitFetcher) resolveRefToSHA(ctx context.Context, cloneURL, ref string, gitArgs func(...string) []string) (string, error) {
|
||||
args := gitArgs("ls-remote", cloneURL, ref)
|
||||
cmd := exec.CommandContext(ctx, "git", args...)
|
||||
cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
line := strings.TrimSpace(string(out))
|
||||
if line == "" {
|
||||
return "", fmt.Errorf("ref %q not found", ref)
|
||||
}
|
||||
// First whitespace-separated field is the SHA.
|
||||
for i, ch := range line {
|
||||
if ch == ' ' || ch == '\t' {
|
||||
return line[:i], nil
|
||||
}
|
||||
}
|
||||
return line, nil
|
||||
}
|
||||
|
||||
// buildExternalCloneURL constructs the https clone URL for repoPath on
// host, WITHOUT auth in the userinfo section. Auth is layered on via
// authConfigArgs's http.extraHeader instead.
func buildExternalCloneURL(host, repoPath string) string {
	clonePath := "/" + repoPath + ".git"
	endpoint := url.URL{
		Scheme: "https",
		Host:   host,
		Path:   clonePath,
	}
	return endpoint.String()
}
|
||||
|
||||
// authConfigArgs returns the `-c http.extraHeader=Authorization: token X`
|
||||
// args to pass to git, OR an empty slice if no token is set. The token
|
||||
// goes into the request headers (not the URL or .git/config), so it
|
||||
// doesn't persist on disk and doesn't appear in clone error output.
|
||||
func authConfigArgs() []string {
|
||||
token := os.Getenv("MOLECULE_GITEA_TOKEN")
|
||||
if token == "" {
|
||||
return nil
|
||||
}
|
||||
return []string{"-c", "http.extraHeader=Authorization: token " + token}
|
||||
}
|
||||
|
||||
// redactToken scrubs the auth token from a string before it's logged
|
||||
// or returned in an error. Belt-and-braces: with the http.extraHeader
|
||||
// approach the token shouldn't appear in git's output, but if some
|
||||
// future git version or libcurl debug mode emits it, this catches it.
|
||||
func redactToken(s string) string {
|
||||
token := os.Getenv("MOLECULE_GITEA_TOKEN")
|
||||
if token == "" || len(token) < 8 {
|
||||
return s
|
||||
}
|
||||
return strings.ReplaceAll(s, token, "<redacted-token>")
|
||||
}
|
||||
|
||||
@ -1,379 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// PR-B integration test: exercises the REAL gitFetcher (no fakeFetcher
|
||||
// injection) against a local bare-git repo. Uses git's `insteadOf`
|
||||
// config to rewrite the configured Gitea URL to the local bare path
|
||||
// at clone time, so the fetcher's URL-building, ls-remote, clone,
|
||||
// atomic-rename, and cache-hit paths all run against real git
|
||||
// without requiring network or modifying production code.
|
||||
//
|
||||
// Internal#77 task #233 (PR-B from the design's phasing).
|
||||
|
||||
// TestGitFetcher_RealClone_LocalRedirect proves the production
// gitFetcher round-trips correctly against a real git repository.
// Steps:
//  1. Set up a local bare-git repo with workspace content.
//  2. Configure git's `insteadOf` to rewrite the gitea URL → local path
//     via GIT_CONFIG_COUNT/KEY/VALUE env vars (process-scoped).
//  3. Run resolveYAMLIncludes with !external pointing at the gitea URL.
//  4. Assert: cache dir populated; content materialized; path rewrite
//     applied; second invocation hits cache (no second clone).
func TestGitFetcher_RealClone_LocalRedirect(t *testing.T) {
	if _, err := exec.LookPath("git"); err != nil {
		t.Skipf("git binary not found: %v", err)
	}

	if runtime.GOOS == "windows" {
		t.Skip("path-based git URLs behave differently on Windows; skipping")
	}

	// Step 1: create a local bare-git repo at <fixtures>/test-dev-dept.git
	// with workspace content. Use a working clone to add content, then
	// push to the bare.
	fixtures := t.TempDir()
	barePath := filepath.Join(fixtures, "test-dev-dept.git")
	workPath := filepath.Join(fixtures, "work")

	mustGit(t, "", "init", "--bare", "-b", "main", barePath)
	mustGit(t, "", "clone", barePath, workPath)
	mustGit(t, workPath, "config", "user.email", "test@example.com")
	mustGit(t, workPath, "config", "user.name", "Integration Test")

	// Parent workspace references a nested child via relative !include.
	mustWriteFile(t, filepath.Join(workPath, "dev-lead/workspace.yaml"), `name: Dev Lead
files_dir: dev-lead
children:
  - !include ./core-be/workspace.yaml
`)
	mustWriteFile(t, filepath.Join(workPath, "dev-lead/system-prompt.md"), "Dev Lead persona body.\n")
	mustWriteFile(t, filepath.Join(workPath, "dev-lead/core-be/workspace.yaml"), `name: Core BE
files_dir: dev-lead/core-be
`)
	mustWriteFile(t, filepath.Join(workPath, "dev-lead/core-be/system-prompt.md"), "Core BE persona body.\n")

	mustGit(t, workPath, "add", ".")
	mustGit(t, workPath, "commit", "-m", "seed dev tree")
	mustGit(t, workPath, "push", "origin", "main")

	// Step 2: configure git's insteadOf rewrite. The fetcher will try
	// to clone https://git.moleculesai.app/molecule-ai/test-dev-dept.git;
	// git rewrites to the local bare path.
	//
	// GIT_CONFIG_COUNT/KEY/VALUE injects config without touching
	// ~/.gitconfig — process-scoped, no test pollution.
	geesUrl := "https://git.moleculesai.app/molecule-ai/test-dev-dept.git"
	t.Setenv("GIT_CONFIG_COUNT", "1")
	t.Setenv("GIT_CONFIG_KEY_0", "url."+barePath+".insteadOf")
	t.Setenv("GIT_CONFIG_VALUE_0", geesUrl)

	// Step 3: run resolveYAMLIncludes with !external pointing at the
	// gitea URL. Allowlist is the default (molecule-ai/* on Gitea host).
	rootDir := t.TempDir()
	src := []byte(`workspaces:
  - !external
    repo: molecule-ai/test-dev-dept
    ref: main
    path: dev-lead/workspace.yaml
`)

	out, err := resolveYAMLIncludes(src, rootDir)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes: %v", err)
	}

	var tmpl OrgTemplate
	if err := yaml.Unmarshal(out, &tmpl); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if len(tmpl.Workspaces) != 1 {
		t.Fatalf("workspaces: %+v", tmpl.Workspaces)
	}
	dev := tmpl.Workspaces[0]
	if dev.Name != "Dev Lead" {
		t.Errorf("dev.Name = %q; want Dev Lead", dev.Name)
	}
	// files_dir must have been rewritten to point into the cache dir.
	if !strings.Contains(dev.FilesDir, ".external-cache") {
		t.Errorf("dev.FilesDir = %q; want cache prefix", dev.FilesDir)
	}
	if !strings.HasSuffix(dev.FilesDir, "dev-lead") {
		t.Errorf("dev.FilesDir = %q; want suffix dev-lead", dev.FilesDir)
	}
	// The nested !include inside the fetched repo must have resolved too.
	if len(dev.Children) != 1 {
		t.Fatalf("expected nested core-be child; got %+v", dev.Children)
	}
	core := dev.Children[0]
	if core.Name != "Core BE" {
		t.Errorf("core.Name = %q; want Core BE", core.Name)
	}
	if !strings.HasSuffix(core.FilesDir, filepath.Join("dev-lead", "core-be")) {
		t.Errorf("core.FilesDir = %q; want suffix dev-lead/core-be", core.FilesDir)
	}

	// Step 4: verify the cache dir actually exists and contains the
	// materialized files (CopyTemplateToContainer would tar these).
	cacheRoot := filepath.Join(rootDir, ".external-cache")
	entries, err := os.ReadDir(cacheRoot)
	if err != nil {
		t.Fatalf("read cache root: %v", err)
	}
	if len(entries) != 1 {
		t.Fatalf("expected 1 cached repo, got %d: %v", len(entries), entries)
	}
	repoDir := filepath.Join(cacheRoot, entries[0].Name())
	shaDirs, _ := os.ReadDir(repoDir)
	if len(shaDirs) != 1 {
		t.Fatalf("expected 1 SHA cache dir, got %d", len(shaDirs))
	}
	cacheDir := filepath.Join(repoDir, shaDirs[0].Name())
	if _, err := os.Stat(filepath.Join(cacheDir, "dev-lead/system-prompt.md")); err != nil {
		t.Errorf("expected dev-lead/system-prompt.md in cache: %v", err)
	}
	if _, err := os.Stat(filepath.Join(cacheDir, "dev-lead/core-be/system-prompt.md")); err != nil {
		t.Errorf("expected dev-lead/core-be/system-prompt.md in cache: %v", err)
	}

	// Step 5: re-run; verify cache hit (no second clone). Set a
	// "marker" file in the cache that a second clone would clobber.
	marker := filepath.Join(cacheDir, ".cache-hit-marker")
	if err := os.WriteFile(marker, []byte("hit"), 0o644); err != nil {
		t.Fatal(err)
	}
	out2, err := resolveYAMLIncludes(src, rootDir)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes second call: %v", err)
	}
	if string(out) != string(out2) {
		t.Errorf("cached output differs from initial — non-deterministic resolve")
	}
	// Marker survival proves the cached checkout was reused, not replaced.
	if _, err := os.Stat(marker); err != nil {
		t.Errorf("cache hit not honored — marker file disappeared: %v", err)
	}
}
|
||||
|
||||
// TestGitFetcher_RealClone_BadRefFails: pointing at a ref that doesn't
// exist in the bare-repo surfaces git's error cleanly (via the fetcher's
// ls-remote resolution), rather than hanging or succeeding silently.
func TestGitFetcher_RealClone_BadRefFails(t *testing.T) {
	if _, err := exec.LookPath("git"); err != nil {
		t.Skipf("git binary not found: %v", err)
	}
	if runtime.GOOS == "windows" {
		t.Skip("skipping on windows")
	}

	// Minimal bare repo with a single commit on main — the test targets
	// a branch name that was never pushed.
	fixtures := t.TempDir()
	barePath := filepath.Join(fixtures, "empty-repo.git")
	workPath := filepath.Join(fixtures, "work")
	mustGit(t, "", "init", "--bare", "-b", "main", barePath)
	mustGit(t, "", "clone", barePath, workPath)
	mustGit(t, workPath, "config", "user.email", "test@example.com")
	mustGit(t, workPath, "config", "user.name", "Test")
	mustWriteFile(t, filepath.Join(workPath, "README.md"), "x")
	mustGit(t, workPath, "add", ".")
	mustGit(t, workPath, "commit", "-m", "seed")
	mustGit(t, workPath, "push", "origin", "main")

	// Redirect the configured Gitea URL to the local bare repo.
	t.Setenv("GIT_CONFIG_COUNT", "1")
	t.Setenv("GIT_CONFIG_KEY_0", "url."+barePath+".insteadOf")
	t.Setenv("GIT_CONFIG_VALUE_0", "https://git.moleculesai.app/molecule-ai/empty-repo.git")

	rootDir := t.TempDir()
	src := []byte(`workspaces:
  - !external
    repo: molecule-ai/empty-repo
    ref: nonexistent-branch
    path: anything.yaml
`)
	_, err := resolveYAMLIncludes(src, rootDir)
	if err == nil {
		t.Fatalf("expected error for nonexistent ref; got nil")
	}
	// Accept any of the plausible phrasings from the resolve/ls-remote path.
	if !strings.Contains(err.Error(), "ref") && !strings.Contains(err.Error(), "ls-remote") && !strings.Contains(err.Error(), "not found") {
		t.Errorf("error doesn't mention ref/ls-remote: %v", err)
	}
}
|
||||
|
||||
// ---------- helpers ----------
|
||||
|
||||
// mustGit runs `git args...` (in cwd when non-empty) and fails the test
// immediately on any error, printing git's combined output for triage.
func mustGit(t *testing.T, cwd string, args ...string) {
	t.Helper()
	cmd := exec.Command("git", args...)
	if cwd != "" {
		cmd.Dir = cwd
	}
	// Provide author/committer identity via env so commands run outside
	// the configured work tree (init/clone) don't fail on missing config.
	identity := []string{
		"GIT_AUTHOR_EMAIL=test@example.com",
		"GIT_AUTHOR_NAME=Integration Test",
		"GIT_COMMITTER_EMAIL=test@example.com",
		"GIT_COMMITTER_NAME=Integration Test",
	}
	cmd.Env = append(os.Environ(), identity...)
	out, err := cmd.CombinedOutput()
	if err != nil {
		t.Fatalf("git %s: %v\n%s", strings.Join(args, " "), err, string(out))
	}
}
|
||||
|
||||
// mustWriteFile writes content to path, creating any missing parent
// directories, and fails the test on any error.
func mustWriteFile(t *testing.T, path, content string) {
	t.Helper()
	parent := filepath.Dir(path)
	if err := os.MkdirAll(parent, 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatal(err)
	}
}
|
||||
|
||||
// Verify gitFetcher.Fetch direct invocation (no resolver wrapping) for
// the cache-hit path, exercising the bare API against a local bare-repo.
func TestGitFetcher_DirectFetch_CacheHit(t *testing.T) {
	if _, err := exec.LookPath("git"); err != nil {
		t.Skipf("git binary not found: %v", err)
	}
	if runtime.GOOS == "windows" {
		t.Skip("skipping on windows")
	}

	// Seed a bare repo with a single file the assertions can look for.
	fixtures := t.TempDir()
	barePath := filepath.Join(fixtures, "direct.git")
	workPath := filepath.Join(fixtures, "w")
	mustGit(t, "", "init", "--bare", "-b", "main", barePath)
	mustGit(t, "", "clone", barePath, workPath)
	mustGit(t, workPath, "config", "user.email", "t@e")
	mustGit(t, workPath, "config", "user.name", "T")
	mustWriteFile(t, filepath.Join(workPath, "marker.txt"), "hello")
	mustGit(t, workPath, "add", ".")
	mustGit(t, workPath, "commit", "-m", "seed")
	mustGit(t, workPath, "push", "origin", "main")

	// Redirect the gitea URL to the local bare repo (process-scoped).
	t.Setenv("GIT_CONFIG_COUNT", "1")
	t.Setenv("GIT_CONFIG_KEY_0", "url."+barePath+".insteadOf")
	t.Setenv("GIT_CONFIG_VALUE_0", "https://git.moleculesai.app/molecule-ai/direct.git")

	rootDir := t.TempDir()
	g := &gitFetcher{}
	ctx := context.Background()

	// First call: cold cache → real clone into a SHA-addressed dir.
	cacheDir1, sha1, err := g.Fetch(ctx, rootDir, "git.moleculesai.app", "molecule-ai/direct", "main")
	if err != nil {
		t.Fatalf("first Fetch: %v", err)
	}
	if sha1 == "" || len(sha1) < 7 {
		t.Errorf("expected SHA-like string, got %q", sha1)
	}
	if _, err := os.Stat(filepath.Join(cacheDir1, "marker.txt")); err != nil {
		t.Errorf("first fetch missing marker.txt: %v", err)
	}

	// Second call: cache hit, returns same dir + sha, no re-clone.
	// The stamp file would be clobbered if Fetch re-cloned.
	stamp := filepath.Join(cacheDir1, ".not-clobbered-by-second-fetch")
	if err := os.WriteFile(stamp, []byte("x"), 0o644); err != nil {
		t.Fatal(err)
	}
	cacheDir2, sha2, err := g.Fetch(ctx, rootDir, "git.moleculesai.app", "molecule-ai/direct", "main")
	if err != nil {
		t.Fatalf("second Fetch: %v", err)
	}
	if cacheDir2 != cacheDir1 || sha2 != sha1 {
		t.Errorf("cache miss on second call: %q/%q vs %q/%q", cacheDir1, sha1, cacheDir2, sha2)
	}
	if _, err := os.Stat(stamp); err != nil {
		t.Errorf("cache hit not honored — stamp file disappeared: %v", err)
	}
}
|
||||
|
||||
// TestGitFetcher_RejectsRefWithDoubleDot: defense-in-depth on ref input.
// safeRefPattern allows '.' as a regex character, so ".." would match
// without an explicit deny. Verify it's rejected even though git itself
// would also reject the resulting clone. No fetcher stub is needed:
// validation must fail before any fetch happens.
func TestGitFetcher_RejectsRefWithDoubleDot(t *testing.T) {
	rootDir := t.TempDir()
	src := []byte(`workspaces:
  - !external
    repo: molecule-ai/x
    ref: foo..bar
    path: x.yaml
`)
	_, err := resolveYAMLIncludes(src, rootDir)
	if err == nil {
		t.Fatalf("expected '..' rejection")
	}
	if !strings.Contains(err.Error(), "..") {
		t.Errorf("expected '..' in error; got %v", err)
	}
}
|
||||
|
||||
// TestGitFetcher_CacheValidatedByCompleteMarker: a partially-written
// cache (the .git dir exists but no .complete marker) is treated as
// cache-miss and re-fetched. Catches the broken-cache-permanence bug.
func TestGitFetcher_CacheValidatedByCompleteMarker(t *testing.T) {
	if _, err := exec.LookPath("git"); err != nil {
		t.Skipf("git not found: %v", err)
	}
	if runtime.GOOS == "windows" {
		t.Skip("skipping on windows")
	}

	// Seed a bare repo with one committed file.
	fixtures := t.TempDir()
	barePath := filepath.Join(fixtures, "test.git")
	workPath := filepath.Join(fixtures, "w")
	mustGit(t, "", "init", "--bare", "-b", "main", barePath)
	mustGit(t, "", "clone", barePath, workPath)
	mustGit(t, workPath, "config", "user.email", "t@e")
	mustGit(t, workPath, "config", "user.name", "T")
	mustWriteFile(t, filepath.Join(workPath, "good.txt"), "from-network")
	mustGit(t, workPath, "add", ".")
	mustGit(t, workPath, "commit", "-m", "seed")
	mustGit(t, workPath, "push", "origin", "main")
	// Redirect the gitea URL to the local bare repo (process-scoped).
	t.Setenv("GIT_CONFIG_COUNT", "1")
	t.Setenv("GIT_CONFIG_KEY_0", "url."+barePath+".insteadOf")
	t.Setenv("GIT_CONFIG_VALUE_0", "https://git.moleculesai.app/molecule-ai/marker-test.git")

	rootDir := t.TempDir()
	g := &gitFetcher{}

	// First fetch — populates the cache (creates .complete marker).
	cacheDir1, _, err := g.Fetch(context.Background(), rootDir, "git.moleculesai.app", "molecule-ai/marker-test", "main")
	if err != nil {
		t.Fatalf("first Fetch: %v", err)
	}
	marker := filepath.Join(cacheDir1, cacheCompleteMarker)
	if _, err := os.Stat(marker); err != nil {
		t.Fatalf("first fetch should have written .complete marker: %v", err)
	}

	// Now simulate a partial cache: delete the marker but leave .git
	// in place. The next Fetch should treat this as cache-miss and
	// re-fetch (NOT silently use the partial cache).
	if err := os.Remove(marker); err != nil {
		t.Fatal(err)
	}
	// Drop a sentinel file the second fetch will clobber if it re-fetches.
	sentinel := filepath.Join(cacheDir1, "_should_be_clobbered")
	if err := os.WriteFile(sentinel, []byte("partial"), 0o644); err != nil {
		t.Fatal(err)
	}

	cacheDir2, _, err := g.Fetch(context.Background(), rootDir, "git.moleculesai.app", "molecule-ai/marker-test", "main")
	if err != nil {
		t.Fatalf("second Fetch: %v", err)
	}
	if cacheDir1 != cacheDir2 {
		t.Errorf("cache dirs differ across fetches: %q vs %q", cacheDir1, cacheDir2)
	}
	if _, err := os.Stat(filepath.Join(cacheDir2, cacheCompleteMarker)); err != nil {
		t.Errorf("re-fetch should have re-written .complete marker: %v", err)
	}
	// Sentinel gone ⇒ the stale cache dir was wiped and re-cloned.
	if _, err := os.Stat(sentinel); err == nil {
		t.Errorf("sentinel still present — re-fetch did NOT clobber partial cache")
	}
}
|
||||
@ -1,331 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// fakeFetcher pre-stages a "fetched" repo at a fixed path inside the
// rootDir's .external-cache, bypassing the real git clone. Tests
// inject this via SetExternalFetcherForTest to exercise the resolver
// + path-rewrite logic without network.
type fakeFetcher struct {
	// content maps "<host>/<repo>@<ref>" → a function that materializes
	// repo content under cacheDir. Returns the fake SHA to use.
	// A key missing from this map makes Fetch fail with fakeNotFoundError.
	content map[string]func(cacheDir string) (sha string, err error)
}
|
||||
|
||||
func (f *fakeFetcher) Fetch(ctx context.Context, rootDir, host, repoPath, ref string) (string, string, error) {
|
||||
key := host + "/" + repoPath + "@" + ref
|
||||
stage, ok := f.content[key]
|
||||
if !ok {
|
||||
return "", "", &fakeNotFoundError{key: key}
|
||||
}
|
||||
// Use a stable SHA for the test so cache dir is deterministic.
|
||||
cacheDir := filepath.Join(rootDir, ".external-cache", safeRepoCacheDir(host, repoPath), "deadbeef")
|
||||
if err := os.MkdirAll(cacheDir, 0o755); err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
sha, err := stage(cacheDir)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
return cacheDir, sha, nil
|
||||
}
|
||||
|
||||
// fakeNotFoundError reports a fetch key with no registered content in
// the fakeFetcher's content map.
type fakeNotFoundError struct{ key string }

// Error implements the error interface.
func (e *fakeNotFoundError) Error() string {
	const prefix = "fake fetcher: no content registered for "
	return prefix + e.key
}
|
||||
|
||||
// stageFiles writes a map of relative-path → content into cacheDir,
|
||||
// returning a fake SHA. Helper for fakeFetcher closures.
|
||||
func stageFiles(cacheDir string, files map[string]string) error {
|
||||
if err := os.MkdirAll(filepath.Join(cacheDir, ".git"), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
for path, content := range files {
|
||||
full := filepath.Join(cacheDir, path)
|
||||
if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.WriteFile(full, []byte(content), 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TestResolveExternalMapping_HappyPath: a parent template with an
// !external entry resolves cleanly into the fetched workspace + path-
// rewrites files_dir + relative !include refs into the cache prefix.
func TestResolveExternalMapping_HappyPath(t *testing.T) {
	tmp := t.TempDir()

	// Stub fetcher: "fetched" content has a workspace.yaml that uses
	// files_dir + nested !include relative to the fetched repo's root.
	fake := &fakeFetcher{
		content: map[string]func(string) (string, error){
			"git.moleculesai.app/molecule-ai/molecule-dev-department@main": func(cacheDir string) (string, error) {
				return "deadbeef", stageFiles(cacheDir, map[string]string{
					"dev-lead/workspace.yaml": `name: Dev Lead
files_dir: dev-lead
children:
  - !include ./core-lead/workspace.yaml
`,
					"dev-lead/core-lead/workspace.yaml": `name: Core Platform Lead
files_dir: dev-lead/core-lead
`,
				})
			},
		},
	}
	cleanup := SetExternalFetcherForTest(fake)
	defer cleanup()

	src := []byte(`name: Parent
workspaces:
  - !external
    repo: molecule-ai/molecule-dev-department
    ref: main
    path: dev-lead/workspace.yaml
`)

	out, err := resolveYAMLIncludes(src, tmp)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes: %v", err)
	}

	var tmpl OrgTemplate
	if err := yaml.Unmarshal(out, &tmpl); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if len(tmpl.Workspaces) != 1 {
		t.Fatalf("workspaces: %+v", tmpl.Workspaces)
	}
	dev := tmpl.Workspaces[0]
	if dev.Name != "Dev Lead" {
		t.Errorf("dev.Name = %q; want Dev Lead", dev.Name)
	}
	// files_dir should be cache-prefixed: .external-cache/<repo>/<sha>/...
	wantPrefix := filepath.Join(".external-cache", "git.moleculesai.app__molecule-ai__molecule-dev-department", "deadbeef")
	if !strings.HasPrefix(dev.FilesDir, wantPrefix) {
		t.Errorf("dev.FilesDir = %q; want prefix %q", dev.FilesDir, wantPrefix)
	}
	if !strings.HasSuffix(dev.FilesDir, "dev-lead") {
		t.Errorf("dev.FilesDir = %q; want suffix dev-lead", dev.FilesDir)
	}
	// Nested child: files_dir cache-prefixed, name Core Platform Lead.
	if len(dev.Children) != 1 {
		t.Fatalf("dev.Children: %+v", dev.Children)
	}
	core := dev.Children[0]
	if core.Name != "Core Platform Lead" {
		t.Errorf("core.Name = %q; want Core Platform Lead", core.Name)
	}
	if !strings.HasPrefix(core.FilesDir, wantPrefix) {
		t.Errorf("core.FilesDir = %q; want prefix %q", core.FilesDir, wantPrefix)
	}
	if !strings.HasSuffix(core.FilesDir, filepath.Join("dev-lead", "core-lead")) {
		t.Errorf("core.FilesDir = %q; want suffix dev-lead/core-lead", core.FilesDir)
	}
}
|
||||
|
||||
// TestResolveExternalMapping_AllowlistRejection: hostile yaml pointing
// at a non-allowlisted repo gets rejected before any fetch is attempted
// (the fake fetcher has no content registered, so reaching it would fail
// with a different error than the one asserted here).
func TestResolveExternalMapping_AllowlistRejection(t *testing.T) {
	tmp := t.TempDir()
	fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
	cleanup := SetExternalFetcherForTest(fake)
	defer cleanup()

	// Default allowlist is git.moleculesai.app/molecule-ai/*.
	// github.com/foo/bar is NOT in it.
	src := []byte(`workspaces:
  - !external
    repo: foo/bar
    ref: main
    path: x.yaml
    url: github.com
`)
	_, err := resolveYAMLIncludes(src, tmp)
	if err == nil {
		t.Fatalf("expected allowlist rejection, got nil")
	}
	// The error should point operators at the env var that controls it.
	if !strings.Contains(err.Error(), "MOLECULE_EXTERNAL_REPO_ALLOWLIST") {
		t.Errorf("expected allowlist error; got %v", err)
	}
}
|
||||
|
||||
// TestResolveExternalMapping_PathTraversalRejection: hostile yaml
// with `path: ../../etc/passwd` gets rejected before fetch — the path
// must stay relative and inside the fetched repo.
func TestResolveExternalMapping_PathTraversalRejection(t *testing.T) {
	tmp := t.TempDir()
	fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
	cleanup := SetExternalFetcherForTest(fake)
	defer cleanup()

	src := []byte(`workspaces:
  - !external
    repo: molecule-ai/dev-department
    ref: main
    path: ../../etc/passwd
`)
	_, err := resolveYAMLIncludes(src, tmp)
	if err == nil {
		t.Fatalf("expected path traversal rejection, got nil")
	}
	if !strings.Contains(err.Error(), "relative-and-down-only") {
		t.Errorf("expected path traversal error; got %v", err)
	}
}
|
||||
|
||||
// TestResolveExternalMapping_BadRefRejection: non-allowlisted ref chars.
//
// A ref like `main; rm -rf /` would be a shell-injection vector if the
// fetcher ever interpolates it into a command line; the resolver must
// reject it up front with a "disallowed characters" error.
func TestResolveExternalMapping_BadRefRejection(t *testing.T) {
	tmp := t.TempDir()
	fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
	cleanup := SetExternalFetcherForTest(fake)
	defer cleanup()

	src := []byte(`workspaces:
  - !external
    repo: molecule-ai/dev-department
    ref: "main; rm -rf /"
    path: foo.yaml
`)
	_, err := resolveYAMLIncludes(src, tmp)
	if err == nil || !strings.Contains(err.Error(), "disallowed characters") {
		t.Errorf("expected ref-validation error; got %v", err)
	}
}
|
||||
|
||||
// TestResolveExternalMapping_MissingRequiredFields: repo / ref / path
// are all required.
//
// Each case drops exactly one field; all three must fail with an error
// mentioning "required" so the message tells the template author what
// to add.
func TestResolveExternalMapping_MissingRequiredFields(t *testing.T) {
	tmp := t.TempDir()
	fake := &fakeFetcher{content: map[string]func(string) (string, error){}}
	cleanup := SetExternalFetcherForTest(fake)
	defer cleanup()

	cases := []string{
		// missing repo
		`workspaces:
  - !external
    ref: main
    path: x.yaml
`,
		// missing ref
		`workspaces:
  - !external
    repo: molecule-ai/x
    path: x.yaml
`,
		// missing path
		`workspaces:
  - !external
    repo: molecule-ai/x
    ref: main
`,
	}
	for i, src := range cases {
		_, err := resolveYAMLIncludes([]byte(src), tmp)
		if err == nil {
			t.Errorf("case %d: expected required-field error, got nil", i)
		} else if !strings.Contains(err.Error(), "required") {
			t.Errorf("case %d: want 'required' in error; got %v", i, err)
		}
	}
}
|
||||
|
||||
// TestRewriteFilesDir: verify the path-rewrite walker
// prefixes files_dir scalars. !include scalars are NOT rewritten —
// they resolve relative to their containing file's dir, which post-
// fetch is naturally inside the cache.
//
// Covers both a top-level files_dir and a nested one (inner:) to prove
// the walker recurses through mapping values.
func TestRewriteFilesDir(t *testing.T) {
	src := `name: Foo
files_dir: dev-lead
children:
  - !include ./bar/workspace.yaml
  - !include other-team.yaml
inner:
  files_dir: dev-lead/sub
`
	var n yaml.Node
	if err := yaml.Unmarshal([]byte(src), &n); err != nil {
		t.Fatal(err)
	}
	rewriteFilesDir(&n, ".external-cache/foo/bar")

	out, err := yaml.Marshal(&n)
	if err != nil {
		t.Fatal(err)
	}
	got := string(out)
	for _, want := range []string{
		"files_dir: .external-cache/foo/bar/dev-lead",
		"files_dir: .external-cache/foo/bar/dev-lead/sub",
		// !include preserved as-is; resolves naturally via subDir.
		"!include ./bar/workspace.yaml",
		"!include other-team.yaml",
	} {
		if !strings.Contains(got, want) {
			t.Errorf("missing %q in:\n%s", want, got)
		}
	}
}
|
||||
|
||||
// TestRewriteFilesDir_Idempotent: re-running the rewriter
// on already-prefixed files_dir doesn't double-prefix.
//
// Matters because resolveYAMLIncludes can visit the same subtree more
// than once (e.g. a re-expanded include); the rewrite must be safe to
// apply repeatedly with the same prefix.
func TestRewriteFilesDir_Idempotent(t *testing.T) {
	src := `files_dir: .external-cache/foo/bar/dev-lead
inner:
  files_dir: .external-cache/foo/bar/dev-lead/sub
`
	var n yaml.Node
	if err := yaml.Unmarshal([]byte(src), &n); err != nil {
		t.Fatal(err)
	}
	rewriteFilesDir(&n, ".external-cache/foo/bar")

	out, _ := yaml.Marshal(&n)
	got := string(out)
	if strings.Contains(got, ".external-cache/foo/bar/.external-cache") {
		t.Errorf("double-prefix detected:\n%s", got)
	}
	// Should still be valid (single-prefixed) afterwards.
	for _, want := range []string{
		"files_dir: .external-cache/foo/bar/dev-lead",
		"files_dir: .external-cache/foo/bar/dev-lead/sub",
	} {
		if !strings.Contains(got, want) {
			t.Errorf("expected unchanged %q in:\n%s", want, got)
		}
	}
}
|
||||
|
||||
// TestAllowlistedHostPath: env-var override + glob matching.
//
// Empty MOLECULE_EXTERNAL_REPO_ALLOWLIST means "use the built-in
// default" (git.moleculesai.app/molecule-ai/*). A non-empty value is a
// comma-separated list of host/path glob patterns that fully REPLACES
// the default. t.Setenv handles restore automatically.
func TestAllowlistedHostPath(t *testing.T) {
	t.Setenv("MOLECULE_EXTERNAL_REPO_ALLOWLIST", "")
	if !allowlistedHostPath("git.moleculesai.app", "molecule-ai/foo") {
		t.Error("default allowlist should accept molecule-ai/*")
	}
	if allowlistedHostPath("github.com", "molecule-ai/foo") {
		t.Error("default allowlist should reject github.com")
	}
	t.Setenv("MOLECULE_EXTERNAL_REPO_ALLOWLIST", "github.com/me/*,git.moleculesai.app/*")
	if !allowlistedHostPath("github.com", "me/x") {
		t.Error("override should accept github.com/me/*")
	}
	if !allowlistedHostPath("git.moleculesai.app", "any/repo") {
		t.Error("override should accept git.moleculesai.app/*")
	}
	if allowlistedHostPath("github.com", "evil/x") {
		t.Error("override should reject github.com/evil/*")
	}
}
|
||||
@ -6,7 +6,6 @@ package handlers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
@ -103,56 +102,6 @@ func loadWorkspaceEnv(orgBaseDir, filesDir string) map[string]string {
|
||||
return envVars
|
||||
}
|
||||
|
||||
// loadPersonaEnvFile merges per-role persona credentials into out. The file
// lives at $MOLECULE_PERSONA_ROOT/<role>/env (default
// /etc/molecule-bootstrap/personas) and is populated by the operator-host
// bootstrap kit — one persona per dev-tree role, each carrying the role's
// Gitea identity (GITEA_USER, GITEA_TOKEN, GITEA_TOKEN_SCOPES,
// GITEA_USER_EMAIL, GITEA_SSH_KEY_PATH).
//
// Lower precedence than the org and workspace .env files: callers should
// invoke this BEFORE parseEnvFile on those, so a workspace .env can
// override a persona-default value when needed.
//
// Silent no-op when role is empty, when the role name fails the safe-segment
// check, or when the env file does not exist (workspaces without a role —
// or running on hosts that don't ship the bootstrap dir — keep their old
// behavior).
func loadPersonaEnvFile(role string, out map[string]string) {
	if !isSafeRoleName(role) {
		// Empty role is the routine "no persona" case — stay quiet.
		// A non-empty-but-unsafe name is worth a log line: it usually
		// means a template passed the wrong field (see callers).
		if role != "" {
			log.Printf("Org import: refusing persona env load for unsafe role name %q", role)
		}
		return
	}
	root := os.Getenv("MOLECULE_PERSONA_ROOT")
	if root == "" {
		root = "/etc/molecule-bootstrap/personas"
	}
	// parseEnvFile is itself a silent no-op on a missing file.
	parseEnvFile(filepath.Join(root, role, "env"), out)
}
|
||||
|
||||
// isSafeRoleName reports whether s is a safe single path segment:
// one or more characters from [A-Za-z0-9_-]. Rejects empty, ".",
// "..", and anything with a path separator or other punctuation —
// even though the construct is admin-only, defense-in-depth keeps the
// persona dir shape invariant: one flat directory per role, no
// climbing out.
func isSafeRoleName(s string) bool {
	switch s {
	case "", ".", "..":
		return false
	}
	// Character allowlist: ASCII letters, digits, hyphen, underscore.
	allowed := func(c rune) bool {
		return (c >= 'a' && c <= 'z') ||
			(c >= 'A' && c <= 'Z') ||
			(c >= '0' && c <= '9') ||
			c == '-' || c == '_'
	}
	for _, c := range s {
		if !allowed(c) {
			return false
		}
	}
	return true
}
|
||||
|
||||
// parseEnvFile reads a .env file and adds KEY=VALUE pairs to the map.
|
||||
// Skips comments (#) and empty lines. Values can be quoted.
|
||||
func parseEnvFile(path string, out map[string]string) {
|
||||
|
||||
@ -42,20 +42,6 @@ import (
|
||||
// straight into the parent's child-coordinate space without doing a
|
||||
// canvas-wide absolute-position walk.
|
||||
func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX, absY, relX, relY float64, defaults OrgDefaults, orgBaseDir string, results *[]map[string]interface{}, provisionSem chan struct{}) error {
|
||||
// spawning: false guard — skip this workspace AND all descendants.
|
||||
// Pointer-typed so we distinguish "explicitly false" from "unset"
|
||||
// (unset = default to spawn). The guard sits BEFORE any side effect
|
||||
// (no DB row, no docker provision, no children recursion) so a
|
||||
// false-spawning subtree is genuinely a no-op except for the log line.
|
||||
// Use case: dev-tree org template ships the full role taxonomy but a
|
||||
// developer's machine only has RAM for a subset; a per-workspace
|
||||
// `spawning: false` lets them narrow without editing the parent
|
||||
// template's structure.
|
||||
if ws.Spawning != nil && !*ws.Spawning {
|
||||
log.Printf("Org import: skipping workspace %q (spawning=false; %d descendant workspace(s) in subtree also skipped)", ws.Name, countWorkspaces(ws.Children))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Apply defaults
|
||||
runtime := ws.Runtime
|
||||
if runtime == "" {
|
||||
@ -457,35 +443,10 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
configFiles["system-prompt.md"] = []byte(ws.SystemPrompt)
|
||||
}
|
||||
|
||||
// Inject secrets from persona env + .env files as workspace secrets.
|
||||
// Resolution (later overrides earlier):
|
||||
// 0. Persona env (per-role bootstrap creds; only when ws.Role is set
|
||||
// and the operator-host bootstrap dir ships a matching file)
|
||||
// 1. Org root .env (shared defaults)
|
||||
// 2. Workspace-specific .env (per-workspace overrides)
|
||||
// Inject secrets from .env files as workspace secrets.
|
||||
// Resolution: workspace .env → org root .env (workspace overrides org root).
|
||||
// Each line: KEY=VALUE → stored as encrypted workspace secret.
|
||||
envVars := map[string]string{}
|
||||
// 0. Persona env (lowest precedence; injects the role's Gitea identity:
|
||||
// GITEA_USER, GITEA_TOKEN, GITEA_TOKEN_SCOPES, GITEA_USER_EMAIL,
|
||||
// GITEA_SSH_KEY_PATH, plus MODEL_PROVIDER/MODEL and the LLM auth
|
||||
// token like CLAUDE_CODE_OAUTH_TOKEN or MINIMAX_API_KEY).
|
||||
// Workspace and org .env can override.
|
||||
//
|
||||
// Use ws.FilesDir as the persona-dir lookup key, NOT ws.Role. In the
|
||||
// dev-tree org.yaml shape, `role:` carries the multi-line descriptive
|
||||
// text the agent reads from its prompt ("Engineering planning and
|
||||
// team coordination — leads Core Platform, Controlplane, ..."), while
|
||||
// `files_dir:` holds the short slug (`core-lead`, `dev-lead`, etc.)
|
||||
// matching `~/.molecule-ai/personas/<files_dir>/env`
|
||||
// (bind-mounted to `/etc/molecule-bootstrap/personas/<files_dir>/env`).
|
||||
//
|
||||
// Pre-fix, this passed `ws.Role` whose multi-word content failed
|
||||
// isSafeRoleName silently, so every imported workspace booted with
|
||||
// zero persona-env rows in workspace_secrets — no ANTHROPIC /
|
||||
// CLAUDE_CODE auth in the container env. The claude_agent_sdk
|
||||
// then wedged on `query.initialize()` with a 60s control-request
|
||||
// timeout (caught 2026-05-08 right after dev-only org/import).
|
||||
loadPersonaEnvFile(ws.FilesDir, envVars)
|
||||
if orgBaseDir != "" {
|
||||
// 1. Org root .env (shared defaults)
|
||||
parseEnvFile(filepath.Join(orgBaseDir, ".env"), envVars)
|
||||
|
||||
@ -1,158 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
)
|
||||
|
||||
// Tests for the reconcile-mode + audit-event additions to OrgHandler.Import.
|
||||
//
|
||||
// Background: /org/import was purely additive — re-running with a tree that
|
||||
// renamed/reparented a role left the prior workspace online (different
|
||||
// parent_id from the new one, so lookupExistingChild's parent-scoped dedupe
|
||||
// missed it). The 2026-05-08 dev-tree case left 8 orphans surviving a
|
||||
// re-import. mode="reconcile" closes the gap; emitOrgEvent makes "what
|
||||
// happened at 20:13?" queryable instead of stdout-grep archaeology.
|
||||
|
||||
// TestWalkOrgWorkspaceNames_FlatTree: a flat (childless) workspace list
// yields exactly the top-level names. Sorted before comparison because
// the walker's emission order is not part of the contract.
func TestWalkOrgWorkspaceNames_FlatTree(t *testing.T) {
	tree := []OrgWorkspace{
		{Name: "Dev Lead"},
		{Name: "Release Manager"},
	}
	var names []string
	walkOrgWorkspaceNames(tree, &names)
	sort.Strings(names)
	want := []string{"Dev Lead", "Release Manager"}
	if !equalStrings(names, want) {
		t.Errorf("flat tree: got %v, want %v", names, want)
	}
}
|
||||
|
||||
// TestWalkOrgWorkspaceNames_NestedTree: names are collected from every
// depth of the tree (three levels here), not just the top level.
func TestWalkOrgWorkspaceNames_NestedTree(t *testing.T) {
	tree := []OrgWorkspace{
		{
			Name: "Dev Lead",
			Children: []OrgWorkspace{
				{Name: "Core Platform Lead", Children: []OrgWorkspace{{Name: "Core-BE"}}},
				{Name: "SDK Lead"},
			},
		},
	}
	var names []string
	walkOrgWorkspaceNames(tree, &names)
	sort.Strings(names)
	want := []string{"Core Platform Lead", "Core-BE", "Dev Lead", "SDK Lead"}
	if !equalStrings(names, want) {
		t.Errorf("nested tree: got %v, want %v", names, want)
	}
}
|
||||
|
||||
// Pins the contract that spawning:false subtrees still contribute their names
// to the reconcile working set. If the walker started skipping them, a
// re-import that toggled spawning would orphan whichever workspaces had been
// previously imported with spawning:true — the inverse of the bug being
// fixed. Spawning gates *provisioning*, not *reconcile membership*.
func TestWalkOrgWorkspaceNames_SpawningFalseStillCounted(t *testing.T) {
	// Spawning is *bool: a pointer to false is "explicitly disabled".
	f := false
	tree := []OrgWorkspace{
		{Name: "Dev Lead", Children: []OrgWorkspace{
			{Name: "Skipped Lead", Spawning: &f, Children: []OrgWorkspace{
				{Name: "Skipped Child"},
			}},
		}},
	}
	var names []string
	walkOrgWorkspaceNames(tree, &names)
	sort.Strings(names)
	want := []string{"Dev Lead", "Skipped Child", "Skipped Lead"}
	if !equalStrings(names, want) {
		t.Errorf("spawning:false subtree: got %v, want %v", names, want)
	}
}
|
||||
|
||||
// TestWalkOrgWorkspaceNames_EmptyNamesSkipped: workspaces with an empty
// Name (YAML zero value / placeholder entries) are excluded from the
// collected set rather than contributing "".
func TestWalkOrgWorkspaceNames_EmptyNamesSkipped(t *testing.T) {
	tree := []OrgWorkspace{
		{Name: "Dev Lead"},
		{Name: ""}, // YAML default / placeholder
		{Name: "Release Manager"},
	}
	var names []string
	walkOrgWorkspaceNames(tree, &names)
	sort.Strings(names)
	want := []string{"Dev Lead", "Release Manager"}
	if !equalStrings(names, want) {
		t.Errorf("empty-name skip: got %v, want %v", names, want)
	}
}
|
||||
|
||||
// emitOrgEvent must INSERT into structure_events with event_type + JSON
// payload. Verifies the SQL shape pinning so a future schema rename
// (e.g., switching to audit_events) breaks the test loudly instead of
// silently dropping telemetry.
//
// The payload arg is AnyArg because JSON map key order is
// non-deterministic; only the event_type is matched exactly.
func TestEmitOrgEvent_InsertsToStructureEvents(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectExec(`INSERT INTO structure_events`).
		WithArgs("org.import.started", sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(1, 1))

	emitOrgEvent(context.Background(), "org.import.started", map[string]any{
		"name": "test-org",
		"mode": "reconcile",
	})

	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations: %v", err)
	}
}
|
||||
|
||||
// Insert failures are log-and-swallow — telemetry MUST NOT block the
// caller path. If this regresses (e.g., a future patch returns the err),
// org-import requests would fail with HTTP 500 every time a structure_events
// INSERT hiccups, which is strictly worse than losing the row.
func TestEmitOrgEvent_DBErrorIsSwallowed(t *testing.T) {
	mock := setupTestDB(t)
	mock.ExpectExec(`INSERT INTO structure_events`).
		WithArgs("org.import.failed", sqlmock.AnyArg()).
		WillReturnError(errSentinelTest)

	// Must not panic; must not propagate. The function returns nothing,
	// so the contract is "doesn't crash."
	emitOrgEvent(context.Background(), "org.import.failed", map[string]any{
		"err": "preflight failed",
	})

	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("sqlmock expectations: %v", err)
	}
}
|
||||
|
||||
// TestErrString: nil maps to "" (callers embed the result in JSON
// payloads, so a nil error must not render as "<nil>"); a real error
// maps to its Error() text.
func TestErrString(t *testing.T) {
	if got := errString(nil); got != "" {
		t.Errorf("nil error: got %q, want empty", got)
	}
	if got := errString(errSentinelTest); got != "sentinel" {
		t.Errorf("sentinel error: got %q, want \"sentinel\"", got)
	}
}
|
||||
|
||||
// errSentinelTest is a marker error used for swallow-error assertions.
// A dedicated zero-size type (rather than errors.New) so identity
// comparison is unambiguous across test functions.
var errSentinelTest = sentinelErrTest{}

// sentinelErrTest implements error with a fixed message.
type sentinelErrTest struct{}

func (sentinelErrTest) Error() string { return "sentinel" }
|
||||
|
||||
// equalStrings reports whether a and b hold the same elements in the
// same order. Test-local helper; nil and empty slices compare equal.
func equalStrings(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if v != b[i] {
			return false
		}
	}
	return true
}
|
||||
@ -76,12 +76,6 @@ func expandNode(n *yaml.Node, currentDir, rootDir string, visited map[string]boo
|
||||
return resolveIncludeScalar(n, currentDir, rootDir, visited, depth)
|
||||
}
|
||||
|
||||
// `!external`-tagged mapping: gitops cross-repo subtree composition.
|
||||
// See org_external.go (internal#77 / task #222).
|
||||
if n.Kind == yaml.MappingNode && n.Tag == "!external" {
|
||||
return resolveExternalMapping(n, currentDir, rootDir, visited, depth)
|
||||
}
|
||||
|
||||
for _, child := range n.Content {
|
||||
if err := expandNode(child, currentDir, rootDir, visited, depth); err != nil {
|
||||
return err
|
||||
|
||||
@ -1,136 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Phase 5 (RFC internal#77 dev-department extraction):
// Proves a parent org template can compose a subtree from a sibling repo
// via a directory symlink. Pattern that gets shipped:
//
//	/org-templates/parent-template/        ← imported by POST /org/import
//	  org.yaml (workspaces: !include dev/dev-lead/workspace.yaml)
//	  dev → /org-templates/molecule-dev-department/   (symlink)
//	/org-templates/molecule-dev-department/           (sibling repo)
//	  dev-lead/
//	    workspace.yaml (children: !include ./core-platform/workspace.yaml)
//	    core-platform/
//	      workspace.yaml
//
// resolveYAMLIncludes resolves paths via filepath.Abs/Rel (no symlink
// following at the path-string layer), so the security check passes. The
// actual file open uses os.ReadFile, which DOES follow symlinks — so the
// content from the sibling repo gets inlined. This test pins that contract.
func TestResolveYAMLIncludes_FollowsDirectorySymlink(t *testing.T) {
	tmp := t.TempDir()

	// Subtree repo: dev-department/dev-lead/...
	devDept := filepath.Join(tmp, "molecule-dev-department")
	devLead := filepath.Join(devDept, "dev-lead")
	corePlatform := filepath.Join(devLead, "core-platform")
	if err := os.MkdirAll(corePlatform, 0o755); err != nil {
		t.Fatal(err)
	}
	// dev-lead/workspace.yaml — uses `./core-platform/workspace.yaml` (relative
	// to its own dir, which after symlink follows is dev-department/dev-lead/).
	devLeadYAML := []byte(`name: Dev Lead
tier: 3
children:
  - !include ./core-platform/workspace.yaml
`)
	if err := os.WriteFile(filepath.Join(devLead, "workspace.yaml"), devLeadYAML, 0o644); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(corePlatform, "workspace.yaml"), []byte("name: Core Platform\ntier: 3\n"), 0o644); err != nil {
		t.Fatal(err)
	}

	// Parent template: parent/, with `dev` symlink → ../molecule-dev-department/
	parent := filepath.Join(tmp, "parent-template")
	if err := os.MkdirAll(parent, 0o755); err != nil {
		t.Fatal(err)
	}
	// Symlink TARGET is a relative path (matches operator-side deploy
	// convention where both repos are cloned as siblings under a shared
	// /org-templates/ dir).
	if err := os.Symlink("../molecule-dev-department", filepath.Join(parent, "dev")); err != nil {
		// e.g. Windows without privileges, or restricted CI filesystems.
		t.Skipf("symlinks unsupported on this fs: %v", err)
	}

	// Parent's org.yaml: !include into the symlinked subtree.
	src := []byte(`name: Parent
workspaces:
  - !include dev/dev-lead/workspace.yaml
`)

	out, err := resolveYAMLIncludes(src, parent)
	if err != nil {
		t.Fatalf("resolveYAMLIncludes through symlink failed: %v", err)
	}

	var tmpl OrgTemplate
	if err := yaml.Unmarshal(out, &tmpl); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if len(tmpl.Workspaces) != 1 {
		t.Fatalf("expected 1 workspace, got %d", len(tmpl.Workspaces))
	}
	if tmpl.Workspaces[0].Name != "Dev Lead" {
		t.Fatalf("workspace[0].Name = %q; want Dev Lead", tmpl.Workspaces[0].Name)
	}
	kids := tmpl.Workspaces[0].Children
	if len(kids) != 1 {
		t.Fatalf("expected 1 child workspace, got %d", len(kids))
	}
	if kids[0].Name != "Core Platform" {
		t.Fatalf("child[0].Name = %q; want Core Platform — symlink-aware nested !include broken", kids[0].Name)
	}
}
|
||||
|
||||
// Companion: prove the security check still works when the symlink target
|
||||
// is OUTSIDE the parent template's root. This is the "hostile symlink"
|
||||
// case — an org.yaml that tries to slip in arbitrary files from /etc.
|
||||
func TestResolveYAMLIncludes_RejectsSymlinkEscapingRoot(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
parent := filepath.Join(tmp, "parent-template")
|
||||
outside := filepath.Join(tmp, "outside")
|
||||
if err := os.MkdirAll(parent, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.MkdirAll(outside, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(outside, "evil.yaml"), []byte("name: Evil\n"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Symlink that escapes the parent root via `../outside/...`. The path
|
||||
// STRING `evil` resolves to parent/evil — passes the rel2 check. But
|
||||
// because filepath.Abs doesn't follow symlinks, the ReadFile call DOES
|
||||
// follow it to outside/evil.yaml. This is the trade-off the symlink
|
||||
// approach accepts: the security boundary is a deployment-layer
|
||||
// invariant, not a code-layer one. Documented in dev-department/README.
|
||||
if err := os.Symlink(filepath.Join(outside, "evil.yaml"), filepath.Join(parent, "evil.yaml")); err != nil {
|
||||
t.Skipf("symlinks unsupported on this fs: %v", err)
|
||||
}
|
||||
src := []byte("workspaces:\n - !include evil.yaml\n")
|
||||
out, err := resolveYAMLIncludes(src, parent)
|
||||
if err != nil {
|
||||
// If the resolver is later hardened to refuse symlink targets
|
||||
// outside the root (e.g. via filepath.EvalSymlinks), this test
|
||||
// will start failing — and the dev-department symlink approach
|
||||
// would need to be updated accordingly.
|
||||
t.Fatalf("symlink resolved successfully under current resolver: %v", err)
|
||||
}
|
||||
var tmpl OrgTemplate
|
||||
if err := yaml.Unmarshal(out, &tmpl); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
if len(tmpl.Workspaces) != 1 || tmpl.Workspaces[0].Name != "Evil" {
|
||||
t.Fatalf("expected Evil workspace via symlink; got %+v", tmpl.Workspaces)
|
||||
}
|
||||
}
|
||||
@ -1,171 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestLoadPersonaEnvFile_HappyPath: the standard case — a persona-shaped
// env file exists at <root>/<role>/env and its KEY=VALUE pairs land in
// the out map. Mirrors what the operator-host bootstrap kit ships:
// GITEA_USER, GITEA_TOKEN, GITEA_TOKEN_SCOPES, GITEA_USER_EMAIL,
// GITEA_SSH_KEY_PATH. The leading # line also proves comments are
// skipped.
func TestLoadPersonaEnvFile_HappyPath(t *testing.T) {
	root := t.TempDir()
	roleDir := filepath.Join(root, "dev-lead")
	if err := os.MkdirAll(roleDir, 0o755); err != nil {
		t.Fatal(err)
	}
	envBody := `# Persona env file — mode 600
GITEA_USER=dev-lead
GITEA_USER_EMAIL=dev-lead@agents.moleculesai.app
GITEA_TOKEN=abc123
GITEA_TOKEN_SCOPES=write:repository,write:issue,read:user
GITEA_SSH_KEY_PATH=/etc/molecule-bootstrap/personas/dev-lead/ssh_priv
`
	if err := os.WriteFile(filepath.Join(roleDir, "env"), []byte(envBody), 0o600); err != nil {
		t.Fatal(err)
	}
	t.Setenv("MOLECULE_PERSONA_ROOT", root)

	out := map[string]string{}
	loadPersonaEnvFile("dev-lead", out)

	want := map[string]string{
		"GITEA_USER":         "dev-lead",
		"GITEA_USER_EMAIL":   "dev-lead@agents.moleculesai.app",
		"GITEA_TOKEN":        "abc123",
		"GITEA_TOKEN_SCOPES": "write:repository,write:issue,read:user",
		"GITEA_SSH_KEY_PATH": "/etc/molecule-bootstrap/personas/dev-lead/ssh_priv",
	}
	// Length check first so a stray extra key fails loudly too.
	if len(out) != len(want) {
		t.Fatalf("got %d keys, want %d: %#v", len(out), len(want), out)
	}
	for k, v := range want {
		if out[k] != v {
			t.Errorf("out[%q] = %q; want %q", k, out[k], v)
		}
	}
}
|
||||
|
||||
// TestLoadPersonaEnvFile_MissingDir: when the persona dir doesn't exist
// (e.g. dev-only host without the bootstrap kit, or a workspace whose
// role isn't a known persona), it's a silent no-op — out stays empty,
// no panic, no log noise that would break callers.
func TestLoadPersonaEnvFile_MissingDir(t *testing.T) {
	t.Setenv("MOLECULE_PERSONA_ROOT", t.TempDir()) // empty dir
	out := map[string]string{}
	loadPersonaEnvFile("nonexistent-role", out)
	if len(out) != 0 {
		t.Errorf("expected empty out, got %#v", out)
	}
}
|
||||
|
||||
// TestLoadPersonaEnvFile_EmptyRole: empty role string is the common case
// for non-dev workspaces (research/marketing/etc.). Skip silently —
// no entries added, no log line.
func TestLoadPersonaEnvFile_EmptyRole(t *testing.T) {
	t.Setenv("MOLECULE_PERSONA_ROOT", t.TempDir())
	out := map[string]string{}
	loadPersonaEnvFile("", out)
	if len(out) != 0 {
		t.Errorf("empty role should produce empty out; got %#v", out)
	}
}
|
||||
|
||||
// TestLoadPersonaEnvFile_RejectsTraversal: even though role names come
// from server-side admin-only org templates, defense-in-depth — refuse
// any role string with path separators or "..". Verifies that a maliciously
// crafted template can't read /etc/passwd by setting role: "../../etc".
func TestLoadPersonaEnvFile_RejectsTraversal(t *testing.T) {
	root := t.TempDir()
	// Plant a file one level ABOVE the persona root so a successful
	// traversal ("..") would actually read it and leak STOLEN into out.
	if err := os.WriteFile(filepath.Join(root, "env"), []byte("STOLEN=yes\n"), 0o600); err != nil {
		t.Fatal(err)
	}
	t.Setenv("MOLECULE_PERSONA_ROOT", filepath.Join(root, "personas"))

	for _, bad := range []string{"..", "../personas", "../etc/passwd", "/abs", "with/slash", "dot.in.middle", "with space", "back\\slash", ".", ""} {
		out := map[string]string{}
		loadPersonaEnvFile(bad, out)
		if len(out) != 0 {
			t.Errorf("role %q should have been rejected; got %#v", bad, out)
		}
	}
}
|
||||
|
||||
// TestLoadPersonaEnvFile_DefaultRoot: when MOLECULE_PERSONA_ROOT is unset,
// the helper falls back to /etc/molecule-bootstrap/personas. We don't
// touch real /etc — just verify the function doesn't panic and produces
// empty out (since the test box isn't expected to ship that path).
func TestLoadPersonaEnvFile_DefaultRoot(t *testing.T) {
	t.Setenv("MOLECULE_PERSONA_ROOT", "") // explicit empty → triggers fallback
	out := map[string]string{}
	loadPersonaEnvFile("dev-lead", out)
	// Don't assert content — production CI might or might not have the
	// /etc dir mounted. Just verify the call returns cleanly.
	_ = out
}
|
||||
|
||||
// TestLoadPersonaEnvFile_OverwritesEmptyMap: the contract is "lower
// precedence than later .env files." The helper writes into out without
// removing existing keys, and parseEnvFile DOES overwrite on key
// collision — so precedence is enforced purely by caller-side ordering
// (persona → org → workspace), with persona loaded into a fresh map
// first. This test pins the overwrite-on-collision behavior so the
// ordering contract stays visible.
func TestLoadPersonaEnvFile_OverwritesEmptyMap(t *testing.T) {
	root := t.TempDir()
	roleDir := filepath.Join(root, "core-be")
	if err := os.MkdirAll(roleDir, 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(roleDir, "env"),
		[]byte("GITEA_TOKEN=persona-value\n"), 0o600); err != nil {
		t.Fatal(err)
	}
	t.Setenv("MOLECULE_PERSONA_ROOT", root)

	out := map[string]string{"GITEA_TOKEN": "preset"}
	loadPersonaEnvFile("core-be", out)

	// Persona helper is meant to populate a FRESH map first in the
	// caller's flow; calling it on a pre-populated map and seeing the
	// value get overwritten is consistent with parseEnvFile semantics.
	if out["GITEA_TOKEN"] != "persona-value" {
		t.Errorf("loadPersonaEnvFile did not write into existing map; got %q", out["GITEA_TOKEN"])
	}
}
|
||||
|
||||
// TestIsSafeRoleName_Acceptance: positive + negative cases for the
|
||||
// validator. Pinned because every dev-tree role name must pass.
|
||||
func TestIsSafeRoleName_Acceptance(t *testing.T) {
|
||||
good := []string{
|
||||
"dev-lead", "core-be", "cp-security", "infra-runtime-be",
|
||||
"sdk-dev", "plugin-dev", "documentation-specialist",
|
||||
"triage-operator", "fullstack-engineer", "release-manager",
|
||||
"core_underscore_ok", "X", "a1", "Z9-0",
|
||||
}
|
||||
for _, s := range good {
|
||||
if !isSafeRoleName(s) {
|
||||
t.Errorf("isSafeRoleName(%q) = false; want true", s)
|
||||
}
|
||||
}
|
||||
bad := []string{
|
||||
"", ".", "..", "with/slash", "/abs", "dot.in.middle",
|
||||
"with space", "back\\slash", "trailing-", // trailing-hyphen is fine actually
|
||||
"with$dollar", "with?question", "newline\nsplit",
|
||||
}
|
||||
// trailing-hyphen IS allowed; remove from "bad" list:
|
||||
bad = []string{
|
||||
"", ".", "..", "with/slash", "/abs", "dot.in.middle",
|
||||
"with space", "back\\slash", "with$dollar", "with?question",
|
||||
"newline\nsplit",
|
||||
}
|
||||
for _, s := range bad {
|
||||
if isSafeRoleName(s) {
|
||||
t.Errorf("isSafeRoleName(%q) = true; want false", s)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1,207 +0,0 @@
|
||||
package handlers
|
||||
|
||||
// plugins_atomic.go — atomic install pattern for plugin delivery into a
|
||||
// running workspace container. Closes molecule-core#114.
|
||||
//
|
||||
// Replaces the prior "tar + docker.CopyToContainer to /configs/plugins/<name>"
|
||||
// single-step write (no atomicity, no marker, no rollback) with a 4-step
|
||||
// dance:
|
||||
//
|
||||
// 1. STAGE — extract tar into /configs/plugins/.staging/<name>.<ts>/
|
||||
// 2. SNAPSHOT — if /configs/plugins/<name>/ exists, mv to .previous/<name>.<ts>/
|
||||
// 3. SWAP — mv /configs/plugins/.staging/<name>.<ts>/ → /configs/plugins/<name>/
|
||||
// 4. MARKER — touch /configs/plugins/<name>/.complete
|
||||
//
|
||||
// On any post-snapshot failure we attempt a best-effort rollback by mv-ing
|
||||
// the previous snapshot back into place. The .complete marker is the
|
||||
// canonical "this install is fully landed" signal — workspace-side plugin
|
||||
// loaders should refuse to load a plugin dir without it.
|
||||
//
|
||||
// Scope: docker path only (workspace running as a local container). The
|
||||
// SaaS path (deliverViaEIC, SSH-into-EC2) is unchanged in this PR; tracked
|
||||
// as a follow-up. The same stage-then-swap shape applies but the exec
|
||||
// primitives differ (ssh vs docker exec), and shipping both paths in one
|
||||
// PR doubles the test surface.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/api/types/container"
|
||||
)
|
||||
|
||||
// Container-side filesystem layout for the atomic install dance.
// All four paths are inside the workspace container, not the host.
const (
	// pluginsRoot is where live (installed) plugin dirs reside.
	pluginsRoot = "/configs/plugins"
	// pluginsStagingDir holds in-flight extracts before the swap (step 1).
	pluginsStagingDir = "/configs/plugins/.staging"
	// pluginsPrevDir holds snapshots of the prior live version (step 2),
	// kept until post-install GC so a failed swap can roll back.
	pluginsPrevDir = "/configs/plugins/.previous"
	// completeMarker is the file touched last inside a live plugin dir;
	// its presence means the install fully landed.
	completeMarker = ".complete"
)
|
||||
|
||||
// installVersion identifies one install attempt — the plugin name plus a
// monotonic-ish UTC timestamp suffix. Used to namespace the staging dir
// and any snapshot of the previous version, so a reinstall mid-flight
// can't collide with a concurrent reinstall. (Second precision: two
// attempts within the same second share a stamp — see the path helpers'
// reliance on atomic mv rather than unique stamps for correctness.)
type installVersion struct {
	// plugin is the plugin's directory name under pluginsRoot.
	plugin string
	// stamp is the UTC wall-clock stamp, e.g. 20260508T141530Z —
	// deliberately colon-free so it is safe inside exec'd paths.
	stamp string
}
|
||||
|
||||
func newInstallVersion(plugin string) installVersion {
|
||||
return installVersion{
|
||||
plugin: plugin,
|
||||
stamp: time.Now().UTC().Format("20060102T150405Z"),
|
||||
}
|
||||
}
|
||||
|
||||
// stagedPath is the container path where the new content lands during fetch.
|
||||
// e.g. /configs/plugins/.staging/molecule-skill-foo.20260508T141530Z
|
||||
func (v installVersion) stagedPath() string {
|
||||
return path.Join(pluginsStagingDir, v.plugin+"."+v.stamp)
|
||||
}
|
||||
|
||||
// previousPath is where the prior live version is moved before swap.
|
||||
// e.g. /configs/plugins/.previous/molecule-skill-foo.20260508T141530Z
|
||||
func (v installVersion) previousPath() string {
|
||||
return path.Join(pluginsPrevDir, v.plugin+"."+v.stamp)
|
||||
}
|
||||
|
||||
// livePath is the destination after swap.
|
||||
// e.g. /configs/plugins/molecule-skill-foo
|
||||
func (v installVersion) livePath() string {
|
||||
return path.Join(pluginsRoot, v.plugin)
|
||||
}
|
||||
|
||||
// markerPath is the .complete file inside the live dir written last.
|
||||
func (v installVersion) markerPath() string {
|
||||
return path.Join(v.livePath(), completeMarker)
|
||||
}
|
||||
|
||||
// atomicCopyToContainer does a stage→snapshot→swap→marker install of a
// host-side staged plugin tree into a running container's
// /configs/plugins/<name>/. Returns nil on success.
//
// Parameters:
//   ctx           — cancellation/deadline for every exec + copy call.
//   containerName — target container (docker exec / CopyToContainer).
//   hostDir       — host-side staged plugin tree to ship.
//   pluginName    — live dir name under /configs/plugins.
//
// On post-snapshot failure (swap or marker write), best-effort rollback
// restores the previous snapshot to the live path. Returns the original
// error wrapped — the caller should surface it; rollback success is
// logged separately.
func (h *PluginsHandler) atomicCopyToContainer(
	ctx context.Context, containerName, hostDir, pluginName string,
) error {
	v := newInstallVersion(pluginName)

	// Step 0a: ensure staging + previous root dirs exist (idempotent).
	if _, err := h.execAsRoot(ctx, containerName, []string{
		"mkdir", "-p", pluginsStagingDir, pluginsPrevDir,
	}); err != nil {
		return fmt.Errorf("atomic install: mkdir staging/previous: %w", err)
	}

	// Step 0b: tar the host content with a path prefix that lands it in the
	// staging dir — NOT directly into the live name. The prefix has no
	// leading "/" because docker.CopyToContainer extracts paths relative
	// to the dstPath argument we pass below.
	stagedRel := strings.TrimPrefix(v.stagedPath(), "/")
	tarBuf, err := tarHostDirWithPrefix(hostDir, stagedRel)
	if err != nil {
		return fmt.Errorf("atomic install: tar host dir: %w", err)
	}

	// Step 1: STAGE — extract tar into /configs/plugins/.staging/<name>.<ts>/
	// (dstPath "/" + the relative prefix inside the tar places it there).
	if err := h.docker.CopyToContainer(ctx, containerName, "/", &tarBuf,
		container.CopyToContainerOptions{}); err != nil {
		// Best-effort: clean up any partial staging extract before returning.
		_, _ = h.execAsRoot(ctx, containerName, []string{
			"rm", "-rf", v.stagedPath(),
		})
		return fmt.Errorf("atomic install: copy to container: %w", err)
	}

	// Step 2: SNAPSHOT — if a live version exists, move it aside.
	// `test -d` exits 0 if the dir exists, non-zero otherwise; the helper
	// returns a non-nil error in the non-zero case which we treat as
	// "no previous version" rather than a real failure.
	snapshotted := false
	if _, err := h.execAsRoot(ctx, containerName, []string{
		"test", "-d", v.livePath(),
	}); err == nil {
		if _, err := h.execAsRoot(ctx, containerName, []string{
			"mv", v.livePath(), v.previousPath(),
		}); err != nil {
			// Snapshot failure: roll back the staged extract before failing.
			// Live dir is untouched at this point, so no restore needed.
			_, _ = h.execAsRoot(ctx, containerName, []string{
				"rm", "-rf", v.stagedPath(),
			})
			return fmt.Errorf("atomic install: snapshot previous version: %w", err)
		}
		snapshotted = true
	}

	// Step 3: SWAP — atomic rename of the staged dir into the live name.
	// `mv` on the same filesystem is a single rename(2), atomic at the FS level.
	// (Assumes .staging and the live root share a filesystem — they do,
	// both live under /configs/plugins.)
	if _, err := h.execAsRoot(ctx, containerName, []string{
		"mv", v.stagedPath(), v.livePath(),
	}); err != nil {
		// Swap failure: roll back if we had a snapshot.
		if snapshotted {
			if _, rbErr := h.execAsRoot(ctx, containerName, []string{
				"mv", v.previousPath(), v.livePath(),
			}); rbErr != nil {
				// Double failure: report both; the container is now missing
				// a live version for this plugin.
				return fmt.Errorf("atomic install: swap failed AND rollback failed: swap=%w, rollback=%v", err, rbErr)
			}
		}
		// Best-effort cleanup of the still-staged dir.
		_, _ = h.execAsRoot(ctx, containerName, []string{
			"rm", "-rf", v.stagedPath(),
		})
		return fmt.Errorf("atomic install: swap to live path: %w", err)
	}

	// Step 4: MARKER — touch .complete inside the live dir as the last write.
	// Workspace-side plugin loaders treat a plugin dir without this marker
	// as half-installed and skip it (or surface a clear error to the
	// operator instead of loading a possibly-partial tree).
	if _, err := h.execAsRoot(ctx, containerName, []string{
		"touch", v.markerPath(),
	}); err != nil {
		// Marker write failure with the new content already in place is a
		// weird state — content is fine on disk, but the plugin loader
		// will refuse to use it. Log loudly; do NOT roll back, since the
		// content is the latest, just unmarked. Operator can manually
		// `touch <plugin>/.complete` to recover.
		return fmt.Errorf("atomic install: write .complete marker (content landed but unmarked, manual recovery: touch %s): %w", v.markerPath(), err)
	}

	// Step 5: GC — best-effort delete the previous snapshot. Failures here
	// just leave a directory; not load-bearing for correctness, the next
	// install or a separate sweeper will reclaim the space.
	if snapshotted {
		_, _ = h.execAsRoot(ctx, containerName, []string{
			"rm", "-rf", v.previousPath(),
		})
	}

	return nil
}
|
||||
|
||||
// tarHostDirWithPrefix walks hostDir and writes a tar to a buffer with
|
||||
// every entry's name prefixed by `prefix`. Mirrors the prior streaming
|
||||
// shape used in copyPluginToContainer but with a configurable prefix
|
||||
// (the prior version hardcoded "plugins/<name>/"; we use a full
|
||||
// staging path so the extracted layout is the staging dir directly).
|
||||
//
|
||||
// Symlinks are skipped — same posture as streamDirAsTar elsewhere in
|
||||
// this file. Skipping prevents a hostile plugin from injecting a
|
||||
// symlink that, post-extract, points outside the plugin's own dir.
|
||||
func tarHostDirWithPrefix(hostDir, prefix string) (bytes.Buffer, error) {
|
||||
var buf bytes.Buffer
|
||||
tw := newTarWriter(&buf)
|
||||
defer tw.Close()
|
||||
if err := tarWalk(hostDir, prefix, tw); err != nil {
|
||||
return bytes.Buffer{}, err
|
||||
}
|
||||
return buf, nil
|
||||
}
|
||||
@ -1,70 +0,0 @@
|
||||
package handlers
|
||||
|
||||
// plugins_atomic_tar.go — tar-walk helpers split out so the main atomic
|
||||
// install flow stays readable. The prefix argument lets the caller
|
||||
// arrange where the tar's contents land at extract time.
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// newTarWriter is a thin wrapper so atomic_test.go can swap the writer
|
||||
// destination if it needs to.
|
||||
func newTarWriter(w io.Writer) *tar.Writer {
|
||||
return tar.NewWriter(w)
|
||||
}
|
||||
|
||||
// tarWalk walks hostDir and writes every regular file + dir to the tar
|
||||
// writer with paths of the form `<prefix>/<relative>`. Symlinks are
|
||||
// skipped — same posture as streamDirAsTar in plugins_install_pipeline.go.
|
||||
//
|
||||
// The trailing-slash on prefix is normalized away: prefix "foo" and
|
||||
// prefix "foo/" produce identical archives.
|
||||
func tarWalk(hostDir, prefix string, tw *tar.Writer) error {
|
||||
prefix = filepath.Clean(prefix)
|
||||
return filepath.Walk(hostDir, func(p string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if info.Mode()&os.ModeSymlink != 0 {
|
||||
return nil // skip symlinks; see doc above
|
||||
}
|
||||
rel, err := filepath.Rel(hostDir, p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if rel == "." {
|
||||
// Emit the prefix dir itself once, with the source dir's mode.
|
||||
hdr, err := tar.FileInfoHeader(info, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hdr.Name = prefix + "/"
|
||||
return tw.WriteHeader(hdr)
|
||||
}
|
||||
hdr, err := tar.FileInfoHeader(info, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hdr.Name = filepath.Join(prefix, rel)
|
||||
if info.IsDir() {
|
||||
hdr.Name += "/"
|
||||
}
|
||||
if err := tw.WriteHeader(hdr); err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.Mode().IsRegular() {
|
||||
return nil
|
||||
}
|
||||
f, err := os.Open(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
_, err = io.Copy(tw, f)
|
||||
return err
|
||||
})
|
||||
}
|
||||
@ -1,193 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestInstallVersion_Paths: the path helpers must produce a stable shape
|
||||
// the in-container exec calls depend on. Pinning the layout here
|
||||
// catches a future refactor that accidentally changes where staging /
|
||||
// previous / live dirs live, which would break the swap atomicity.
|
||||
func TestInstallVersion_Paths(t *testing.T) {
|
||||
v := installVersion{plugin: "molecule-skill-foo", stamp: "20260508T141530Z"}
|
||||
|
||||
if got, want := v.stagedPath(), "/configs/plugins/.staging/molecule-skill-foo.20260508T141530Z"; got != want {
|
||||
t.Errorf("stagedPath = %q; want %q", got, want)
|
||||
}
|
||||
if got, want := v.previousPath(), "/configs/plugins/.previous/molecule-skill-foo.20260508T141530Z"; got != want {
|
||||
t.Errorf("previousPath = %q; want %q", got, want)
|
||||
}
|
||||
if got, want := v.livePath(), "/configs/plugins/molecule-skill-foo"; got != want {
|
||||
t.Errorf("livePath = %q; want %q", got, want)
|
||||
}
|
||||
if got, want := v.markerPath(), "/configs/plugins/molecule-skill-foo/.complete"; got != want {
|
||||
t.Errorf("markerPath = %q; want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// TestInstallVersion_StampUniqueness: two newInstallVersion calls within
|
||||
// the same second produce the same stamp (we use second precision); the
|
||||
// caller relies on the mv-rename being atomic, so collision-free
|
||||
// stamping is NOT a correctness requirement — but a regression that
|
||||
// changes stamp shape (e.g. RFC3339 with colons) would break the path
|
||||
// helpers since path.Join treats a colon as a regular char but ssh +
|
||||
// docker exec generally don't. Pin the no-colon shape.
|
||||
func TestInstallVersion_StampShape(t *testing.T) {
|
||||
v := newInstallVersion("anything")
|
||||
if strings.Contains(v.stamp, ":") {
|
||||
t.Errorf("stamp must not contain colons (breaks shell-quoting in exec): %q", v.stamp)
|
||||
}
|
||||
if strings.Contains(v.stamp, " ") {
|
||||
t.Errorf("stamp must not contain spaces: %q", v.stamp)
|
||||
}
|
||||
// Sanity: stamp parses as the documented format.
|
||||
if _, err := time.Parse("20060102T150405Z", v.stamp); err != nil {
|
||||
t.Errorf("stamp %q does not parse as 20060102T150405Z: %v", v.stamp, err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTarHostDirWithPrefix_HappyPath: walks a host dir, builds a tar with
// the configured prefix, verifies every entry's name is rooted under
// the prefix, and the file contents survive round-trip.
func TestTarHostDirWithPrefix_HappyPath(t *testing.T) {
	hostDir := t.TempDir()

	// Plant: <host>/plugin.yaml + <host>/skills/foo/SKILL.md + <host>/.complete
	files := map[string]string{
		"plugin.yaml":         "name: foo\nversion: 1.0.0\n",
		"skills/foo/SKILL.md": "# Foo skill\n",
		".complete":           "", // upstream may already have a marker
	}
	for rel, body := range files {
		full := filepath.Join(hostDir, rel)
		if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
			t.Fatal(err)
		}
		if err := os.WriteFile(full, []byte(body), 0o644); err != nil {
			t.Fatal(err)
		}
	}

	// Use a realistic staging-style prefix (no leading slash).
	prefix := "configs/plugins/.staging/foo.20260508T141530Z"
	buf, err := tarHostDirWithPrefix(hostDir, prefix)
	if err != nil {
		t.Fatalf("tar: %v", err)
	}

	// Read back the tar; collect names + body for regular files.
	got := map[string]string{}
	tr := tar.NewReader(&buf)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatalf("tar reader: %v", err)
		}
		// Every entry must start with the prefix
		if !strings.HasPrefix(hdr.Name, prefix) {
			t.Errorf("entry %q does not start with prefix %q", hdr.Name, prefix)
		}
		if hdr.Typeflag == tar.TypeReg {
			body, err := io.ReadAll(tr)
			if err != nil {
				t.Fatal(err)
			}
			// Strip "<prefix>/" to recover the planted relative path.
			rel := strings.TrimPrefix(hdr.Name, prefix+"/")
			got[rel] = string(body)
		}
	}

	// Round-trip check: every planted file came back byte-identical.
	for rel, want := range files {
		if got[rel] != want {
			t.Errorf("body[%q] = %q; want %q", rel, got[rel], want)
		}
	}
}
|
||||
|
||||
// TestTarHostDirWithPrefix_SkipsSymlinks: a hostile plugin shouldn't be
// able to ship a symlink that, post-extract, points outside its own
// dir. The walker silently skips symlinks (same posture as
// streamDirAsTar). Verify a planted symlink doesn't appear in the tar
// while a sibling regular file does.
func TestTarHostDirWithPrefix_SkipsSymlinks(t *testing.T) {
	hostDir := t.TempDir()
	// Plant a real file + a symlink pointing outside hostDir.
	if err := os.WriteFile(filepath.Join(hostDir, "real.txt"), []byte("ok"), 0o644); err != nil {
		t.Fatal(err)
	}
	// Symlink target lives in a *different* temp dir — i.e. outside the tree.
	target := filepath.Join(t.TempDir(), "outside")
	if err := os.WriteFile(target, []byte("SHOULD NOT APPEAR"), 0o644); err != nil {
		t.Fatal(err)
	}
	if err := os.Symlink(target, filepath.Join(hostDir, "evil")); err != nil {
		t.Fatal(err)
	}

	buf, err := tarHostDirWithPrefix(hostDir, "p")
	if err != nil {
		t.Fatal(err)
	}

	// Collect all entry names from the archive.
	names := []string{}
	tr := tar.NewReader(&buf)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatal(err)
		}
		names = append(names, hdr.Name)
	}
	sort.Strings(names)

	// The symlink must not appear under any name.
	for _, n := range names {
		if strings.Contains(n, "evil") {
			t.Errorf("symlink leaked into tar: %q", n)
		}
	}
	// real.txt should be present
	found := false
	for _, n := range names {
		if strings.HasSuffix(n, "real.txt") {
			found = true
			break
		}
	}
	if !found {
		t.Errorf("real.txt missing from tar; got names: %v", names)
	}
}
|
||||
|
||||
// TestTarHostDirWithPrefix_PrefixNormalization: trailing slash on prefix
|
||||
// should not change the archive shape. Pinning this so a future caller
|
||||
// passing "foo/" instead of "foo" doesn't double-slash entry names.
|
||||
func TestTarHostDirWithPrefix_PrefixNormalization(t *testing.T) {
|
||||
hostDir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(hostDir, "x"), []byte("y"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
a, err := tarHostDirWithPrefix(hostDir, "foo")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
b, err := tarHostDirWithPrefix(hostDir, "foo/")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if !bytes.Equal(a.Bytes(), b.Bytes()) {
|
||||
t.Errorf("trailing-slash on prefix changed archive shape; tarHostDirWithPrefix should be slash-insensitive")
|
||||
}
|
||||
}
|
||||
@ -1,214 +0,0 @@
|
||||
package handlers
|
||||
|
||||
// plugins_classifier.go — diff classifier for plugin updates.
|
||||
//
|
||||
// Closes molecule-core#112. Composes with #114 (atomic install) so the
|
||||
// platform can decide *before* triggering restartFunc whether the
|
||||
// update is content-only (SKILL.md text changed; agent re-reads at next
|
||||
// Skill invocation) or structural (hooks/settings/plugin.yaml/file added
|
||||
// or removed; agent must restart to pick up the new state).
|
||||
//
|
||||
// SKILL.md content is hot-reloadable because Claude Code reads the file
|
||||
// on each Skill invocation — no in-memory cache. Hooks and settings.json
|
||||
// are loaded at session start and need a session restart. plugin.yaml
|
||||
// changes are structural by definition (manifest controls everything
|
||||
// else).
|
||||
//
|
||||
// CLASSIFICATION RULE
|
||||
// classify(staged, live) → "skill-content-only" if and only if
|
||||
// every file present in either tree is one of:
|
||||
// - identical between staged and live, OR
|
||||
// - a **/SKILL.md file with content change (text body modified)
|
||||
// AND no files were added or removed.
|
||||
// Anything else → "cold" (the safe default).
|
||||
//
|
||||
// The classifier reads live-tree files from inside the container via
|
||||
// `docker exec cat`. Comparison is by SHA-256 over file content, not
|
||||
// mtime — mtime changes on every install regardless of content.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Classification outcomes for a plugin update (see file header for the
// full rule).
const (
	// classifyKindSkillContentOnly: install can skip restartFunc; the
	// only changes are SKILL.md body text.
	classifyKindSkillContentOnly = "skill-content-only"
	// classifyKindCold: must restart the workspace container; structural
	// or hook/settings change. This is also the safe fallback on any
	// classifier error.
	classifyKindCold = "cold"
)
|
||||
|
||||
// classifyInstallChanges compares the staged plugin tree (host filesystem)
// against the currently-live plugin tree inside the container. Returns
// classifyKindSkillContentOnly when the only diff is SKILL.md content
// changes, classifyKindCold otherwise (added/removed files, hooks/
// settings.json edits, plugin.yaml edits, anything else).
//
// A first install (no live /configs/plugins/<name> dir) is cold — no
// live state to hot-reload into. Any failure reading the live tree is
// also treated as cold rather than an error: the conservative outcome
// is always a restart, never a speculative hot-reload.
func (h *PluginsHandler) classifyInstallChanges(
	ctx context.Context, containerName, hostStagedDir, pluginName string,
) (string, error) {
	livePath := "/configs/plugins/" + pluginName

	// Probe: does live exist? If not, this is a first install — cold.
	// (`test -d` non-zero exit surfaces as a non-nil error from the helper.)
	if _, err := h.execAsRoot(ctx, containerName, []string{
		"test", "-d", livePath,
	}); err != nil {
		return classifyKindCold, nil
	}

	// Build content-hash maps for both trees (sha256, not mtime — mtime
	// changes on every install regardless of content).
	stagedHashes, err := hashLocalTree(hostStagedDir)
	if err != nil {
		// Staged tree is local and should always be readable; a failure
		// here is a real error, but still report cold as the kind.
		return classifyKindCold, fmt.Errorf("classifier: hash staged: %w", err)
	}
	liveHashes, err := h.hashContainerTree(ctx, containerName, livePath)
	if err != nil {
		// Live tree read failure: be conservative, cold-restart.
		return classifyKindCold, nil
	}

	// Drop the .complete marker from comparison — its mtime/atime can
	// vary across installs but content is empty/trivial; including it
	// would force-cold every reinstall.
	delete(stagedHashes, ".complete")
	delete(liveHashes, ".complete")

	// Set difference: any file in one tree but not the other → cold.
	for rel := range stagedHashes {
		if _, ok := liveHashes[rel]; !ok {
			return classifyKindCold, nil // file added
		}
	}
	for rel := range liveHashes {
		if _, ok := stagedHashes[rel]; !ok {
			return classifyKindCold, nil // file removed
		}
	}

	// Same set of files. Walk the diff: content changes are only allowed
	// in SKILL.md files.
	for rel, stagedHash := range stagedHashes {
		liveHash := liveHashes[rel]
		if stagedHash == liveHash {
			continue
		}
		// Content differs. Allow if and only if it's a SKILL.md.
		if !isSkillMarkdown(rel) {
			return classifyKindCold, nil
		}
	}
	return classifyKindSkillContentOnly, nil
}
|
||||
|
||||
// isSkillMarkdown reports whether rel names a hot-reloadable skill file:
// any path whose basename is exactly "SKILL.md" (case-sensitive, per
// Claude Code's skill discovery rule). "skill.md", "SKILLS.md" and
// other markdown do not qualify.
func isSkillMarkdown(rel string) bool {
	const skillBasename = "SKILL.md"
	return filepath.Base(rel) == skillBasename
}
|
||||
|
||||
// hashLocalTree walks a host directory and returns rel-path → sha256-hex.
|
||||
// Symlinks are skipped (same posture as the tar walker).
|
||||
func hashLocalTree(root string) (map[string]string, error) {
|
||||
out := map[string]string{}
|
||||
err := filepath.WalkDir(root, func(p string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
info, err := d.Info()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if info.Mode()&os.ModeSymlink != 0 {
|
||||
return nil
|
||||
}
|
||||
if !info.Mode().IsRegular() {
|
||||
return nil
|
||||
}
|
||||
rel, err := filepath.Rel(root, p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
body, err := os.ReadFile(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sum := sha256.Sum256(body)
|
||||
out[filepath.ToSlash(rel)] = hex.EncodeToString(sum[:])
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// hashContainerTree reads every regular file under livePath inside the
// container via a single `docker exec sh -c 'cd <livePath> && find … |
// xargs … sha256sum'` and returns rel-path → sha256-hex.
//
// One exec for the whole tree (rather than one `cat` per file) keeps
// the round-trip count constant. The output is parsed line-by-line.
func (h *PluginsHandler) hashContainerTree(
	ctx context.Context, containerName, livePath string,
) (map[string]string, error) {
	out, err := h.execAsRoot(ctx, containerName, []string{
		"sh", "-c",
		// Find regular files, hash each, output `<hex>  ./<relpath>`.
		// `cd` then `find .` keeps paths relative to livePath. -print0 /
		// xargs -0 tolerates spaces in names; -r skips the empty-tree case.
		fmt.Sprintf("cd %s && find . -type f -print0 | xargs -0 -r sha256sum 2>/dev/null", shQuote(livePath)),
	})
	if err != nil {
		return nil, fmt.Errorf("hash container tree: %w", err)
	}
	hashes := map[string]string{}
	for _, line := range strings.Split(out, "\n") {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		// sha256sum output: "<hex>  <path>" (two spaces). Path starts with "./".
		// NOTE(review): the two-space separator matches sha256sum's
		// canonical output; if the separator were a single space the
		// "./" TrimPrefix below would never fire — confirm against a
		// live container if parsing ever misbehaves.
		parts := strings.SplitN(line, "  ", 2)
		if len(parts) != 2 {
			continue // tolerate stray non-matching lines
		}
		hash := parts[0]
		rel := strings.TrimPrefix(parts[1], "./")
		hashes[rel] = hash
	}
	return hashes, nil
}
|
||||
|
||||
// shQuote single-quotes a string for safe insertion into a shell command.
// Returns the input unchanged when every rune is shell-safe (alphanumeric
// plus / . _ -); otherwise wraps it in single quotes and escapes any
// embedded single quote via the standard '\'' dance.
//
// Fix: the empty string is now quoted as ”. Returning "" unchanged made
// the argument vanish entirely from the exec'd command line (e.g.
// `cd ` instead of `cd ''`), silently changing the command's meaning.
func shQuote(s string) string {
	if s == "" {
		return "''"
	}
	safe := true
	for _, c := range s {
		switch {
		case c >= 'a' && c <= 'z':
		case c >= 'A' && c <= 'Z':
		case c >= '0' && c <= '9':
		case c == '/' || c == '.' || c == '_' || c == '-':
		default:
			safe = false
		}
		if !safe {
			break
		}
	}
	if safe {
		return s
	}
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}
|
||||
@ -1,121 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestIsSkillMarkdown: pin which paths the classifier considers
|
||||
// hot-reloadable. SKILL.md by basename only — case-sensitive.
|
||||
func TestIsSkillMarkdown(t *testing.T) {
|
||||
yes := []string{
|
||||
"SKILL.md",
|
||||
"skills/foo/SKILL.md",
|
||||
"deeply/nested/SKILL.md",
|
||||
}
|
||||
no := []string{
|
||||
"plugin.yaml",
|
||||
"hooks.json",
|
||||
"settings.json",
|
||||
"README.md",
|
||||
"skill.md", // case-sensitive
|
||||
"SKILLS.md", // not a skill file
|
||||
"skills/foo/extra.md",
|
||||
}
|
||||
for _, s := range yes {
|
||||
if !isSkillMarkdown(s) {
|
||||
t.Errorf("isSkillMarkdown(%q) = false; want true", s)
|
||||
}
|
||||
}
|
||||
for _, s := range no {
|
||||
if isSkillMarkdown(s) {
|
||||
t.Errorf("isSkillMarkdown(%q) = true; want false", s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestHashLocalTree_StableHash: hashing the same content twice must
|
||||
// produce identical maps. Pinned because if hashLocalTree ever picks up
|
||||
// mtime/inode (e.g. via a refactor to use os.Lstat metadata), every
|
||||
// install would classify as cold and we'd lose the hot-reload.
|
||||
func TestHashLocalTree_StableHash(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.MkdirAll(filepath.Join(dir, "skills/foo"), 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(dir, "plugin.yaml"), []byte("name: foo\n"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(dir, "skills/foo/SKILL.md"), []byte("# Foo\n"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
h1, err := hashLocalTree(dir)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
h2, err := hashLocalTree(dir)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(h1) != len(h2) {
|
||||
t.Fatalf("hash count differs: %d vs %d", len(h1), len(h2))
|
||||
}
|
||||
for k, v := range h1 {
|
||||
if h2[k] != v {
|
||||
t.Errorf("hash[%q] differs: %q vs %q", k, v, h2[k])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestHashLocalTree_SymlinkSkipped: symlinks should not appear in the
|
||||
// hash map — same posture as the tar walker. Otherwise a hostile plugin
|
||||
// could include a symlink whose hash changes when its target changes,
|
||||
// silently flipping classification.
|
||||
func TestHashLocalTree_SymlinkSkipped(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "real.txt"), []byte("ok"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
target := filepath.Join(t.TempDir(), "target")
|
||||
if err := os.WriteFile(target, []byte("outside"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.Symlink(target, filepath.Join(dir, "link")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
h, err := hashLocalTree(dir)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, exists := h["link"]; exists {
|
||||
t.Errorf("symlink leaked into hash map: %v", h)
|
||||
}
|
||||
if _, exists := h["real.txt"]; !exists {
|
||||
t.Errorf("real.txt missing from hash map: %v", h)
|
||||
}
|
||||
}
|
||||
|
||||
// TestShQuote: the classifier injects livePath into a shell command via
|
||||
// docker exec. Path must be quoted to handle pluginName entries with
|
||||
// hyphens (which are safe but exercised here) and any future special-
|
||||
// character edge case. Pin the safe-vs-quoted boundary.
|
||||
func TestShQuote(t *testing.T) {
|
||||
cases := []struct {
|
||||
in, want string
|
||||
}{
|
||||
{"foo", "foo"},
|
||||
{"/configs/plugins/foo-bar", "/configs/plugins/foo-bar"},
|
||||
{"with space", "'with space'"},
|
||||
{"with'quote", "'with'\\''quote'"},
|
||||
{"$envvar", "'$envvar'"},
|
||||
{"path/with/dots.txt", "path/with/dots.txt"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if got := shQuote(tc.in); got != tc.want {
|
||||
t.Errorf("shQuote(%q) = %q; want %q", tc.in, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -91,14 +91,6 @@ func (h *PluginsHandler) Install(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Record the install in workspace_plugins (core#113 — version-subscription
|
||||
// foundation). Best-effort: DB write failure is logged but doesn't fail
|
||||
// the install — the plugin IS in the container; surfacing a 500 here
|
||||
// would mislead the caller about the install state.
|
||||
if err := recordWorkspacePluginInstall(ctx, workspaceID, result.PluginName, result.Source.Raw(), req.Track); err != nil {
|
||||
log.Printf("Plugin install: failed to record %s for %s in workspace_plugins: %v (install succeeded; tracking row missing)", result.PluginName, workspaceID, err)
|
||||
}
|
||||
|
||||
log.Printf("Plugin install: %s via %s → workspace %s (restarting)", result.PluginName, result.Source.Scheme, workspaceID)
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"status": "installed",
|
||||
|
||||
@ -114,15 +114,6 @@ type installRequest struct {
|
||||
// When present, resolveAndStage verifies the fetched content matches
|
||||
// before allowing the install to proceed (SAFE-T1102 supply-chain hardening).
|
||||
SHA256 string `json:"sha256,omitempty"`
|
||||
// Track is the version-subscription mode for this install (core#113):
|
||||
// "none" — no auto-update tracking (default)
|
||||
// "tag:vX.Y.Z" — track a specific version tag
|
||||
// "tag:latest" — track latest tag, drift on every new tag
|
||||
// "sha:<full>" — pinned, no drift ever
|
||||
// The drift detector (separate component, follow-up) reads
|
||||
// workspace_plugins rows where tracked_ref != 'none' and queues
|
||||
// updates when upstream resolves to a different SHA.
|
||||
Track string `json:"track,omitempty"`
|
||||
}
|
||||
|
||||
// stageResult bundles the outputs of resolveAndStage for the caller.
|
||||
@ -285,22 +276,7 @@ func (h *PluginsHandler) resolveAndStage(ctx context.Context, req installRequest
|
||||
// using NewPluginsHandler without a DB; production wires it in router.go.
|
||||
func (h *PluginsHandler) deliverToContainer(ctx context.Context, workspaceID string, r *stageResult) error {
|
||||
if containerName := h.findRunningContainer(ctx, workspaceID); containerName != "" {
|
||||
// Hot-reload classifier (molecule-core#112) — decide BEFORE the
|
||||
// install whether this update can skip restartFunc. SKILL.md
|
||||
// content changes are filesystem-visible to Claude Code on the
|
||||
// next Skill invocation; hooks / settings.json / plugin.yaml /
|
||||
// added-or-removed files need a container restart.
|
||||
// Classifier reads live tree from container; on any read error
|
||||
// it returns kindCold so we never hot-reload speculatively.
|
||||
kind, _ := h.classifyInstallChanges(ctx, containerName, r.StagedDir, r.PluginName)
|
||||
|
||||
// Atomic stage→snapshot→swap→marker (molecule-core#114).
|
||||
// Replaces the prior single docker.CopyToContainer write that
|
||||
// left a partially-extracted tree on mid-install failure with
|
||||
// no rollback path. atomicCopyToContainer writes a .complete
|
||||
// marker as the last step; workspace-side plugin loaders should
|
||||
// refuse to load a plugin dir without it.
|
||||
if err := h.atomicCopyToContainer(ctx, containerName, r.StagedDir, r.PluginName); err != nil {
|
||||
if err := h.copyPluginToContainer(ctx, containerName, r.StagedDir, r.PluginName); err != nil {
|
||||
log.Printf("Plugin install: failed to copy %s to %s: %v", r.PluginName, workspaceID, err)
|
||||
return newHTTPErr(http.StatusInternalServerError, gin.H{"error": "failed to copy plugin to container"})
|
||||
}
|
||||
@ -308,11 +284,7 @@ func (h *PluginsHandler) deliverToContainer(ctx context.Context, workspaceID str
|
||||
"chown", "-R", "1000:1000", "/configs/plugins/" + r.PluginName,
|
||||
})
|
||||
if h.restartFunc != nil {
|
||||
if kind == classifyKindSkillContentOnly {
|
||||
log.Printf("Plugin install: %s → workspace %s — SKILL-content-only update, SKIPPING restart", r.PluginName, workspaceID)
|
||||
} else {
|
||||
go h.restartFunc(workspaceID)
|
||||
}
|
||||
go h.restartFunc(workspaceID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -1,78 +0,0 @@
|
||||
package handlers
|
||||
|
||||
// plugins_tracking.go — workspace_plugins DB tracking for the
|
||||
// version-subscription model (core#113).
|
||||
//
|
||||
// Schema lives in migration 20260508160000_workspace_plugins_tracking.up.sql.
|
||||
// This file is the Go-side write surface used at install time to record
|
||||
// each plugin's install record. Drift detection / queue / apply are
|
||||
// follow-up scope (filed as a separate issue once this lands).
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
)
|
||||
|
||||
// trackedRefValues is the closed set of bare-string values the
|
||||
// workspace_plugins.tracked_ref column accepts. Prefixed values
|
||||
// ("tag:..." / "sha:...") are validated structurally below.
|
||||
var trackedRefValues = map[string]bool{
|
||||
"none": true,
|
||||
}
|
||||
|
||||
// validateTrackedRef returns the canonical form of a track string, or
|
||||
// an error if the input is malformed. Empty input → "none" (default).
|
||||
//
|
||||
// Accepted shapes:
|
||||
//
|
||||
// "" — defaults to "none"
|
||||
// "none" — no tracking
|
||||
// "tag:vX.Y.Z" — track a specific tag
|
||||
// "tag:latest" — track latest tag, drift on every new tag
|
||||
// "sha:<full-sha>" — pinned to commit SHA
|
||||
func validateTrackedRef(s string) (string, error) {
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
return "none", nil
|
||||
}
|
||||
if trackedRefValues[s] {
|
||||
return s, nil
|
||||
}
|
||||
if strings.HasPrefix(s, "tag:") && len(s) > 4 {
|
||||
return s, nil
|
||||
}
|
||||
if strings.HasPrefix(s, "sha:") && len(s) > 4 {
|
||||
return s, nil
|
||||
}
|
||||
return "", fmt.Errorf("invalid track value %q: expected 'none' | 'tag:vX.Y.Z' | 'tag:latest' | 'sha:<full>'", s)
|
||||
}
|
||||
|
||||
// recordWorkspacePluginInstall upserts the workspace_plugins row for a
|
||||
// plugin install. ON CONFLICT (workspace_id, plugin_name) DO UPDATE so
|
||||
// reinstalling the same plugin name (with a possibly-different source or
|
||||
// track value) updates the existing row rather than failing.
|
||||
func recordWorkspacePluginInstall(
|
||||
ctx context.Context, workspaceID, pluginName, sourceRaw, track string,
|
||||
) error {
|
||||
if workspaceID == "" || pluginName == "" || sourceRaw == "" {
|
||||
return errors.New("recordWorkspacePluginInstall: missing required field")
|
||||
}
|
||||
canonicalTrack, err := validateTrackedRef(track)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = db.DB.ExecContext(ctx, `
|
||||
INSERT INTO workspace_plugins (workspace_id, plugin_name, source_raw, tracked_ref)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
ON CONFLICT (workspace_id, plugin_name)
|
||||
DO UPDATE SET
|
||||
source_raw = EXCLUDED.source_raw,
|
||||
tracked_ref = EXCLUDED.tracked_ref,
|
||||
updated_at = NOW()
|
||||
`, workspaceID, pluginName, sourceRaw, canonicalTrack)
|
||||
return err
|
||||
}
|
||||
@ -1,54 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import "testing"
|
||||
|
||||
// TestValidateTrackedRef: pin the exact set of accepted track values
|
||||
// the install endpoint stores. Drift detector reads this column; any
|
||||
// value that slips through here without structural validation would
|
||||
// silently fail at drift-check time.
|
||||
func TestValidateTrackedRef(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want string
|
||||
err bool
|
||||
}{
|
||||
// Defaults
|
||||
{"", "none", false},
|
||||
{" ", "none", false},
|
||||
{"none", "none", false},
|
||||
|
||||
// Tag shape
|
||||
{"tag:v1.0.0", "tag:v1.0.0", false},
|
||||
{"tag:v0.4.0-gitea.1", "tag:v0.4.0-gitea.1", false},
|
||||
{"tag:latest", "tag:latest", false},
|
||||
|
||||
// SHA shape
|
||||
{"sha:abc123", "sha:abc123", false},
|
||||
{"sha:0123456789abcdef0123456789abcdef01234567", "sha:0123456789abcdef0123456789abcdef01234567", false},
|
||||
|
||||
// Reject malformed
|
||||
{"tag:", "", true}, // empty after prefix
|
||||
{"sha:", "", true}, // empty after prefix
|
||||
{"latest", "", true}, // bare 'latest' is ambiguous (tag? branch?)
|
||||
{"main", "", true}, // bare branch name not allowed
|
||||
{"v1.0.0", "", true}, // missing tag: prefix
|
||||
{"random", "", true}, // not in allowlist
|
||||
{"tag", "", true}, // prefix without separator
|
||||
}
|
||||
for _, tc := range cases {
|
||||
got, err := validateTrackedRef(tc.in)
|
||||
if tc.err {
|
||||
if err == nil {
|
||||
t.Errorf("validateTrackedRef(%q) = (%q, nil); want error", tc.in, got)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
t.Errorf("validateTrackedRef(%q) error: %v", tc.in, err)
|
||||
continue
|
||||
}
|
||||
if got != tc.want {
|
||||
t.Errorf("validateTrackedRef(%q) = %q; want %q", tc.in, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -323,19 +323,161 @@ func (h *WorkspaceHandler) Delete(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Delegate the cascade to CascadeDelete so the HTTP path and the
|
||||
// OrgImport reconcile path share one teardown sequence (#73 race
|
||||
// guard, container stop, volume removal, token revocation, schedule
|
||||
// disable, broadcast). The HTTP-specific bits — direct-children 409
|
||||
// gate above, ?purge=true hard-delete below, response shaping —
|
||||
// stay in this handler.
|
||||
descendantIDs, stopErrs, err := h.CascadeDelete(ctx, id)
|
||||
if err != nil {
|
||||
log.Printf("Delete: CascadeDelete(%s) failed: %v", id, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
// Cascade delete: collect ALL descendants (not just direct children) via
|
||||
// recursive CTE, then stop each container and remove each volume.
|
||||
// Previous bug: only direct children's containers were stopped, leaving
|
||||
// grandchildren as orphan running containers after a cascade delete.
|
||||
descendantIDs := []string{}
|
||||
if len(children) > 0 {
|
||||
descRows, err := db.DB.QueryContext(ctx, `
|
||||
WITH RECURSIVE descendants AS (
|
||||
SELECT id FROM workspaces WHERE parent_id = $1 AND status != 'removed'
|
||||
UNION ALL
|
||||
SELECT w.id FROM workspaces w JOIN descendants d ON w.parent_id = d.id WHERE w.status != 'removed'
|
||||
)
|
||||
SELECT id FROM descendants
|
||||
`, id)
|
||||
if err != nil {
|
||||
log.Printf("Delete: descendant query error for %s: %v", id, err)
|
||||
} else {
|
||||
for descRows.Next() {
|
||||
var descID string
|
||||
if descRows.Scan(&descID) == nil {
|
||||
descendantIDs = append(descendantIDs, descID)
|
||||
}
|
||||
}
|
||||
descRows.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// #73 fix: mark rows 'removed' in the DB FIRST, BEFORE stopping containers
|
||||
// or removing volumes. Previously the sequence was stop → update-status,
|
||||
// which left a gap where:
|
||||
// - the container's last pre-teardown heartbeat could resurrect the row
|
||||
// via the register-handler UPSERT (now also guarded in #73)
|
||||
// - the liveness monitor could observe 'online' status + expired Redis
|
||||
// TTL and trigger RestartByID, recreating a container we're trying
|
||||
// to destroy
|
||||
// Marking 'removed' first makes both of those paths no-op via their
|
||||
// existing `status NOT IN ('removed', ...)` guards.
|
||||
allIDs := append([]string{id}, descendantIDs...)
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET status = $1, updated_at = now() WHERE id = ANY($2::uuid[])`,
|
||||
models.StatusRemoved, pq.Array(allIDs)); err != nil {
|
||||
log.Printf("Delete status update error for %s: %v", id, err)
|
||||
}
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`DELETE FROM canvas_layouts WHERE workspace_id = ANY($1::uuid[])`,
|
||||
pq.Array(allIDs)); err != nil {
|
||||
log.Printf("Delete canvas_layouts error for %s: %v", id, err)
|
||||
}
|
||||
// Revoke all auth tokens for the deleted workspaces. Once the workspace is
|
||||
// gone its tokens are meaningless; leaving them alive would keep
|
||||
// HasAnyLiveTokenGlobal = true even after the platform is otherwise empty,
|
||||
// which prevents AdminAuth from returning to fail-open and breaks the E2E
|
||||
// test's count-zero assertion (and local re-run cleanup).
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspace_auth_tokens SET revoked_at = now()
|
||||
WHERE workspace_id = ANY($1::uuid[]) AND revoked_at IS NULL`,
|
||||
pq.Array(allIDs)); err != nil {
|
||||
log.Printf("Delete token revocation error for %s: %v", id, err)
|
||||
}
|
||||
// #1027: cascade-disable all schedules for the deleted workspaces so
|
||||
// the scheduler never fires a cron into a removed container.
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspace_schedules SET enabled = false, updated_at = now()
|
||||
WHERE workspace_id = ANY($1::uuid[]) AND enabled = true`,
|
||||
pq.Array(allIDs)); err != nil {
|
||||
log.Printf("Delete schedule disable error for %s: %v", id, err)
|
||||
}
|
||||
|
||||
// Now stop containers + remove volumes for all descendants (any depth).
|
||||
// Any concurrent heartbeat / registration / liveness-triggered restart
|
||||
// will see status='removed' and bail out early.
|
||||
//
|
||||
// Combines two concerns:
|
||||
//
|
||||
// 1. Detach cleanup from the request ctx via WithoutCancel + a 30s
|
||||
// timeout, so when the canvas's `api.del` resolves on our 200
|
||||
// (and gin cancels c.Request.Context()), in-flight Docker
|
||||
// stop/remove calls don't get cancelled mid-operation. The
|
||||
// previous shape leaked containers every time the canvas hung
|
||||
// up promptly: Stop returned "context canceled", the container
|
||||
// stayed up, and the next RemoveVolume failed with
|
||||
// "volume in use". 30s is generous for Docker daemon round-
|
||||
// trips (typical: <2s) and bounds a stuck daemon.
|
||||
//
|
||||
// 2. #1843: aggregate Stop() failures into stopErrs so the
|
||||
// post-deletion block surfaces them as 500. On the CP/EC2
|
||||
// backend, Stop() calls control plane's DELETE endpoint to
|
||||
// terminate the EC2; if that errors (transient 5xx, network),
|
||||
// the EC2 stays running with no DB row to track it (the
|
||||
// "orphan EC2 on a 0-customer account" scenario). Loud-fail
|
||||
// instead of silent-leak — clients retry, Stop's instance_id
|
||||
// lookup is idempotent against status='removed'. RemoveVolume
|
||||
// errors stay log-and-continue (local cleanup, not infra-leak).
|
||||
cleanupCtx, cleanupCancel := context.WithTimeout(
|
||||
context.WithoutCancel(ctx), 30*time.Second)
|
||||
defer cleanupCancel()
|
||||
|
||||
var stopErrs []error
|
||||
stopAndRemove := func(wsID string) {
|
||||
// Stop the workload first via the backend dispatcher (CP for
|
||||
// SaaS, Docker for self-hosted). Pre-2026-05-05 this gate was
|
||||
// `if h.provisioner == nil { return }` — early-returning on
|
||||
// every SaaS tenant left the EC2 running with no DB row to
|
||||
// track it (issue #2814; the comment below claimed "loud-fail
|
||||
// instead of silent-leak" but the early-return made it the
|
||||
// silent path on SaaS).
|
||||
//
|
||||
// Check Stop's error before any volume cleanup — the previous
|
||||
// code discarded it and immediately tried RemoveVolume, which
|
||||
// always fails with "volume in use" when Stop didn't actually
|
||||
// kill the container. The orphan sweeper
|
||||
// (registry/orphan_sweeper.go) catches what we skip here on
|
||||
// the next reconcile pass.
|
||||
if err := h.StopWorkspaceAuto(cleanupCtx, wsID); err != nil {
|
||||
log.Printf("Delete %s stop failed: %v — leaving cleanup for orphan sweeper", wsID, err)
|
||||
stopErrs = append(stopErrs, fmt.Errorf("stop %s: %w", wsID, err))
|
||||
return
|
||||
}
|
||||
// Volume cleanup is Docker-only — CP-managed workspaces have
|
||||
// no host-bind volumes to remove. Skip silently when no Docker
|
||||
// provisioner is wired (the SaaS path already terminated the
|
||||
// EC2 above; nothing left to do).
|
||||
if h.provisioner != nil {
|
||||
if err := h.provisioner.RemoveVolume(cleanupCtx, wsID); err != nil {
|
||||
log.Printf("Delete %s volume removal warning: %v", wsID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, descID := range descendantIDs {
|
||||
stopAndRemove(descID)
|
||||
db.ClearWorkspaceKeys(cleanupCtx, descID)
|
||||
// #2269: drop the per-workspace restartState entry so it
|
||||
// doesn't accumulate across the platform's lifetime. The
|
||||
// LoadOrStore that creates the entry (workspace_restart.go)
|
||||
// has no companion remove path; without this Delete, every
|
||||
// short-lived workspace leaks ~16 bytes forever.
|
||||
restartStates.Delete(descID)
|
||||
// Detach broadcaster ctx for the same reason as the cleanup
|
||||
// above — RecordAndBroadcast does an INSERT INTO
|
||||
// structure_events + Redis Publish. If the canvas hangs up,
|
||||
// a request-ctx-bound INSERT can be cancelled mid-write,
|
||||
// leaving other WS clients ignorant of the cascade. The DB
|
||||
// row is already 'removed' so it's recoverable, but the
|
||||
// inconsistency is avoidable.
|
||||
h.broadcaster.RecordAndBroadcast(cleanupCtx, string(events.EventWorkspaceRemoved), descID, map[string]interface{}{})
|
||||
}
|
||||
|
||||
stopAndRemove(id)
|
||||
db.ClearWorkspaceKeys(cleanupCtx, id)
|
||||
restartStates.Delete(id) // #2269: same as descendants above
|
||||
|
||||
h.broadcaster.RecordAndBroadcast(cleanupCtx, string(events.EventWorkspaceRemoved), id, map[string]interface{}{
|
||||
"cascade_deleted": len(descendantIDs),
|
||||
})
|
||||
|
||||
// If any Stop call failed, surface 500 so the client retries. The DB
|
||||
// row is already 'removed' (idempotent), and Stop's instance_id
|
||||
@ -401,104 +543,6 @@ func (h *WorkspaceHandler) Delete(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{"status": "removed", "cascade_deleted": len(descendantIDs)})
|
||||
}
|
||||
|
||||
// CascadeDelete performs the cascade-removal sequence used by the HTTP
|
||||
// DELETE handler and by OrgImport's reconcile mode: walk descendants, mark
|
||||
// self+descendants 'removed' first (#73 race guard), stop containers / EC2s,
|
||||
// remove volumes, revoke tokens, disable schedules, broadcast events.
|
||||
//
|
||||
// Idempotent against already-removed rows (the descendant CTE and all UPDATE
|
||||
// guards skip status='removed'). Returns the descendant id list so the HTTP
|
||||
// caller can drive the optional `?purge=true` hard-delete path against the
|
||||
// same set the cascade just touched, plus any per-workspace stop errors so
|
||||
// callers can surface a retryable failure instead of a silent-leak.
|
||||
//
|
||||
// Caller is responsible for the children-confirmation gate (the HTTP handler
|
||||
// returns 409 when children exist + ?confirm=true is missing); this helper
|
||||
// always cascades.
|
||||
func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string) ([]string, []error, error) {
|
||||
if err := validateWorkspaceID(id); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
descendantIDs := []string{}
|
||||
descRows, err := db.DB.QueryContext(ctx, `
|
||||
WITH RECURSIVE descendants AS (
|
||||
SELECT id FROM workspaces WHERE parent_id = $1 AND status != 'removed'
|
||||
UNION ALL
|
||||
SELECT w.id FROM workspaces w JOIN descendants d ON w.parent_id = d.id WHERE w.status != 'removed'
|
||||
)
|
||||
SELECT id FROM descendants
|
||||
`, id)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("descendant query: %w", err)
|
||||
}
|
||||
for descRows.Next() {
|
||||
var descID string
|
||||
if descRows.Scan(&descID) == nil {
|
||||
descendantIDs = append(descendantIDs, descID)
|
||||
}
|
||||
}
|
||||
descRows.Close()
|
||||
|
||||
allIDs := append([]string{id}, descendantIDs...)
|
||||
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET status = $1, updated_at = now() WHERE id = ANY($2::uuid[])`,
|
||||
models.StatusRemoved, pq.Array(allIDs)); err != nil {
|
||||
log.Printf("CascadeDelete status update for %s: %v", id, err)
|
||||
}
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`DELETE FROM canvas_layouts WHERE workspace_id = ANY($1::uuid[])`,
|
||||
pq.Array(allIDs)); err != nil {
|
||||
log.Printf("CascadeDelete canvas_layouts for %s: %v", id, err)
|
||||
}
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspace_auth_tokens SET revoked_at = now()
|
||||
WHERE workspace_id = ANY($1::uuid[]) AND revoked_at IS NULL`,
|
||||
pq.Array(allIDs)); err != nil {
|
||||
log.Printf("CascadeDelete token revocation for %s: %v", id, err)
|
||||
}
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspace_schedules SET enabled = false, updated_at = now()
|
||||
WHERE workspace_id = ANY($1::uuid[]) AND enabled = true`,
|
||||
pq.Array(allIDs)); err != nil {
|
||||
log.Printf("CascadeDelete schedule disable for %s: %v", id, err)
|
||||
}
|
||||
|
||||
cleanupCtx, cleanupCancel := context.WithTimeout(
|
||||
context.WithoutCancel(ctx), 30*time.Second)
|
||||
defer cleanupCancel()
|
||||
|
||||
var stopErrs []error
|
||||
stopAndRemove := func(wsID string) {
|
||||
if err := h.StopWorkspaceAuto(cleanupCtx, wsID); err != nil {
|
||||
log.Printf("CascadeDelete %s stop failed: %v — leaving cleanup for orphan sweeper", wsID, err)
|
||||
stopErrs = append(stopErrs, fmt.Errorf("stop %s: %w", wsID, err))
|
||||
return
|
||||
}
|
||||
if h.provisioner != nil {
|
||||
if err := h.provisioner.RemoveVolume(cleanupCtx, wsID); err != nil {
|
||||
log.Printf("CascadeDelete %s volume removal warning: %v", wsID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, descID := range descendantIDs {
|
||||
stopAndRemove(descID)
|
||||
db.ClearWorkspaceKeys(cleanupCtx, descID)
|
||||
restartStates.Delete(descID)
|
||||
h.broadcaster.RecordAndBroadcast(cleanupCtx, string(events.EventWorkspaceRemoved), descID, map[string]interface{}{})
|
||||
}
|
||||
stopAndRemove(id)
|
||||
db.ClearWorkspaceKeys(cleanupCtx, id)
|
||||
restartStates.Delete(id)
|
||||
h.broadcaster.RecordAndBroadcast(cleanupCtx, string(events.EventWorkspaceRemoved), id, map[string]interface{}{
|
||||
"cascade_deleted": len(descendantIDs),
|
||||
})
|
||||
|
||||
return descendantIDs, stopErrs, nil
|
||||
}
|
||||
|
||||
// validateWorkspaceID returns an error when id is not a valid UUID.
|
||||
// #687: prevents 500s from Postgres when a garbage string (e.g. ../../etc/passwd)
|
||||
// is passed as the :id path parameter.
|
||||
|
||||
@ -715,30 +715,14 @@ func deriveProviderFromModelSlug(model string) string {
|
||||
// payload.Model at boot), this is a no-op — no harm in the switch
|
||||
// being empty for those cases.
|
||||
func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
|
||||
// Resolution order (priority high → low):
|
||||
// 1. payload.Model (caller passed the canvas-picked model id verbatim)
|
||||
// 2. envVars["MODEL"] (workspace_secret persisted by /org/import via
|
||||
// the persona env file — MODEL=MiniMax-M2.7-highspeed etc.)
|
||||
// 3. envVars["MODEL_PROVIDER"] (legacy: this secret was historically a
|
||||
// *model id* set by canvas Save+Restart's PUT /model; on the
|
||||
// post-2026-05-08 persona-env convention it's a *provider slug*
|
||||
// (e.g. "minimax") which is NOT a valid model id, so this fallback
|
||||
// only fires when MODEL is absent.)
|
||||
//
|
||||
// Pre-fix bug: this function unconditionally OVERWROTE envVars["MODEL"]
|
||||
// with the MODEL_PROVIDER slug (when payload.Model was empty), wiping
|
||||
// the operator's explicit per-persona MODEL secret on every restart.
|
||||
// Symptom: a workspace whose persona env said
|
||||
// MODEL=MiniMax-M2.7-highspeed booted fine on first /org/import (the
|
||||
// envVars map was populated direct from the env file), then on the
|
||||
// next Restart the workspace_secrets-derived MODEL got clobbered by
|
||||
// MODEL_PROVIDER="minimax" — the literal slug, not a valid model id —
|
||||
// and the workspace template's adapter routed to providers[0]
|
||||
// (anthropic-oauth) and wedged at SDK initialize. Caught 2026-05-08
|
||||
// during Phase 4 verification of template-claude-code PR #9.
|
||||
if model == "" {
|
||||
model = envVars["MODEL"]
|
||||
}
|
||||
// Fall back to the MODEL_PROVIDER workspace secret when the caller
|
||||
// didn't pass one explicitly. This is the path that "Save+Restart"
|
||||
// hits — Restart builds its payload from the workspaces row (no model
|
||||
// column there) so payload.Model is always empty, but the user's
|
||||
// canvas selection was stored as MODEL_PROVIDER via PUT /model and
|
||||
// is already loaded into envVars here. Without this fallback hermes
|
||||
// silently boots with the template default and errors "No LLM
|
||||
// provider configured" even though the user picked a valid model.
|
||||
if model == "" {
|
||||
model = envVars["MODEL_PROVIDER"]
|
||||
}
|
||||
|
||||
@ -724,68 +724,3 @@ func TestApplyRuntimeModelEnv_SetsUniversalMODELForAllRuntimes(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyRuntimeModelEnv_PersonaEnvMODELSecretPreserved locks in the
|
||||
// 2026-05-08 fix that prevents the MODEL_PROVIDER-as-slug fallback from
|
||||
// silently overwriting a per-persona MODEL workspace_secret on restart.
|
||||
//
|
||||
// Pre-fix bug recurrence guard: when the persona env file (loaded into
|
||||
// workspace_secrets at /org/import time) declares both MODEL=<id> and
|
||||
// MODEL_PROVIDER=<slug>, the restart path used to overwrite envVars["MODEL"]
|
||||
// with the MODEL_PROVIDER slug because applyRuntimeModelEnv'\''s
|
||||
// payload.Model fallback consulted MODEL_PROVIDER first. Symptom: dev-tree
|
||||
// workspaces booted fine on first /org/import, then on next restart the
|
||||
// model id became literal "minimax" and the workspace template'\''s adapter
|
||||
// failed to match any registry prefix, fell through to anthropic-oauth,
|
||||
// and wedged at SDK initialize. Caught during Phase 4 verification of
|
||||
// template-claude-code PR #9.
|
||||
func TestApplyRuntimeModelEnv_PersonaEnvMODELSecretPreserved(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
envMODEL string
|
||||
envMP string
|
||||
wantMODEL string
|
||||
}{
|
||||
{
|
||||
name: "MODEL secret wins over MODEL_PROVIDER slug (persona-env shape on restart)",
|
||||
envMODEL: "MiniMax-M2.7-highspeed",
|
||||
envMP: "minimax",
|
||||
wantMODEL: "MiniMax-M2.7-highspeed",
|
||||
},
|
||||
{
|
||||
name: "MODEL secret wins even when same as MODEL_PROVIDER",
|
||||
envMODEL: "opus",
|
||||
envMP: "claude-code",
|
||||
wantMODEL: "opus",
|
||||
},
|
||||
{
|
||||
name: "MODEL absent → fall back to MODEL_PROVIDER (legacy canvas Save+Restart shape)",
|
||||
envMODEL: "",
|
||||
envMP: "MiniMax-M2.7",
|
||||
wantMODEL: "MiniMax-M2.7",
|
||||
},
|
||||
{
|
||||
name: "Both absent → no MODEL set",
|
||||
envMODEL: "",
|
||||
envMP: "",
|
||||
wantMODEL: "",
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
envVars := map[string]string{}
|
||||
if tc.envMODEL != "" {
|
||||
envVars["MODEL"] = tc.envMODEL
|
||||
}
|
||||
if tc.envMP != "" {
|
||||
envVars["MODEL_PROVIDER"] = tc.envMP
|
||||
}
|
||||
// payload.Model is empty (the restart case)
|
||||
applyRuntimeModelEnv(envVars, "claude-code", "")
|
||||
if got := envVars["MODEL"]; got != tc.wantMODEL {
|
||||
t.Errorf("MODEL = %q, want %q (envMODEL=%q envMP=%q)",
|
||||
got, tc.wantMODEL, tc.envMODEL, tc.envMP)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -813,12 +813,6 @@ func TestWorkspaceDelete_DisablesSchedules(t *testing.T) {
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}))
|
||||
|
||||
// CascadeDelete walks descendants unconditionally — 0-children case
|
||||
// returns 0 rows here.
|
||||
mock.ExpectQuery("WITH RECURSIVE descendants").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}))
|
||||
|
||||
// Mark workspace as removed
|
||||
mock.ExpectExec("UPDATE workspaces SET status =").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
@ -941,12 +935,6 @@ func TestWorkspaceDelete_ScheduleDisableOnlyTargetsDeletedWorkspace(t *testing.T
|
||||
WithArgs(wsA).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}))
|
||||
|
||||
// CascadeDelete walks descendants unconditionally — 0-children case
|
||||
// returns 0 rows here.
|
||||
mock.ExpectQuery("WITH RECURSIVE descendants").
|
||||
WithArgs(wsA).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}))
|
||||
|
||||
// Mark only workspace A as removed
|
||||
mock.ExpectExec("UPDATE workspaces SET status =").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
@ -207,25 +207,20 @@ func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
// Capture metric baseline so we can wait for the error counter to
|
||||
// settle before returning — otherwise this test's leaked metric
|
||||
// write races with the next test's metricDelta() baseline read and
|
||||
// causes a non-deterministic +1 leak (manifests as
|
||||
// TestStartSweeper_RecordsMetricsOnSuccess: "error counter delta=1,
|
||||
// want 0"). cycleDone fires inside the fake's Sweep defer, BEFORE
|
||||
// sweepOnce records the error metric — so cancel() right after
|
||||
// waitForCycle is too early.
|
||||
_, _, deltaError := metricDelta(t)
|
||||
|
||||
// 50ms ticker so the second cycle fires quickly enough for the test.
|
||||
// We re-export SweepInterval as a const, but tests use the public
|
||||
// StartSweeper that takes its own interval — wait, the public
|
||||
// StartSweeper signature uses the package-level SweepInterval. Hmm,
|
||||
// this means the test takes ~5 minutes. Let me reconsider.
|
||||
//
|
||||
// (We patch the test below to just look at the immediate-sweep call
|
||||
// + an error path, since the immediate call is enough to prove the
|
||||
// "error doesn't crash" contract — the loop continues afterward
|
||||
// regardless of timing.)
|
||||
go pendinguploads.StartSweeper(ctx, store, time.Hour)
|
||||
|
||||
// Wait for the first (errored) cycle.
|
||||
store.waitForCycle(t, 1, 2*time.Second)
|
||||
// Wait for the goroutine to record the error metric. After this
|
||||
// returns, sweepOnce has fully completed and a subsequent cancel()
|
||||
// stops the loop on the next select pass with no in-flight metric
|
||||
// writes outstanding.
|
||||
waitForMetricDelta(t, deltaError, 1, 2*time.Second)
|
||||
// Cancel — the goroutine returns cleanly, proving the error path
|
||||
// didn't crash the loop. Without this fix the goroutine would have
|
||||
// either panicked (process abort visible at exit) or stuck (this
|
||||
|
||||
@ -765,21 +765,6 @@ func ApplyTierConfig(hostCfg *container.HostConfig, cfg WorkspaceConfig, configM
|
||||
|
||||
// CopyTemplateToContainer copies files from a host directory into /configs in the container.
|
||||
func (p *Provisioner) CopyTemplateToContainer(ctx context.Context, containerID, templatePath string) error {
|
||||
// Resolve symlinks at the root before walking. filepath.Walk does
|
||||
// NOT follow a symlink that IS the root — it Lstats the path, sees
|
||||
// a symlink (non-directory), and emits exactly one entry without
|
||||
// descending. With cross-repo composition (parent template's
|
||||
// dev-lead → ../sibling-repo/dev-lead/, see internal#77), the
|
||||
// caller routinely passes a symlink as templatePath. Without this
|
||||
// resolution the workspace's /configs/ mount lands empty.
|
||||
//
|
||||
// Security: templatePath has already passed resolveInsideRoot's
|
||||
// path-string check at the call site — the trust boundary is the
|
||||
// operator-side /org-templates/ filesystem layout, not this
|
||||
// resolution step.
|
||||
if resolved, err := filepath.EvalSymlinks(templatePath); err == nil {
|
||||
templatePath = resolved
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
tw := tar.NewWriter(&buf)
|
||||
|
||||
|
||||
@ -1,3 +0,0 @@
|
||||
DROP INDEX IF EXISTS workspace_plugins_tracked_not_none;
|
||||
DROP INDEX IF EXISTS workspace_plugins_workspace_name;
|
||||
DROP TABLE IF EXISTS workspace_plugins;
|
||||
@ -1,39 +0,0 @@
|
||||
-- workspace_plugins: per-workspace record of installed plugins, with the
|
||||
-- tracked-ref needed for the version-subscription model (core#113).
|
||||
--
|
||||
-- Today plugin install state is filesystem-only — `/configs/plugins/<name>/`
|
||||
-- inside the workspace container. There's no DB record of "what's installed
|
||||
-- where, from what source, pinned to what." That's fine until you want
|
||||
-- drift detection (compare upstream tag's resolved SHA vs the installed
|
||||
-- one) and that's the foundation this table provides.
|
||||
--
|
||||
-- This migration is purely additive: existing install paths keep working;
|
||||
-- they'll write to this table on next install. Workspaces with plugins
|
||||
-- already installed before this migration won't have rows until they're
|
||||
-- re-installed (acceptable — the tracking is forward-looking).
|
||||
--
|
||||
-- tracked_ref values:
|
||||
-- 'none' — no auto-update tracking (default)
|
||||
-- 'tag:vX.Y.Z' — track a specific version tag
|
||||
-- 'tag:latest' — track the latest tag (drift on every new tag)
|
||||
-- 'sha:<full>' — pinned to a specific commit SHA (no drift ever)
|
||||
--
|
||||
-- A subsequent migration adds the plugin_update_queue table once drift
|
||||
-- detection lands.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS workspace_plugins (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
workspace_id UUID NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE,
|
||||
plugin_name TEXT NOT NULL,
|
||||
source_raw TEXT NOT NULL,
|
||||
tracked_ref TEXT NOT NULL DEFAULT 'none',
|
||||
installed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS workspace_plugins_workspace_name
|
||||
ON workspace_plugins(workspace_id, plugin_name);
|
||||
|
||||
-- Partial index for the drift detector: only scan rows opted into tracking.
|
||||
CREATE INDEX IF NOT EXISTS workspace_plugins_tracked_not_none
|
||||
ON workspace_plugins(tracked_ref) WHERE tracked_ref != 'none';
|
||||
@ -1,2 +0,0 @@
|
||||
DROP INDEX IF EXISTS workspaces_update_tier_canary;
|
||||
ALTER TABLE workspaces DROP COLUMN IF EXISTS update_tier;
|
||||
@ -1,26 +0,0 @@
|
||||
-- workspaces.update_tier — canary vs production filter for plugin updates
|
||||
-- (core#115). Composes with the version-subscription DB foundation
|
||||
-- (core#113, merged) and the upcoming drift+queue+apply endpoint
|
||||
-- (core#123).
|
||||
--
|
||||
-- Tiers:
|
||||
-- 'production' (default) — fan-out target; only updated AFTER canary soak
|
||||
-- 'canary' — early-adopter target; updates land here first
|
||||
--
|
||||
-- Default 'production' so existing customers (Reno-Stars + any future
|
||||
-- live tenant) are default-safe. Synthetic dogfooding workspaces opt
|
||||
-- INTO 'canary' explicitly.
|
||||
--
|
||||
-- The column is just metadata at this layer; the actual filter logic
|
||||
-- ('apply this update only to canary tier first') lives in the future
|
||||
-- POST /admin/plugin-updates/:id/apply endpoint (core#123).
|
||||
|
||||
ALTER TABLE workspaces
|
||||
ADD COLUMN IF NOT EXISTS update_tier TEXT NOT NULL DEFAULT 'production'
|
||||
CHECK (update_tier IN ('canary', 'production'));
|
||||
|
||||
-- Partial index for the apply endpoint's canary-tier scan: only
|
||||
-- index canary rows since the apply path queries them most often
|
||||
-- and the production set is the much larger default.
|
||||
CREATE INDEX IF NOT EXISTS workspaces_update_tier_canary
|
||||
ON workspaces(update_tier) WHERE update_tier = 'canary';
|
||||
Loading…
Reference in New Issue
Block a user