fix(ci): use AUTO_SYNC_TOKEN for auto-sync main->staging (Class D)

Same shape as molecule-controlplane#29: per-job GITHUB_TOKEN doesn't have the Gitea API permissions to open PRs / push branches the auto-sync flow needs. AUTO_SYNC_TOKEN is the devops-engineer persona PAT (per saved memory feedback_per_agent_gitea_identity_default). Companion prod ops (already done): - devops-engineer added as collaborator on molecule-core (write) - devops-engineer added to staging branch protection push_whitelist - AUTO_SYNC_TOKEN registered as Actions secret on molecule-core
Merge pull request 'chore(ci): retrigger staging CI on new runner image' (#25) from chore/retrigger-staging-on-fixed-runner-image into staging
2026-05-07 07:01:46 -07:00 · 2026-05-07 13:50:16 +00:00 · 2026-05-07 06:48:13 -07:00 · 2026-05-07 12:14:36 +00:00 · 2026-05-07 05:12:06 -07:00 · 2026-05-07 11:46:29 +00:00
134 changed files with 9980 additions and 1358 deletions
@@ -37,7 +37,7 @@ CANONICAL_FILE = Path(".github/workflows/secret-scan.yml")
 CONSUMERS: list[tuple[str, str]] = [
    (
        "molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh",
-        "https://raw.githubusercontent.com/Molecule-AI/molecule-ai-workspace-runtime/main/molecule_runtime/scripts/pre-commit-checks.sh",
+        "https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-runtime/raw/branch/main/molecule_runtime/scripts/pre-commit-checks.sh",
    ),
 ]

@@ -103,7 +103,7 @@ jobs:
        with:
          fetch-depth: 0
          ref: staging
-          token: ${{ secrets.GITHUB_TOKEN }}
+          token: ${{ secrets.AUTO_SYNC_TOKEN }}

      - name: Configure git author
        run: |
@@ -174,7 +174,7 @@ jobs:
      - name: Open auto-sync PR + enable auto-merge
        if: steps.check.outputs.needs_sync == 'true'
        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GH_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
          BRANCH: ${{ steps.check.outputs.branch }}
          MAIN_SHORT: ${{ steps.check.outputs.main_short }}
          DID_FF: ${{ steps.prep.outputs.did_ff }}
@@ -235,7 +235,7 @@ jobs:
        run: npx vitest run --coverage
      - name: Upload coverage summary as artifact
        if: needs.changes.outputs.canvas == 'true' && always()
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
        with:
          name: canvas-coverage-${{ github.run_id }}
          path: canvas/coverage/
@@ -55,17 +55,8 @@ jobs:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

-      - name: Checkout sibling plugin repo
-        # Same reasoning as publish-workspace-server-image.yml — the Go
-        # module's replace directive needs the plugin source so
-        # CodeQL's "go build" phase can resolve.
-        if: matrix.language == 'go'
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          repository: Molecule-AI/molecule-ai-plugin-github-app-auth
-          path: molecule-ai-plugin-github-app-auth
-          token: ${{ secrets.PLUGIN_REPO_PAT || secrets.GITHUB_TOKEN }}
-
+      # github-app-auth sibling-checkout removed 2026-05-07 (#157):
+      # plugin was dropped + the Dockerfile no longer needs it.
      # jq is pre-installed on ubuntu-latest — no setup step needed.

      - name: Initialize CodeQL
@@ -121,7 +112,7 @@ jobs:
        # 14-day retention — longer than default 3, short enough not
        # to bloat quota.
        if: always()
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
        with:
          name: codeql-sarif-${{ matrix.language }}
          path: sarif-results/${{ matrix.language }}/
@@ -139,7 +139,7 @@ jobs:

      - name: Upload Playwright report on failure
        if: failure() && needs.detect-changes.outputs.canvas == 'true'
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
        with:
          name: playwright-report-staging
          path: canvas/playwright-report-staging/
@@ -147,7 +147,7 @@ jobs:

      - name: Upload screenshots on failure
        if: failure() && needs.detect-changes.outputs.canvas == 'true'
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
        with:
          name: playwright-screenshots
          path: canvas/test-results/
@@ -121,8 +121,16 @@ jobs:
          # Per-migration result is logged so a failed migration that
          # SHOULD have been replayable surfaces in the CI log instead
          # of silently failing.
+          # Apply both *.sql (legacy, lives next to its module) and
+          # *.up.sql (newer up/down convention) in a single
+          # lexicographically-sorted pass. Excluding *.down.sql so the
+          # newest-naming-convention pairs don't undo themselves mid-run.
+          # Pre-#149-followup this loop only globbed *.up.sql, which
+          # silently skipped 001_workspaces.sql + 009_activity_logs.sql
+          # — fine while no integration test depended on those tables,
+          # not fine once a cross-table atomicity test came in.
          set +e
-          for migration in migrations/*.up.sql; do
+          for migration in $(ls migrations/*.sql 2>/dev/null | grep -v '\.down\.sql$' | sort); do
            if psql -h localhost -U postgres -d molecule -v ON_ERROR_STOP=1 \
                  -f "$migration" >/dev/null 2>&1; then
              echo "✓ $(basename "$migration")"
@@ -132,16 +140,19 @@ jobs:
          done
          set -e

-          # Sanity: the delegations table MUST exist for the integration
-          # tests to be meaningful. Hard-fail if 049 didn't land — that
-          # would be a real regression we want loud.
-          if ! psql -h localhost -U postgres -d molecule -tA \
-              -c "SELECT 1 FROM information_schema.tables WHERE table_name = 'delegations'" \
-              | grep -q 1; then
-            echo "::error::delegations table missing after migration replay — handler integration tests would be meaningless"
-            exit 1
-          fi
-          echo "✓ delegations table present"
+          # Sanity: the delegations, workspaces, activity_logs and
+          # pending_uploads tables MUST exist for the integration tests
+          # to be meaningful. Hard-fail if any didn't land — that would
+          # be a real regression we want loud.
+          for tbl in delegations workspaces activity_logs pending_uploads; do
+            if ! psql -h localhost -U postgres -d molecule -tA \
+                -c "SELECT 1 FROM information_schema.tables WHERE table_name = '$tbl'" \
+                | grep -q 1; then
+              echo "::error::$tbl table missing after migration replay — handler integration tests would be meaningless"
+              exit 1
+            fi
+            echo "✓ $tbl table present"
+          done

      - if: needs.detect-changes.outputs.handlers == 'true'
        name: Run integration tests
@@ -95,16 +95,8 @@ jobs:
      - if: needs.detect-changes.outputs.run == 'true'
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

-      - name: Checkout sibling plugin repo
-        # Dockerfile.tenant copies molecule-ai-plugin-github-app-auth/
-        # at the build-context root (see workspace-server/Dockerfile.tenant
-        # line 19). PLUGIN_REPO_PAT pattern matches publish-workspace-server-image.yml.
-        if: needs.detect-changes.outputs.run == 'true'
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          repository: Molecule-AI/molecule-ai-plugin-github-app-auth
-          path: molecule-ai-plugin-github-app-auth
-          token: ${{ secrets.PLUGIN_REPO_PAT || secrets.GITHUB_TOKEN }}
+      # github-app-auth sibling-checkout removed 2026-05-07 (#157):
+      # the plugin was dropped + Dockerfile.tenant no longer COPYs it.

      - name: Install Python deps for replays
        # peer-discovery-404 (and future replays) eval Python against the
@@ -282,42 +282,33 @@ jobs:
          echo "::error::Refusing to fan out cascade against stale or corrupt PyPI surfaces."
          exit 1

-      - name: Fan out repository_dispatch
+      - name: Fan out via push to .runtime-version
        env:
-          # Fine-grained PAT with `actions:write` on the 8 template repos.
-          # GITHUB_TOKEN can't fire dispatches across repos — needs an explicit
-          # token. Stored as a repo secret; rotate per the standard schedule.
-          DISPATCH_TOKEN: ${{ secrets.TEMPLATE_DISPATCH_TOKEN }}
-          # Single source of truth: the publish job's output, which handles
-          # tag/manual-input/auto-bump uniformly. The previous fallback
-          # (`steps.version.outputs.version` from inside the cascade job)
-          # was a dead reference — different job, no shared step scope.
+          # Gitea PAT with write:repository scope on the 8 cascade-active
+          # template repos. Used here for `git push` (NOT for an API
+          # dispatch — Gitea 1.22.6 has no repository_dispatch endpoint;
+          # empirically verified across 6 candidate paths in molecule-
+          # core#20 issuecomment-913). The push trips each template's
+          # existing `on: push: branches: [main]` trigger on
+          # publish-image.yml, which then reads the updated
+          # .runtime-version via its resolve-version job.
+          DISPATCH_TOKEN: ${{ secrets.DISPATCH_TOKEN }}
          RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
        run: |
          set +e   # don't abort on a single repo failure — collect them all
-          # Schedule-vs-dispatch behaviour split (hardened 2026-04-28
-          # after the sweep-cf-orphans soft-skip incident — same class
-          # of bug):
-          #
-          # The earlier "skipping cascade. templates will pick up the
-          # new version on their own next rebuild" message was wrong —
-          # templates only build on this dispatch trigger; without it
-          # they stay pinned to whatever runtime version they last saw.
-          # A silent skip here means "PyPI is current, templates are
-          # not" and the gap is invisible until someone notices a
-          # template still on the old version weeks later.
-          #
-          #   - push                → exit 1 (red CI surfaces the gap)
-          #   - workflow_dispatch   → exit 0 with a warning (operator
-          #                           ran this ad-hoc; let them rerun
-          #                           after fixing the secret)
+
+          # Soft-skip on workflow_dispatch when the token is missing
+          # (operator ad-hoc test); hard-fail on push so unattended
+          # publishes can't silently skip the cascade. Same shape as
+          # the original v1, intentional split per the schedule-vs-
+          # dispatch hardening 2026-04-28.
          if [ -z "$DISPATCH_TOKEN" ]; then
            if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
-              echo "::warning::TEMPLATE_DISPATCH_TOKEN secret not set — skipping cascade."
+              echo "::warning::DISPATCH_TOKEN secret not set — skipping cascade."
              echo "::warning::set it at Settings → Secrets and Variables → Actions, then rerun. Templates will stay on the prior runtime version until either this token is set or each template is rebuilt manually."
              exit 0
            fi
-            echo "::error::TEMPLATE_DISPATCH_TOKEN secret missing — cascade cannot fan out."
+            echo "::error::DISPATCH_TOKEN secret missing — cascade cannot fan out."
            echo "::error::PyPI was published, but the 8 template repos will NOT pick up the new version until this token is restored and a republish dispatches the cascade."
            echo "::error::set it at Settings → Secrets and Variables → Actions; then re-trigger publish-runtime via workflow_dispatch."
            exit 1
@@ -327,37 +318,119 @@ jobs:
            echo "::error::publish job did not expose a version output — cascade cannot fan out"
            exit 1
          fi
-          # All 9 active workspace template repos. The PR #2536 pruning
-          # ("deprecated, no shipping images") was empirically wrong:
-          # continuous-synth-e2e.yml defaults to langgraph as its primary
-          # canary (line 44), and every excluded template had successful
-          # publish-image runs as of 2026-05-03 — none were dormant.
-          # Symptom of the prune: today's a2a-sdk strict-mode fix
-          # (#2566 / commit e1628c4) cascaded to 4 templates but never
-          # reached langgraph, so the synth-E2E correctly canary'd a fix
-          # that had landed but not deployed. Re-added the 5 templates.
-          # Long-term: derive this list from manifest.json so cascade
-          # scope can't drift from E2E scope — tracked in RFC #388 as a
-          # Phase-1 invariant.
+
+          # All 9 workspace templates declared in manifest.json. The list
+          # MUST stay aligned with manifest.json's workspace_templates —
+          # cascade-list-drift-gate.yml enforces this in CI per the
+          # codex-stuck-on-stale-runtime invariant from PR #2556.
+          # Long-term goal: derive this list from manifest.json so it
+          # can't drift even on a manifest edit (RFC #388 Phase-1).
+          #
+          # Per-template publish-image.yml presence is checked at
+          # cascade-time below: codex doesn't ship one today, so the
+          # cascade soft-skips it with an informational message rather
+          # than dropping it from this list (which would re-introduce
+          # the drift the gate exists to catch).
+          GITEA_URL="${GITEA_URL:-https://git.moleculesai.app}"
          TEMPLATES="claude-code hermes openclaw codex langgraph crewai autogen deepagents gemini-cli"
          FAILED=""
+          SKIPPED=""
+
+          # Configure git identity once. Commits are authored as the
+          # generic "publish-runtime cascade" identity set here and are
+          # pushed with the credentials of the persona owning
+          # DISPATCH_TOKEN; the matching Co-Authored-By trailer in the
+          # commit message keeps the audit trail honest about the
+          # workflow-driven origin.
+          git config --global user.name  "publish-runtime cascade"
+          git config --global user.email "publish-runtime@moleculesai.app"
+
+          WORKDIR="$(mktemp -d)"
          for tpl in $TEMPLATES; do
-            REPO="Molecule-AI/molecule-ai-workspace-template-$tpl"
-            STATUS=$(curl -sS -o /tmp/dispatch.out -w "%{http_code}" \
-              -X POST "https://api.github.com/repos/$REPO/dispatches" \
-              -H "Authorization: Bearer $DISPATCH_TOKEN" \
-              -H "Accept: application/vnd.github+json" \
-              -H "X-GitHub-Api-Version: 2022-11-28" \
-              -d "{\"event_type\":\"runtime-published\",\"client_payload\":{\"runtime_version\":\"$VERSION\"}}")
-            if [ "$STATUS" = "204" ]; then
-              echo "✓ dispatched $tpl ($VERSION)"
-            else
-              echo "::warning::✗ failed to dispatch $tpl: HTTP $STATUS — $(cat /tmp/dispatch.out)"
+            REPO="molecule-ai/molecule-ai-workspace-template-$tpl"
+            CLONE="$WORKDIR/$tpl"
+
+            # Pre-check: skip templates without a publish-image.yml.
+            # The cascade's job is to trip the template's on-push
+            # rebuild — if there's no rebuild workflow, pushing a
+            # .runtime-version commit is just noise on the target
+            # repo. Use the Gitea contents API (no clone required for
+            # the probe). 200 = present; 404 = absent.
+            HTTP=$(curl -sS -o /dev/null -w "%{http_code}" \
+              -H "Authorization: token $DISPATCH_TOKEN" \
+              "$GITEA_URL/api/v1/repos/$REPO/contents/.github/workflows/publish-image.yml")
+            if [ "$HTTP" = "404" ]; then
+              echo "↷ $tpl has no publish-image.yml — soft-skip (informational; manifest still tracks it)"
+              SKIPPED="$SKIPPED $tpl"
+              continue
+            fi
+            if [ "$HTTP" != "200" ]; then
+              echo "::warning::$tpl publish-image.yml probe returned HTTP $HTTP — proceeding anyway, push will surface the real failure if any"
+            fi
+
+            # Use a per-template attempt loop so a transient race (e.g.
+            # human pushing to the same template at the same instant)
+            # doesn't lose the cascade. Bounded retries (3) — beyond
+            # that we surface the failure and let the operator retry.
+            attempt=0
+            success=false
+            while [ $attempt -lt 3 ]; do
+              attempt=$((attempt + 1))
+              rm -rf "$CLONE"
+              if ! git clone --depth=1 \
+                  "https://x-access-token:${DISPATCH_TOKEN}@${GITEA_URL#https://}/$REPO.git" \
+                  "$CLONE" >/tmp/clone.log 2>&1; then
+                echo "::warning::clone $tpl attempt $attempt failed: $(tail -n3 /tmp/clone.log)"
+                sleep 2
+                continue
+              fi
+
+              cd "$CLONE"
+              echo "$VERSION" > .runtime-version
+
+              # Idempotency guard: if the file already matches, this
+              # publish is a re-run for a version already cascaded.
+              # Don't push a no-op commit (would spuriously re-trip the
+              # template's on-push and rebuild for nothing).
+              #
+              # Stage first and compare the index against HEAD: a plain
+              # `git diff` ignores untracked files, so a template that
+              # has never carried a .runtime-version would look
+              # "unchanged" and never receive its first pin.
+              git add .runtime-version
+              if git diff --cached --quiet -- .runtime-version; then
+                echo "✓ $tpl already at $VERSION — no commit needed (idempotent)"
+                success=true
+                cd - >/dev/null
+                break
+              fi
+
+              # A failed commit must not fall through to the push: with
+              # nothing new on HEAD the push would succeed as a no-op
+              # and the template would be reported as pinned when it is
+              # not. Leave the clone dir and let the attempt loop retry.
+              if ! git commit -m "chore: pin runtime to $VERSION (publish-runtime cascade)" \
+                  -m "Co-Authored-By: publish-runtime cascade <publish-runtime@moleculesai.app>" \
+                  >/tmp/commit.log 2>&1; then
+                echo "::warning::commit $tpl attempt $attempt failed: $(tail -n3 /tmp/commit.log)"
+                cd - >/dev/null
+                sleep 2
+                continue
+              fi
+
+              if git push origin HEAD:main >/tmp/push.log 2>&1; then
+                echo "✓ $tpl pushed $VERSION on attempt $attempt"
+                success=true
+                cd - >/dev/null
+                break
+              fi
+
+              # Likely a non-fast-forward (someone else pushed to main
+              # between our clone and our push). The next attempt
+              # deletes this clone and re-clones the current tip of
+              # main, so there is nothing to rebase here — report the
+              # failure and loop.
+              # Don't force-push: that would silently overwrite a racing
+              # human/cascade commit.
+              echo "::warning::push $tpl attempt $attempt failed, re-cloning: $(tail -n3 /tmp/push.log)"
+              cd - >/dev/null
+            done
+
+            if [ "$success" != "true" ]; then
              FAILED="$FAILED $tpl"
            fi
          done
+          rm -rf "$WORKDIR"
+
          if [ -n "$FAILED" ]; then
-            echo "::warning::Cascade incomplete. Failed templates:$FAILED"
-            # Don't fail the whole job — PyPI publish already succeeded;
-            # operators can retry the failed templates manually.
+            echo "::error::Cascade incomplete after 3 retries each. Failed templates:$FAILED"
+            echo "::error::PyPI publish succeeded; failed templates lag the new version. Re-run this workflow_dispatch with the same version to retry only the laggers (idempotent — already-cascaded templates skip)."
+            exit 1
+          fi
+          if [ -n "$SKIPPED" ]; then
+            echo "Cascade complete: pinned $VERSION on cascade-active templates. Soft-skipped (no publish-image.yml):$SKIPPED"
+          else
+            echo "Cascade complete: $VERSION pinned across all manifest workspace_templates."
          fi
@@ -60,8 +60,8 @@ permissions:
  packages: write

 env:
-  IMAGE_NAME: ghcr.io/molecule-ai/platform
-  TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant
+  IMAGE_NAME: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/platform
+  TENANT_IMAGE_NAME: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/platform-tenant

 jobs:
  build-and-push:
@@ -70,31 +70,28 @@ jobs:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

-      - name: Checkout sibling plugin repo
-        # workspace-server/Dockerfile expects
-        # ./molecule-ai-plugin-github-app-auth at build-context root because
-        # the Go module has a `replace` directive pointing at /plugin inside
-        # the image. Pre-repo-split the plugin lived in the monorepo; the
-        # 2026-04-18 restructure moved it out but didn't add this clone step
-        # — which is why publish was failing after that restructure.
-        #
-        # Uses a fine-grained PAT (PLUGIN_REPO_PAT) because the plugin repo
-        # is private and the default GITHUB_TOKEN is scoped to THIS repo.
-        # The PAT needs Contents:Read on Molecule-AI/molecule-ai-plugin-
-        # github-app-auth. Falls back to the default token for the (rare)
-        # case where an operator made the plugin repo public.
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          repository: Molecule-AI/molecule-ai-plugin-github-app-auth
-          path: molecule-ai-plugin-github-app-auth
-          token: ${{ secrets.PLUGIN_REPO_PAT || secrets.GITHUB_TOKEN }}
+      # github-app-auth sibling-checkout removed 2026-05-07 (#157):
+      # plugin was dropped + workspace-server/Dockerfile no longer
+      # COPYs it.

-      - name: Log in to GHCR
-        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
+      - name: Configure AWS credentials for ECR
+        # GHCR was the pre-suspension target; the molecule-ai org on
+        # GitHub got swept 2026-05-06 and ghcr.io/molecule-ai/* is no
+        # longer reachable. Post-suspension target is the operator's
+        # ECR org (153263036946.dkr.ecr.us-east-2.amazonaws.com/
+        # molecule-ai/*), which already hosts platform-tenant +
+        # workspace-template-* + runner-base images. AWS creds come
+        # from the AWS_ACCESS_KEY_ID/SECRET secrets bound to the
+        # molecule-cp IAM user. Closes #161.
+        uses: aws-actions/configure-aws-credentials@v4
        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: us-east-2
+
+      - name: Log in to ECR
+        id: ecr-login
+        uses: aws-actions/amazon-ecr-login@v2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
@@ -108,6 +108,14 @@ jobs:
          python3 > stale_slugs.txt <<'PY'
          import json, os
          from datetime import datetime, timezone, timedelta
+          # SSOT for this list lives in the controlplane Go code:
+          # molecule-controlplane/internal/slugs/ephemeral.go
+          # (var EphemeralPrefixes). The redeploy-fleet auto-rollout
+          # also reads from there to SKIP these slugs — without that
+          # filter, fleet redeploy SSM-failed in-flight E2E tenants
+          # whose containers were still booting, breaking the test
+          # that just spun them up (molecule-controlplane#493).
+          # Update both files together.
          EPHEMERAL_PREFIXES = ("e2e-", "rt-e2e-")
          with open("orgs.json") as f:
              data = json.load(f)
@@ -185,7 +193,47 @@ jobs:
          # sweeper is best-effort. Next hourly tick re-attempts. We
          # only fail loud at the safety-cap gate above.

+      - name: Sweep orphan tunnels
+        # Stale-org cleanup deletes the org (which cascades to tunnel
+        # delete inside the CP). But when that cascade fails partway —
+        # CP transient 5xx after the org row is deleted but before the
+        # CF tunnel delete completes — the tunnel persists with no
+        # matching org row. The reconciler in internal/sweep flags this
+        # as `cf_tunnel kind=orphan`, but nothing automatically reaps it.
+        #
+        # `/cp/admin/orphan-tunnels/cleanup` is the operator-triggered
+        # reaper. Calling it here at the end of every sweep tick
+        # converges the staging CF account to clean even when CP
+        # cascades half-fail.
+        #
+        # PR #492 made the underlying DeleteTunnel actually check
+        # status — pre-fix it silent-succeeded on CF code 1022
+        # ("active connections"), so this step would have been a no-op
+        # against stuck connectors. Post-fix the cleanup invokes
+        # CleanupTunnelConnections + retry, which actually clears the
+        # 1022 case. (#2987)
+        #
+        # Best-effort. Failure here doesn't fail the workflow — next
+        # tick re-attempts. Errors flow to step output for ops review.
+        if: env.DRY_RUN != 'true'
+        run: |
+          set +e
+          curl -sS -o /tmp/cleanup_resp -w "%{http_code}" \
+            --max-time 60 \
+            -X POST "$MOLECULE_CP_URL/cp/admin/orphan-tunnels/cleanup" \
+            -H "Authorization: Bearer $ADMIN_TOKEN" >/tmp/cleanup_code
+          set -e
+          # curl wrote the status code to /tmp/cleanup_code and the
+          # response body to /tmp/cleanup_resp. Treat a missing or
+          # empty code file as "000" (no HTTP response received).
+          http_code=$(cat /tmp/cleanup_code 2>/dev/null)
+          http_code="${http_code:-000}"
+          if [ "$http_code" = "200" ]; then
+            # Parse the FULL response file. Truncating before parsing
+            # would corrupt any JSON body over the display limit and
+            # make the sweep report deleted=0 failed=0 regardless of
+            # what actually happened. Parse errors still degrade to 0.
+            count=$(python3 -c "import sys,json; d=json.loads(open(sys.argv[1]).read() or '{}'); print(d.get('deleted_count', 0))" /tmp/cleanup_resp 2>/dev/null || echo "0")
+            failed_n=$(python3 -c "import sys,json; d=json.loads(open(sys.argv[1]).read() or '{}'); print(len(d.get('failed') or {}))" /tmp/cleanup_resp 2>/dev/null || echo "0")
+            echo "Orphan-tunnel sweep: deleted=$count failed=$failed_n"
+          else
+            # Cap the body only for log display so a large error page
+            # can't flood the step output.
+            body=$(head -c 500 /tmp/cleanup_resp 2>/dev/null)
+            echo "::warning::orphan-tunnels cleanup returned HTTP $http_code — body: $body"
+          fi
+
      - name: Dry-run summary
        if: env.DRY_RUN == 'true'
        run: |
-          echo "DRY RUN — would have deleted ${{ steps.identify.outputs.count }} org(s). Re-run with dry_run=false to actually delete."
+          echo "DRY RUN — would have deleted ${{ steps.identify.outputs.count }} org(s) AND triggered orphan-tunnels cleanup. Re-run with dry_run=false to actually delete."
@@ -22,7 +22,7 @@ development workflow, conventions, and how to get your changes merged.

 ```bash
 # Clone the repo
-git clone https://github.com/Molecule-AI/molecule-core.git
+git clone https://git.moleculesai.app/molecule-ai/molecule-core.git
 cd molecule-core

 # Install git hooks
@@ -57,7 +57,7 @@ See `CLAUDE.md` for a full list of environment variables and their purposes.

 This repo is scoped to **code** (canvas, workspace, workspace-server, related
 infra). Public content (blog posts, marketing copy, OG images, SEO briefs,
-DevRel demos) lives in [`Molecule-AI/docs`](https://github.com/Molecule-AI/docs).
+DevRel demos) lives in [`Molecule-AI/docs`](https://git.moleculesai.app/molecule-ai/docs).
 The `Block forbidden paths` CI gate fails any PR that writes to `marketing/`
 or other removed paths — open against `Molecule-AI/docs` instead.

@@ -110,7 +110,7 @@ causing a render loop when any node position changed.

 1. **Repo-wide:** "Automatically delete head branches" is on. Once a PR merges, the branch is deleted server-side. Any subsequent `git push` to that branch fails with `remote rejected — no such branch`.

-2. **CI:** the `pr-guards` workflow (calling [molecule-ci `disable-auto-merge-on-push`](https://github.com/Molecule-AI/molecule-ci/blob/main/.github/workflows/disable-auto-merge-on-push.yml)) fires on every push to an open PR. If auto-merge was already enabled, it's disabled and a comment is posted. You must explicitly re-enable after verifying the new commit.
+2. **CI:** the `pr-guards` workflow (calling [molecule-ci `disable-auto-merge-on-push`](https://git.moleculesai.app/molecule-ai/molecule-ci/src/branch/main/.github/workflows/disable-auto-merge-on-push.yml)) fires on every push to an open PR. If auto-merge was already enabled, it's disabled and a comment is posted. You must explicitly re-enable after verifying the new commit.

 **Workflow rules that follow from the guards:**
 - Push **all** commits before running `gh pr merge --auto`.
@@ -180,9 +180,9 @@ and run CI manually.
 Code in this repo lands in molecule-core. Some related runtime artifacts
 live in their own repos:

- [`Molecule-AI/molecule-ai-workspace-runtime`](https://github.com/Molecule-AI/molecule-ai-workspace-runtime) — Python adapter SDK (`molecule_runtime`) that runs inside containerized Molecule workspaces. Bridges Claude Code SDK / hermes / langgraph / etc. → A2A queue.
- [`Molecule-AI/molecule-sdk-python`](https://github.com/Molecule-AI/molecule-sdk-python) — `A2AServer` + `RemoteAgentClient` for external agents that register over the public `/registry/register` flow.
- [`Molecule-AI/molecule-mcp-claude-channel`](https://github.com/Molecule-AI/molecule-mcp-claude-channel) — Claude Code channel plugin. Bridges A2A traffic into a running Claude Code session via MCP `notifications/claude/channel`. Polling-based (no tunnel required); install with `claude --channels plugin:molecule@Molecule-AI/molecule-mcp-claude-channel`.
+- [`Molecule-AI/molecule-ai-workspace-runtime`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-runtime) — Python adapter SDK (`molecule_runtime`) that runs inside containerized Molecule workspaces. Bridges Claude Code SDK / hermes / langgraph / etc. → A2A queue.
+- [`Molecule-AI/molecule-sdk-python`](https://git.moleculesai.app/molecule-ai/molecule-sdk-python) — `A2AServer` + `RemoteAgentClient` for external agents that register over the public `/registry/register` flow.
+- [`Molecule-AI/molecule-mcp-claude-channel`](https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel) — Claude Code channel plugin. Bridges A2A traffic into a running Claude Code session via MCP `notifications/claude/channel`. Polling-based (no tunnel required); install with `claude --channels plugin:molecule@Molecule-AI/molecule-mcp-claude-channel`.

 When extending the **A2A surface** in molecule-core (`workspace-server/internal/handlers/a2a_proxy.go` etc.), consider whether the change has a downstream impact on the runtime SDK or the channel plugin — they're versioned independently but share the wire shape.

@@ -1,7 +1,7 @@
 <div align="center">

 <p>
-  <img src="./docs/assets/branding/molecule-icon.png" alt="Molecule AI Icon Logo" width="160" />
+  <img src="./docs/assets/branding/molecule-icon.svg" alt="Molecule AI" width="160" />
 </p>

 <p>
@@ -39,8 +39,8 @@
  <a href="./docs/agent-runtime/workspace-runtime.md"><strong>Workspace Runtime</strong></a>
 </p>

-[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/new/template?template=https://github.com/Molecule-AI/molecule-monorepo)
-[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/Molecule-AI/molecule-monorepo)
+[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/new/template?template=https://git.moleculesai.app/molecule-ai/molecule-core)
+[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://git.moleculesai.app/molecule-ai/molecule-core)

 </div>

@@ -53,8 +53,8 @@ Molecule AI is the most powerful way to govern an AI agent organization in produ
 It combines the parts that are usually scattered across demos, internal glue code, and framework-specific tooling into one product:

 - one org-native control plane for teams, roles, hierarchy, and lifecycle
- one runtime layer that lets LangGraph, DeepAgents, Claude Code, CrewAI, AutoGen, and OpenClaw run side by side
- one memory model that keeps recall, sharing, and skill evolution aligned with organizational boundaries
+- one runtime layer that lets **eight** agent runtimes — LangGraph, DeepAgents, Claude Code, CrewAI, AutoGen, **Hermes**, **Gemini CLI**, and OpenClaw — run side by side behind one workspace contract
+- one memory model that keeps recall, sharing, and skill evolution aligned with organizational boundaries (Memory v2 backed by pgvector for semantic recall)
 - one operational surface for observing, pausing, restarting, inspecting, and improving live workspaces

 Most teams can build a workflow, a strong single agent, a coding agent, or a custom multi-agent graph.
@@ -75,7 +75,7 @@ You do not wire collaboration paths by hand. Hierarchy defines the default commu

 ### 3. Runtime choice stops being a dead-end decision

-LangGraph, DeepAgents, Claude Code, CrewAI, AutoGen, and OpenClaw can all plug into the same workspace abstraction. Teams can standardize governance without forcing every group onto one runtime.
+LangGraph, DeepAgents, Claude Code, CrewAI, AutoGen, Hermes, Gemini CLI, and OpenClaw can all plug into the same workspace abstraction. Teams can standardize governance without forcing every group onto one runtime.

 ### 4. Memory is treated like infrastructure

@@ -117,6 +117,8 @@ Molecule AI is not trying to replace the frameworks below. It is the system that
 | **Claude Code** | Shipping on `main` | Real coding workflows, CLI-native continuity | Secure workspace abstraction, A2A delegation, org boundaries, shared control plane |
 | **CrewAI** | Shipping on `main` | Role-based crews | Persistent workspace identity, policy consistency, shared canvas and registry |
 | **AutoGen** | Shipping on `main` | Assistant/tool orchestration | Standardized deployment, hierarchy-aware collaboration, shared ops plane |
+| **Hermes 4** | Shipping on `main` | Hybrid reasoning, native tools, json_schema (NousResearch/hermes-agent) | Option B upstream hook, A2A bridge to OpenAI-compat API, automatic multi-provider derivation |
+| **Gemini CLI** | Shipping on `main` | Google Gemini CLI continuity | Workspace lifecycle, A2A, hierarchy-aware collaboration, shared ops plane |
 | **OpenClaw** | Shipping on `main` | CLI-native runtime with its own session model | Workspace lifecycle, templates, activity logs, topology-aware collaboration |
 | **NemoClaw** | WIP on `feat/nemoclaw-t4-docker` | NVIDIA-oriented runtime path | Planned to join the same abstraction once merged; not yet part of `main` |

@@ -182,9 +184,10 @@ The result is not just “an agent that learns.” It is **an organization that

 ## What Ships In `main`

-### Canvas
+### Canvas (v4)

 - Next.js 15 + React Flow + Zustand
+- **warm-paper theme system** — light / dark / follow-system, SSR cookie + nonce'd boot script + ThemeProvider; terminal + code surfaces stay dark unconditionally
 - drag-to-nest team building
 - empty-state deployment + onboarding wizard
 - template palette
@@ -193,8 +196,9 @@ The result is not just “an agent that learns.” It is **an organization that

 ### Platform

- Go/Gin control plane
- workspace CRUD and provisioning
+- Go 1.25 / Gin control plane (80+ HTTP endpoints + Gorilla WebSocket fanout)
+- workspace CRUD and provisioning (pluggable Provisioner — Docker locally, EC2 + SSM in production)
+- **A2A response path is a typed discriminated union (RFC #2967)** — frozen dataclasses + total parser; 100% unit + adversarial fuzz coverage
 - registry and heartbeats
 - browser-safe A2A proxy
 - team expansion/collapse
@@ -204,10 +208,10 @@ The result is not just “an agent that learns.” It is **an organization that

 ### Runtime

- unified `workspace/` image
- adapter-driven execution
+- unified `workspace/` image; thin AMI in production (us-east-2)
+- adapter-driven execution across **8 runtimes** (Claude Code, Hermes, Gemini CLI, LangGraph, DeepAgents, CrewAI, AutoGen, OpenClaw)
 - Agent Card registration
- awareness-backed memory integration
+- awareness-backed memory integration; **Memory v2 backed by pgvector** for semantic recall
 - plugin-mounted shared rules/skills
 - hot-reloadable local skills
 - coordinator-only delegation path
@@ -221,6 +225,21 @@ The result is not just “an agent that learns.” It is **an organization that
 - runtime tiers
 - direct workspace inspection through terminal and files

+### SaaS (via [`molecule-controlplane`](https://git.moleculesai.app/molecule-ai/molecule-controlplane))
+
+- multi-tenant on AWS EC2 + Neon (per-tenant Postgres branch) + Cloudflare Tunnels (per-tenant, no public ports)
+- WorkOS AuthKit + Stripe Checkout + Customer Portal
+- AWS KMS envelope encryption (DB / Redis connection strings); AWS Secrets Manager for tenant bootstrap
+- `tenant_resources` audit table + 30-min boot-event-aware reconciler — every CF / AWS lifecycle event recorded, claim vs live state diffed
+
+### Bring your own Claude Code session (via [`molecule-mcp-claude-channel`](https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel))
+
+- Claude Code plugin that bridges Molecule A2A traffic into a local Claude Code session via MCP
+- subscribe to one or more workspaces; peer messages surface as conversation turns; replies route back through Molecule's A2A
+- no tunnel, no public endpoint — the plugin self-registers each watched workspace as `delivery_mode=poll` and long-polls `/activity?since_id=…`
+- multi-tenant friendly: one plugin install can watch workspaces across multiple Molecule tenants (`MOLECULE_PLATFORM_URLS` per-workspace)
+- install via the standard marketplace flow: `/plugin marketplace add Molecule-AI/molecule-mcp-claude-channel` → `/plugin install molecule-channel@molecule-mcp-claude-channel`
+
 ## Built For Teams That Need More Than A Demo

 Molecule AI is especially strong when you need to run:
@@ -233,24 +252,30 @@ Molecule AI is especially strong when you need to run:
 ## Architecture

 ```text
-Canvas (Next.js :3000)  <--HTTP / WS-->  Platform (Go :8080)  <---> Postgres + Redis
-         |                                          |
-         |                                          +--> Docker provisioner / bundles / templates / secrets
+Canvas (Next.js 15, warm-paper :3000)  <--HTTP / WS-->  Platform (Go 1.25 :8080)  <---> Postgres + Redis
+         |                                                           |
+         |                                                           +--> Provisioner: Docker (local) / EC2 + SSM (prod)
+         |                                                           +--> bundles · templates · secrets · KMS
         |
-         +-------------------- shows --------------------> workspaces, teams, tasks, traces, events
+         +------------------------- shows ------------------------> workspaces, teams, tasks, traces, events

-Workspace Runtime (Python image with adapters)
-  - LangGraph / DeepAgents / Claude Code / CrewAI / AutoGen / OpenClaw
-  - Agent Card + A2A server
-  - heartbeat + activity + awareness-backed memory
+Workspace Runtime (Python ≥3.11, image with adapters)
+  - 8 adapters: LangGraph / DeepAgents / Claude Code / CrewAI / AutoGen / Hermes / Gemini CLI / OpenClaw
+  - Agent Card + A2A server (typed-SSOT response path, RFC #2967)
+  - heartbeat + activity + awareness-backed memory (Memory v2 — pgvector semantic recall)
  - skills + plugins + hot reload
+
+SaaS Control Plane (molecule-controlplane, private)
+  - per-tenant EC2 + Neon (Postgres branch) + Cloudflare Tunnel
+  - WorkOS · Stripe · KMS · AWS Secrets Manager
+  - tenant_resources audit + 30-min reconciler
 ```

 ## Quick Start

 ```bash
-git clone https://github.com/Molecule-AI/molecule-monorepo.git
-cd molecule-monorepo
+git clone https://git.moleculesai.app/molecule-ai/molecule-core.git
+cd molecule-core

 cp .env.example .env
 # Defaults boot the stack locally out of the box. See .env.example for
@@ -303,7 +328,11 @@ Then open `http://localhost:3000`:

 ## Current Scope

-The current `main` branch already includes the core platform, canvas, memory model, six production adapters, skill lifecycle, and operational surfaces. Adjacent runtime work such as **NemoClaw** remains branch-level until merged, and this README keeps that distinction explicit on purpose.
+The current `main` branch ships the core platform, Canvas v4 (warm-paper themed), Memory v2 (pgvector semantic recall), the typed-SSOT A2A response path (RFC #2967), **eight production adapters** (Claude Code, Hermes, Gemini CLI, LangGraph, DeepAgents, CrewAI, AutoGen, OpenClaw), skill lifecycle, and operational surfaces.
+
+The companion private repo [`molecule-controlplane`](https://git.moleculesai.app/molecule-ai/molecule-controlplane) provides the SaaS surface — multi-tenant orchestration on EC2 + Neon + Cloudflare Tunnels, KMS envelope encryption, WorkOS auth, Stripe billing, and a `tenant_resources` audit table with a 30-min reconciler.
+
+Adjacent runtime work such as **NemoClaw** remains branch-level until merged, and this README keeps that distinction explicit on purpose.

 ## License

@@ -1,7 +1,7 @@
 <div align="center">

 <p>
-  <img src="./docs/assets/branding/molecule-icon.png" alt="Molecule AI 图案 Logo" width="160" />
+  <img src="./docs/assets/branding/molecule-icon.svg" alt="Molecule AI" width="160" />
 </p>

 <p>
@@ -38,8 +38,8 @@
  <a href="./docs/agent-runtime/workspace-runtime.md"><strong>Workspace Runtime</strong></a>
 </p>

-[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/new/template?template=https://github.com/Molecule-AI/molecule-core)
-[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/Molecule-AI/molecule-core)
+[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/new/template?template=https://git.moleculesai.app/molecule-ai/molecule-core)
+[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://git.moleculesai.app/molecule-ai/molecule-core)

 </div>

@@ -52,8 +52,8 @@ Molecule AI 是目前最强的 AI Agent 组织治理方案之一，用来把 age
 它把过去分散在 demo、内部胶水代码和各类 framework 私有工具里的关键能力，收敛成一个产品：

 - 一套组织原生 control plane，管理团队、角色、层级和生命周期
- 一套 runtime abstraction，让 LangGraph、DeepAgents、Claude Code、CrewAI、AutoGen、OpenClaw 并存运行
- 一套与组织边界对齐的 memory 模型，把 recall、sharing 和 skill evolution 放进同一体系
+- 一套 runtime abstraction，让 **8 个** agent runtime —— LangGraph、DeepAgents、Claude Code、CrewAI、AutoGen、**Hermes**、**Gemini CLI**、OpenClaw —— 共用一套 workspace 契约
+- 一套与组织边界对齐的 memory 模型，把 recall、sharing 和 skill evolution 放进同一体系（Memory v2 由 pgvector 支撑语义召回）
 - 一套面向线上 workspace 的运维面，统一完成观测、暂停、重启、检查和持续改进

 今天很多团队能做好 workflow、单 agent、coding agent，或者自定义 multi-agent graph 中的一种。
@@ -74,7 +74,7 @@ Molecule AI 填的就是这个空白。

 ### 3. Runtime 选择不再是死路

-LangGraph、DeepAgents、Claude Code、CrewAI、AutoGen、OpenClaw 都可以挂到同一个 workspace abstraction 下。团队可以统一治理方式，而不必统一到底层 runtime。
+LangGraph、DeepAgents、Claude Code、CrewAI、AutoGen、Hermes、Gemini CLI、OpenClaw 都可以挂到同一个 workspace abstraction 下。团队可以统一治理方式，而不必统一到底层 runtime。

 ### 4. Memory 被当成基础设施来做

@@ -116,6 +116,8 @@ Molecule AI 并不是要替代下面这些 framework，而是把它们纳入更
 | **Claude Code** | `main` 已支持 | 真实编码工作流、CLI-native continuity | 安全 workspace 抽象、A2A delegation、组织边界、共享 control plane |
 | **CrewAI** | `main` 已支持 | 角色型 crew 模式清晰 | 持久 workspace 身份、统一策略、共享 Canvas 和 registry |
 | **AutoGen** | `main` 已支持 | assistant/tool orchestration | 统一部署、层级协作、共享运维平面 |
+| **Hermes 4** | `main` 已支持 | 混合推理、原生工具调用、json_schema 输出（NousResearch/hermes-agent） | Option B 上游 hook、A2A 桥接 OpenAI 兼容 API、多 provider 自动派生 |
+| **Gemini CLI** | `main` 已支持 | Google Gemini CLI 持续会话 | workspace 生命周期、A2A、层级感知协作、共享运维平面 |
 | **OpenClaw** | `main` 已支持 | CLI-native runtime，自有 session 模型 | workspace 生命周期、templates、activity logs、拓扑感知协作 |
 | **NemoClaw** | `feat/nemoclaw-t4-docker` 分支 WIP | NVIDIA 方向 runtime 路线 | 计划并入同一抽象层，但当前还不是 `main` 已合并能力 |

@@ -181,9 +183,10 @@ Molecule AI 并不是要替代下面这些 framework，而是把它们纳入更

 ## `main` 分支已经具备什么

-### Canvas
+### Canvas（v4）

 - Next.js 15 + React Flow + Zustand
+- **warm-paper 主题系统** —— light / dark / 跟随系统；SSR cookie + nonce'd boot 脚本 + ThemeProvider；终端与代码面板始终保持深色
 - drag-to-nest 团队构建
 - empty state + onboarding wizard
 - template palette
@@ -192,8 +195,9 @@ Molecule AI 并不是要替代下面这些 framework，而是把它们纳入更

 ### Platform

- Go/Gin control plane
- workspace CRUD 和 provisioning
+- Go 1.25 / Gin control plane（80+ HTTP 端点 + Gorilla WebSocket fanout）
+- workspace CRUD 和 provisioning（可插拔 Provisioner —— 本地 Docker、生产 EC2 + SSM）
+- **A2A 响应路径已收敛为类型化的判别联合（RFC #2967）** —— 冻结 dataclass + 全量 parser；100% 单元测试 + 对抗性 fuzz 覆盖
 - registry 与 heartbeat
 - 浏览器安全的 A2A proxy
 - team expansion/collapse
@@ -203,10 +207,10 @@ Molecule AI 并不是要替代下面这些 framework，而是把它们纳入更

 ### Runtime

- 统一 `workspace/` 镜像
- adapter 驱动执行
+- 统一 `workspace/` 镜像；生产环境采用 thin AMI（us-east-2）
+- adapter 驱动执行，覆盖 **8 个 runtime**（Claude Code、Hermes、Gemini CLI、LangGraph、DeepAgents、CrewAI、AutoGen、OpenClaw）
 - Agent Card 注册
- awareness-backed memory
+- awareness-backed memory；**Memory v2 由 pgvector 支撑**语义召回
 - plugin 挂载共享 rules/skills
 - 本地 skills 热加载
 - coordinator-only delegation 路径
@@ -220,6 +224,21 @@ Molecule AI 并不是要替代下面这些 framework，而是把它们纳入更
 - runtime tiers
 - 终端与文件层面的 workspace 直接排障

+### SaaS（由 [`molecule-controlplane`](https://git.moleculesai.app/molecule-ai/molecule-controlplane) 提供）
+
+- 多租户运行在 AWS EC2 + Neon（每租户一个 Postgres branch）+ Cloudflare Tunnels（每租户一条隧道，对外不开任何端口）
+- WorkOS AuthKit + Stripe Checkout + Customer Portal
+- AWS KMS 信封加密（DB / Redis 连接串）；AWS Secrets Manager 负责租户 bootstrap
+- `tenant_resources` 审计表 + 30 分钟 boot-event-aware reconciler —— 每个 CF / AWS lifecycle 事件都有记录，每 30 分钟比对 claim 与实际状态
+
+### 在 Claude Code 里直接接入（由 [`molecule-mcp-claude-channel`](https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel) 提供）
+
+- 把 Molecule A2A 流量桥接到本地 Claude Code 会话的 MCP 插件
+- 订阅一个或多个 workspace；peer 的消息会以 user-turn 出现，回复会经 Molecule A2A 路由出去
+- 无需公网隧道、无需公开端点 —— 插件启动时自动把每个 watched workspace 注册成 `delivery_mode=poll`，长轮询 `/activity?since_id=…`
+- 多租户友好：单次安装即可同时 watch 跨多个 Molecule 租户的 workspace（`MOLECULE_PLATFORM_URLS` 按 workspace 配置）
+- 通过标准 marketplace 流程安装：`/plugin marketplace add Molecule-AI/molecule-mcp-claude-channel` → `/plugin install molecule-channel@molecule-mcp-claude-channel`
+
 ## 适合什么团队

 Molecule AI 特别适合下面这些场景：
@@ -232,23 +251,29 @@ Molecule AI 特别适合下面这些场景：
 ## 架构总览

 ```text
-Canvas (Next.js :3000)  <--HTTP / WS-->  Platform (Go :8080)  <---> Postgres + Redis
-         |                                          |
-         |                                          +--> Docker provisioner / bundles / templates / secrets
+Canvas (Next.js 15, warm-paper :3000)  <--HTTP / WS-->  Platform (Go 1.25 :8080)  <---> Postgres + Redis
+         |                                                           |
+         |                                                           +--> Provisioner: Docker (本地) / EC2 + SSM (生产)
+         |                                                           +--> bundles · templates · secrets · KMS
         |
-         +-------------------- 展示 --------------------> workspaces, teams, tasks, traces, events
+         +------------------------- 展示 ------------------------> workspaces, teams, tasks, traces, events

-Workspace Runtime (Python image with adapters)
-  - LangGraph / DeepAgents / Claude Code / CrewAI / AutoGen / OpenClaw
-  - Agent Card + A2A server
-  - heartbeat + activity + awareness-backed memory
+Workspace Runtime (Python ≥3.11，含 adapter 集合的镜像)
+  - 8 个 adapter: LangGraph / DeepAgents / Claude Code / CrewAI / AutoGen / Hermes / Gemini CLI / OpenClaw
+  - Agent Card + A2A server（typed-SSOT 响应路径，RFC #2967）
+  - heartbeat + activity + awareness-backed memory（Memory v2 —— pgvector 语义召回）
  - skills + plugins + hot reload
+
+SaaS Control Plane (molecule-controlplane，私有)
+  - 每租户 EC2 + Neon (Postgres branch) + Cloudflare Tunnel
+  - WorkOS · Stripe · KMS · AWS Secrets Manager
+  - tenant_resources 审计 + 30 分钟 reconciler
 ```

 ## 快速开始

 ```bash
-git clone https://github.com/Molecule-AI/molecule-core.git
+git clone https://git.moleculesai.app/molecule-ai/molecule-core.git
 cd molecule-core

 cp .env.example .env
@@ -296,7 +321,11 @@ npm run dev

 ## 当前范围说明

-当前 `main` 已经包含核心平台、Canvas、memory model、6 个正式 adapter、skill lifecycle 和主要运维面。像 **NemoClaw** 这样的相邻 runtime 路线仍然属于分支级工作，只有合并后才会进入正式支持列表，这里会明确区分。
+当前 `main` 已经包含核心平台、Canvas v4（warm-paper 主题）、Memory v2（pgvector 语义召回）、typed-SSOT A2A 响应路径（RFC #2967）、**8 个正式 adapter**（Claude Code、Hermes、Gemini CLI、LangGraph、DeepAgents、CrewAI、AutoGen、OpenClaw）、skill lifecycle，以及主要运维面。
+
+配套的私有仓库 [`molecule-controlplane`](https://git.moleculesai.app/molecule-ai/molecule-controlplane) 提供 SaaS 层 —— 多租户编排（EC2 + Neon + Cloudflare Tunnels）、KMS 信封加密、WorkOS 鉴权、Stripe 计费，以及 `tenant_resources` 审计表加 30 分钟 reconciler。
+
+像 **NemoClaw** 这样的相邻 runtime 路线仍然属于分支级工作，只有合并后才会进入正式支持列表，这里会明确区分。

 ## License

@@ -325,7 +325,6 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
            {dropdownOptions.map((opt) => (
              <option key={opt.value} value={opt.value}>
                {opt.label}
-                {opt.kind ? `  (${opt.kind})` : ''}
              </option>
            ))}
          </select>
@@ -287,7 +287,7 @@ export function SidePanel() {
        {panelTab === "config" && <ConfigTab key={selectedNodeId} workspaceId={selectedNodeId} />}
        {panelTab === "schedule" && <ScheduleTab key={selectedNodeId} workspaceId={selectedNodeId} />}
        {panelTab === "channels" && <ChannelsTab key={selectedNodeId} workspaceId={selectedNodeId} />}
-        {panelTab === "files" && <FilesTab key={selectedNodeId} workspaceId={selectedNodeId} />}
+        {panelTab === "files" && <FilesTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
        {panelTab === "memory" && <MemoryInspectorPanel key={selectedNodeId} workspaceId={selectedNodeId} />}
        {panelTab === "traces" && <TracesTab key={selectedNodeId} workspaceId={selectedNodeId} />}
        {panelTab === "events" && <EventsTab key={selectedNodeId} workspaceId={selectedNodeId} />}
@@ -8,11 +8,11 @@ import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
 import { useSocketEvent } from "@/hooks/useSocketEvent";
 import { type ChatMessage, type ChatAttachment, createMessage, appendMessageDeduped } from "./chat/types";
 import { uploadChatFiles, downloadChatFile, isPlatformAttachment } from "./chat/uploads";
-import { AttachmentChip, PendingAttachmentPill } from "./chat/AttachmentViews";
+import { PendingAttachmentPill } from "./chat/AttachmentViews";
+import { AttachmentPreview } from "./chat/AttachmentPreview";
 import { extractFilesFromTask } from "./chat/message-parser";
 import { AgentCommsPanel } from "./chat/AgentCommsPanel";
 import { appendActivityLine } from "./chat/activityLog";
-import { activityRowToMessages, type ActivityRowForHydration } from "./chat/historyHydration";
 import { runtimeDisplayName } from "@/lib/runtime-names";
 import { ConfirmDialog } from "@/components/ConfirmDialog";

@@ -49,38 +49,12 @@ interface A2AResponse {
  };
 }

-/** Detect activity-log rows that the workspace's own runtime fired
- *  against itself but were misclassified as canvas-source. The proper
- *  fix is the X-Workspace-ID header from `self_source_headers()` in
- *  workspace/platform_auth.py, which makes the platform record
- *  source_id = workspace_id. But three failure modes still leak a
- *  self-message into "My Chat":
- *
- *    1. Historical rows already in the DB with source_id=NULL.
- *    2. Workspace containers running pre-fix heartbeat.py / main.py
- *       (the fix only takes effect after an image rebuild + redeploy).
- *    3. Future internal triggers added without the helper.
- *
- *  This client-side filter recognises the heartbeat trigger by its
- *  exact prefix — the heartbeat assembles
- *
- *    "Delegation results are ready. Review them and take appropriate
- *     action:\n" + summary_lines + report_instruction
- *
- *  in workspace/heartbeat.py. The prefix is template-fixed so a
- *  string match is reliable. If the heartbeat copy ever changes,
- *  update this constant in the same commit.
- *
- *  This is a backstop, not the primary defence — the X-Workspace-ID
- *  header is. Filtering content is fragile to copy edits, so keep
- *  the list narrow. */
-const INTERNAL_SELF_MESSAGE_PREFIXES = [
-  "Delegation results are ready. Review them and take appropriate action",
-];
-
-function isInternalSelfMessage(text: string): boolean {
-  return INTERNAL_SELF_MESSAGE_PREFIXES.some((p) => text.startsWith(p));
-}
+// Internal-self-message filtering moved server-side in RFC #2945
+// PR-C/D — the platform's /chat-history endpoint applies the
+// IsInternalSelfMessage predicate before returning rows, so the
+// client no longer needs the local backstop on the history path.
+// The proper fix is still the X-Workspace-ID header (source_id=workspace_id);
+// the platform-side prefix filter handles the residual cases.

 // extractReplyText pulls the agent's text reply out of an A2A response.
 // Concatenates ALL text parts (joined with "\n") rather than returning
@@ -133,8 +107,19 @@ const INITIAL_HISTORY_LIMIT = 10;
 const OLDER_HISTORY_BATCH = 20;

 /**
- * Load chat history from the activity_logs database via the platform API.
- * Uses source=canvas to only get user-initiated messages (not agent-to-agent).
+ * Load chat history from the platform's typed /chat-history endpoint.
+ *
+ * Server-side rendering of activity_logs rows into ChatMessage shape
+ * lives in workspace-server/internal/messagestore/postgres_store.go
+ * (RFC #2945 PR-C/D). The server already applies the canvas-source
+ * filter, the internal-self-message predicate, the role decision
+ * (status=error vs agent-error prefix → system), and the v0/v1
+ * file-shape extraction. Canvas just renders what it receives.
+ *
+ * Wire shape (mirrors ChatMessage exactly, no per-row mapping needed):
+ *
+ *   GET /workspaces/:id/chat-history?limit=N&before_ts=T
+ *   200 → {"messages": ChatMessage[], "reached_end": boolean}
 *
 * Pagination:
 *  - Pass `limit` to bound the page size (newest-first from server).
@@ -142,10 +127,10 @@ const OLDER_HISTORY_BATCH = 20;
 *    timestamp. Combined with limit, this yields the next-older page
 *    when scrolling backward through history.
 *
- * `reachedEnd` is true when the server returned fewer rows than asked
- * for — caller uses this to disable further older-batch fetches.
- * (Counts row-level returns, not chat-bubble count: each row may
- * produce 1-2 bubbles.)
+ * `reachedEnd` is propagated from the server. The server computes it
+ * by comparing rowCount vs limit so a partial last page is correctly
+ * detected even when the row→bubble fan-out is non-1:1 (each row
+ * produces 1-2 bubbles).
 */
 async function loadMessagesFromDB(
  workspaceId: string,
@@ -153,25 +138,23 @@ async function loadMessagesFromDB(
  beforeTs?: string,
 ): Promise<{ messages: ChatMessage[]; error: string | null; reachedEnd: boolean }> {
  try {
-    const params = new URLSearchParams({
-      type: "a2a_receive",
-      source: "canvas",
-      limit: String(limit),
-    });
+    const params = new URLSearchParams({ limit: String(limit) });
    if (beforeTs) params.set("before_ts", beforeTs);
-    const activities = await api.get<ActivityRowForHydration[]>(
-      `/workspaces/${workspaceId}/activity?${params.toString()}`,
+    const resp = await api.get<{ messages: ChatMessage[]; reached_end: boolean }>(
+      `/workspaces/${workspaceId}/chat-history?${params.toString()}`,
    );

-    const messages: ChatMessage[] = [];
-    // Activities are newest-first, reverse for chronological order.
-    // Per-row mapping lives in chat/historyHydration.ts so it can be
-    // unit-tested without spinning up the full ChatTab component
-    // (regression cover for the timestamp-collapse bug).
-    for (const a of [...activities].reverse()) {
-      messages.push(...activityRowToMessages(a, isInternalSelfMessage));
-    }
-    return { messages, error: null, reachedEnd: activities.length < limit };
+    // Server emits oldest-first within the page (RFC #2945 PR-C-2
+    // post-fix: server reverses row-aware before returning so the
+    // wire is display-ready). Canvas appends/prepends without
+    // reordering — this avoids the pair-flip bug a naive flat
+    // reverse causes when each row produces a (user, agent) pair
+    // with the same timestamp.
+    return {
+      messages: resp.messages ?? [],
+      error: null,
+      reachedEnd: resp.reached_end,
+    };
  } catch (err) {
    return {
      messages: [],
@@ -1137,8 +1120,9 @@ function MyChatPanel({ workspaceId, data }: Props) {
              {msg.attachments && msg.attachments.length > 0 && (
                <div className={`flex flex-wrap gap-1 ${msg.content ? "mt-1.5" : ""}`}>
                  {msg.attachments.map((att, i) => (
-                    <AttachmentChip
+                    <AttachmentPreview
                      key={`${msg.id}-${i}`}
+                      workspaceId={workspaceId}
                      attachment={att}
                      onDownload={downloadAttachment}
                      tone={msg.role === "user" ? "user" : "agent"}
@@ -21,20 +21,39 @@ interface Props {
 // --- Agent Card Section ---

 function AgentCardSection({ workspaceId }: { workspaceId: string }) {
-  const [card, setCard] = useState<Record<string, unknown> | null>(null);
-  const [loading, setLoading] = useState(true);
+  // Initial card value comes from the canvas store — node.data.agentCard
+  // is hydrated by the platform stream when the workspace appears in the
+  // graph, so reading it here avoids a duplicate `GET /workspaces/${id}`
+  // (the parent ConfigTab.loadConfig already fetches workspace metadata,
+  // and refetching here adds a serialised RTT to the panel-open path —
+  // contributed to the ~20s detail-panel load reported in core#11).
+  // Local state still tracks the edited/saved value so the editor flow
+  // is unchanged.
+  const storeCard = useCanvasStore((s) => {
+    // Defensive against test mocks that omit `nodes` (some test files
+    // stub the store with a minimal shape). In production `nodes` is
+    // always an array — empty or not — so the optional chaining only
+    // matters for the test path.
+    const node = s.nodes?.find?.((n) => n.id === workspaceId);
+    return (node?.data.agentCard as
+      | Record<string, unknown>
+      | null
+      | undefined) ?? null;
+  });
+  const [card, setCard] = useState<Record<string, unknown> | null>(storeCard);
  const [editing, setEditing] = useState(false);
  const [draft, setDraft] = useState("");
  const [saving, setSaving] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [success, setSuccess] = useState(false);

+  // If the store updates while this section is mounted (another tab
+  // pushed an update via the platform event stream), reflect that —
+  // unless the user is mid-edit, in which case we don't clobber their
+  // unsaved draft.
  useEffect(() => {
-    api.get<Record<string, unknown>>(`/workspaces/${workspaceId}`)
-      .then((ws) => setCard((ws.agent_card as Record<string, unknown>) || null))
-      .catch(() => {})
-      .finally(() => setLoading(false));
-  }, [workspaceId]);
+    if (!editing) setCard(storeCard);
+  }, [storeCard, editing]);

  const handleSave = async () => {
    setError(null);
@@ -53,9 +72,7 @@ function AgentCardSection({ workspaceId }: { workspaceId: string }) {

  return (
    <Section title="Agent Card" defaultOpen={false}>
-      {loading ? (
-        <div className="text-[10px] text-ink-soft">Loading...</div>
-      ) : editing ? (
+      {editing ? (
        <div className="space-y-2">
          <textarea
            aria-label="Agent card JSON editor"
@@ -221,47 +238,72 @@ export function ConfigTab({ workspaceId }: Props) {
    setLoading(true);
    setError(null);

-    // ALWAYS load workspace metadata first (runtime + model). These are the
-    // source of truth regardless of whether the runtime uses our config.yaml
-    // template. Without this the form falls back to empty/default values on
-    // a hermes workspace (which doesn't use our template), creating the
-    // appearance that the saved runtime is unset — and worse, clicking Save
-    // would silently flip `runtime` from `hermes` back to the dropdown
-    // default `LangGraph`. See GH #1894.
-    let wsMetadataRuntime = "";
-    let wsMetadataModel = "";
-    let wsMetadataTier: number | null = null;
-    try {
-      const ws = await api.get<{ runtime?: string; tier?: number }>(`/workspaces/${workspaceId}`);
-      wsMetadataRuntime = (ws.runtime || "").trim();
-      if (typeof ws.tier === "number") wsMetadataTier = ws.tier;
-    } catch { /* fall back to config.yaml */ }
-    try {
-      const m = await api.get<{ model?: string }>(`/workspaces/${workspaceId}/model`);
-      wsMetadataModel = (m.model || "").trim();
-    } catch { /* non-fatal */ }
+    // Load workspace metadata (runtime + model + provider) in parallel.
+    // These are independent GETs against three workspace-server endpoints
+    // and used to be awaited serially — for SaaS workspaces each call
+    // round-trips through an EIC SSH tunnel, so the previous serial
+    // pattern stacked 3-5s of tunnel-setup latency per call (core#11).
+    // Promise.all overlaps them; the per-call cost stays the same but
+    // wall time drops to max() instead of sum().
+    //
+    // Each leg has its own .catch handler that yields a sentinel value,
+    // matching the previous semantics:
+    //   - /workspaces/${id}: required source-of-truth for runtime+tier;
+    //     fall back to YAML if the GET fails (rare, network-class only).
+    //   - /workspaces/${id}/model: non-fatal; empty model lets the form
+    //     fall through to YAML runtime_config.model.
+    //   - /workspaces/${id}/provider: non-fatal; old workspace-servers
+    //     return 404, in which case provider="" and Save skips the PUT.
+    //
+    // See GH #1894 for the workspace-row-as-source-of-truth rationale
+    // that motivated splitting from a single config.yaml read.
+    const [wsRes, modelRes, providerRes] = await Promise.all([
+      api.get<{ runtime?: string; tier?: number }>(`/workspaces/${workspaceId}`)
+        .catch(() => ({} as { runtime?: string; tier?: number })),
+      api.get<{ model?: string }>(`/workspaces/${workspaceId}/model`)
+        .catch(() => ({} as { model?: string })),
+      api.get<{ provider?: string }>(`/workspaces/${workspaceId}/provider`)
+        .catch(() => null),
+    ]);
+    const wsMetadataRuntime = (wsRes.runtime || "").trim();
+    const wsMetadataModel = (modelRes.model || "").trim();
+    const wsMetadataTier: number | null =
+      typeof wsRes.tier === "number" ? wsRes.tier : null;
+    if (providerRes !== null) {
+      const loadedProvider = (providerRes.provider || "").trim();
+      setProvider(loadedProvider);
+      setOriginalProvider(loadedProvider);
+    } else {
+      setProvider("");
+      setOriginalProvider("");
+    }
    // originalModel is set further down once the YAML has been parsed —
    // we want it to reflect what the form ACTUALLY rendered, which may
    // be the YAML's runtime_config.model fallback when MODEL_PROVIDER
    // is empty. Setting it here from wsMetadataModel alone would be
    // wrong for hermes/pre-#240 workspaces.

-    // Load explicit provider override (Option B PR-5). Endpoint returns
-    // {provider: "", source: "default"} when no override is set, so the
-    // empty string is the legitimate "auto-derive" signal — don't treat
-    // it as a load error. Non-fatal: an older workspace-server that
-    // predates PR-2 returns 404 here; the form falls back to "" and
-    // Save just won't PUT the provider field.
-    try {
-      const p = await api.get<{ provider?: string }>(`/workspaces/${workspaceId}/provider`);
-      const loadedProvider = (p.provider || "").trim();
-      setProvider(loadedProvider);
-      setOriginalProvider(loadedProvider);
-    } catch {
-      setProvider("");
-      setOriginalProvider("");
+    // Skip the config.yaml fetch entirely for runtimes that manage
+    // their own config (external, hermes, etc.) — they don't have a
+    // platform-side template, so the GET would 404. The catch block
+    // below handles 404 gracefully, but issuing the request adds
+    // browser-console noise + a wasted RTT on every open of the
+    // Config tab for the affected workspaces. Reported on
+    // production reno-stars 2026-05-05 (workspace runtime=external,
+    // 404 on /files/config.yaml visible in the console even though
+    // the form rendered correctly).
+    if (RUNTIMES_WITH_OWN_CONFIG.has(wsMetadataRuntime)) {
+      setConfig({
+        ...DEFAULT_CONFIG,
+        runtime: wsMetadataRuntime,
+        model: wsMetadataModel,
+        ...(wsMetadataModel ? { runtime_config: { model: wsMetadataModel } } : {}),
+        ...(wsMetadataTier !== null ? { tier: wsMetadataTier } : {}),
+      } as ConfigData);
+      setOriginalModel(wsMetadataModel);
+      setLoading(false);
+      return;
    }
-
    try {
      const res = await api.get<{ content: string }>(`/workspaces/${workspaceId}/files/config.yaml`);
      const parsed = parseYaml(res.content);
@@ -2,9 +2,11 @@

 import { useState, useEffect, useRef, useMemo } from "react";
 import { showToast } from "../Toaster";
+import type { WorkspaceNodeData } from "@/store/canvas";
 import { FilesToolbar } from "./FilesTab/FilesToolbar";
 import { FileTree } from "./FilesTab/FileTree";
 import { FileEditor } from "./FilesTab/FileEditor";
+import { NotAvailablePanel } from "./FilesTab/NotAvailablePanel";
 import { useFilesApi } from "./FilesTab/useFilesApi";
 import { buildTree } from "./FilesTab/tree";

@@ -14,9 +16,40 @@ export type { TreeNode } from "./FilesTab/tree";

 interface Props {
  workspaceId: string;
+  /** Workspace metadata from the canvas store. Optional for back-compat
+   *  with any caller that still mounts <FilesTab workspaceId=.../> without
+   *  threading data through (legacy tests). When present, runtime gates
+   *  the early-return below. Mirrors TerminalTab's prop shape (#2830). */
+  data?: WorkspaceNodeData;
 }

-export function FilesTab({ workspaceId }: Props) {
+/** Runtimes whose filesystem the platform doesn't own. The canvas can't
+ *  list/read/write files on these — the agent runs on the user's own
+ *  hardware (mac laptop, mac mini, hermes-on-home-server) and reaches
+ *  the platform via the heartbeat-based polling Phase 30 layer.
+ *
+ *  Keep narrow — only add a runtime here when its provisioner genuinely
+ *  has no platform-owned filesystem. Otherwise the user loses access to
+ *  a real surface (e.g. claude-code SaaS workspaces have files served
+ *  by ListFiles via EIC; they belong on the rendering path, not here). */
+const RUNTIMES_WITHOUT_FILES = new Set(["external"]);
+
+export function FilesTab({ workspaceId, data }: Props) {
+  // Gate on the workspace runtime before any file state is created.
+  // All hooks (local state, useFilesApi) live in PlatformOwnedFilesTab,
+  // so choosing the placeholder here means none of them run for a
+  // runtime listed in RUNTIMES_WITHOUT_FILES.
+  //
+  // `data` is optional: when a caller does not pass it, the runtime is
+  // unknown and the regular file browser is rendered.
+  return data && RUNTIMES_WITHOUT_FILES.has(data.runtime) ? (
+    <NotAvailablePanel runtime={data.runtime} />
+  ) : (
+    <PlatformOwnedFilesTab workspaceId={workspaceId} />
+  );
+}
+
+function PlatformOwnedFilesTab({ workspaceId }: { workspaceId: string }) {
  const [root, setRoot] = useState("/configs");
  const [selectedFile, setSelectedFile] = useState<string | null>(null);
  const [fileContent, setFileContent] = useState("");
@@ -45,11 +78,36 @@ export function FilesTab({ workspaceId }: Props) {
    readFile,
    writeFile,
    deleteFile,
+    downloadFileByPath,
    downloadAllFiles,
    uploadFiles,
+    uploadDataTransferItems,
    deleteAllFiles,
  } = useFilesApi(workspaceId, root);

+  // PR-D: track whether the user is currently dragging files OVER
+  // the root area (not over a specific subdir row). Used to show
+  // the "Drop to upload to root" highlight on the tree column.
+  const [rootDragHover, setRootDragHover] = useState(false);
+
+  const handleDropToTarget = (
+    targetDir: string,
+    items: DataTransferItemList,
+  ) => {
+    // Drag-drop uploads are accepted only while the active root is
+    // /configs. Checking here rejects the drop immediately, without
+    // starting an upload that would have to fail later.
+    const uploadsAllowed = root === "/configs";
+    if (uploadsAllowed) {
+      // Fire-and-forget: the returned promise is intentionally not
+      // awaited by the drop handler.
+      void uploadDataTransferItems(items, targetDir);
+      return;
+    }
+    setError(
+      `Upload only allowed in /configs (current root: ${root}). Switch root or use Upload button.`,
+    );
+  };
+
  const tree = useMemo(() => buildTree(files), [files]);

  const openFile = async (path: string) => {
@@ -190,8 +248,46 @@ export function FilesTab({ workspaceId }: Props) {
      )}

      <div className="flex flex-1 min-h-0">
-        {/* File tree */}
-        <div className="w-[180px] border-r border-line/40 overflow-y-auto shrink-0">
+        {/* File tree column. PR-D: outer div is the drop zone for
+            "drop on root" — when the user drags into the column area
+            (not over a specific subdir row), the drop targets the
+            current root directory. Subdirectory rows in <FileTree>
+            stop propagation on their own drop event so a drop on
+            /configs/skills doesn't ALSO fire root-area drop. */}
+        <div
+          className={`w-[180px] border-r border-line/40 overflow-y-auto shrink-0 transition-colors ${
+            rootDragHover ? "bg-accent/10 outline outline-1 outline-accent/40 -outline-offset-2" : ""
+          }`}
+          onDragOver={(e) => {
+            // Only highlight + accept the drop when uploads are
+            // actually allowed for the current root. Without this
+            // check the user gets a misleading drag affordance,
+            // drops, then sees the toolbar's "switch root" toast —
+            // bad UX.
+            if (root !== "/configs") return;
+            e.preventDefault();
+            e.dataTransfer.dropEffect = "copy";
+          }}
+          onDragEnter={(e) => {
+            if (root !== "/configs") return;
+            e.preventDefault();
+            setRootDragHover(true);
+          }}
+          onDragLeave={(e) => {
+            const next = e.relatedTarget as Node | null;
+            if (!next || !(e.currentTarget as HTMLElement).contains(next)) {
+              setRootDragHover(false);
+            }
+          }}
+          onDrop={(e) => {
+            if (root !== "/configs") return;
+            e.preventDefault();
+            setRootDragHover(false);
+            if (e.dataTransfer.items?.length) {
+              handleDropToTarget("", e.dataTransfer.items);
+            }
+          }}
+        >
          {/* New file input */}
          {showNewFile && (
            <div className="px-2 py-1 border-b border-line/40">
@@ -209,14 +305,27 @@ export function FilesTab({ workspaceId }: Props) {

          {files.length === 0 ? (
            <div className="px-3 py-4 text-[10px] text-ink-soft text-center">
-              No config files yet
+              {rootDragHover
+                ? "Drop to upload to root"
+                : root === "/configs"
+                  ? "No config files yet — drag files here to upload"
+                  : "No config files yet"}
            </div>
          ) : (
            <FileTree
              nodes={tree}
              selectedPath={selectedFile}
              onSelect={openFile}
+              // Delete is currently gated to /configs to match the
+              // toolbar's New / Upload / Clear affordances. Context
+              // menu and inline ✕ both honour the gate. PR-A made the
+              // backend EIC delete work on all roots — keeping the
+              // canvas gate conservative until we want to expose
+              // /home /workspace deletion intentionally.
              onDelete={root === "/configs" ? setConfirmDelete : () => {}}
+              onDownload={downloadFileByPath}
+              canDelete={root === "/configs"}
+              onDropToTarget={handleDropToTarget}
              expandedDirs={expandedDirs}
              onToggleDir={toggleDir}
              loadingDir={loadingDir}
@@ -1,41 +1,129 @@
 "use client";

+import { useState } from "react";
 import { type TreeNode, getIcon } from "./tree";
+import { FileTreeContextMenu, type MenuItem } from "./FileTreeContextMenu";

 interface TreeCallbacks {
  selectedPath: string | null;
  onSelect: (path: string) => void;
  onDelete: (path: string) => void;
+  /** PR-C: right-click → Download. Files only — directories ignore. */
+  onDownload: (path: string) => void;
+  /** Whether the active root permits delete. Wire into the Delete
+   *  context-menu item's `disabled` flag so the user gets the same
+   *  affordance as the toolbar (which gates Clear/New on /configs). */
+  canDelete: boolean;
+  /** PR-D: drop files/folders from the OS onto this row. targetDir
+   *  is the directory path (relative to the active root) under which
+   *  the dropped contents should land; "" means root. */
+  onDropToTarget?: (targetDir: string, items: DataTransferItemList) => void;
  expandedDirs: Set<string>;
  onToggleDir: (path: string) => void;
  loadingDir: string | null;
 }

+/**
+ * FileTree renders the workspace tree + owns the right-click context
+ * menu (PR-C) and the drop-target hover state (PR-D). Lifting the
+ * menu state here (vs each row) means only one menu open at a time —
+ * opening a new row's menu auto-closes the prior one. Same UX as
+ * VSCode / Theia.
+ */
 export function FileTree({
  nodes,
  selectedPath,
  onSelect,
  onDelete,
+  onDownload,
+  canDelete,
+  onDropToTarget,
  expandedDirs,
  onToggleDir,
  loadingDir,
  depth = 0,
 }: TreeCallbacks & { nodes: TreeNode[]; depth?: number }) {
+  // Currently open context menu (viewport coords + the items built
+  // for the clicked row), or null when no menu is showing.
+  const [menu, setMenu] = useState<{
+    x: number;
+    y: number;
+    items: MenuItem[];
+  } | null>(null);
+  // PR-D: hover-target highlight state for drag-drop. Lifted next to
+  // the menu state so both shared-across-rows interactions live in
+  // one place.
+  const [hoverDir, setHoverDir] = useState<string | null>(null);
+
+  // Builds the item list for `node` and opens the menu at the
+  // pointer position. preventDefault suppresses the browser's native
+  // context menu.
+  const openContextMenu = (e: React.MouseEvent, node: TreeNode) => {
+    e.preventDefault();
+    // Items composed per-row so the available actions reflect the
+    // node type (files get Open + Download; directories get Delete
+    // only since "open a directory in the editor" doesn't apply
+    // and "Export folder" is the toolbar's job).
+    const items: MenuItem[] = [];
+    if (!node.isDir) {
+      items.push({
+        id: "open",
+        label: "Open",
+        icon: "⤴",
+        onClick: () => onSelect(node.path),
+      });
+      items.push({
+        id: "download",
+        label: "Download",
+        icon: "↓",
+        onClick: () => onDownload(node.path),
+      });
+    }
+    // Delete is always listed; it is rendered disabled when the
+    // caller passes canDelete=false.
+    items.push({
+      id: "delete",
+      label: "Delete",
+      icon: "✕",
+      destructive: true,
+      disabled: !canDelete,
+      onClick: () => onDelete(node.path),
+    });
+    setMenu({ x: e.clientX, y: e.clientY, items });
+  };
+
+  // Caller-supplied callbacks forwarded unchanged to every <TreeItem>.
+  // The menu and hover state above are NOT part of this bundle; rows
+  // receive them through the separate openContextMenu / hoverDir /
+  // setHoverDir props below.
+  // NOTE(review): every <FileTree> instance calls useState for `menu`
+  // and `hoverDir`, so if expanded directories render a nested
+  // <FileTree>, that instance holds its own copies and "one menu /
+  // one highlighted drop target" is only guaranteed per instance.
+  // Confirm against the nested render in TreeItem.
+  const childCallbacks: TreeCallbacks = {
+    selectedPath,
+    onSelect,
+    onDelete,
+    onDownload,
+    canDelete,
+    onDropToTarget,
+    expandedDirs,
+    onToggleDir,
+    loadingDir,
+  };
+
  return (
    <div>
      {nodes.map((node) => (
        <TreeItem
          key={`${node.path}:${node.isDir ? "dir" : "file"}`}
          node={node}
-          selectedPath={selectedPath}
-          onSelect={onSelect}
-          onDelete={onDelete}
-          expandedDirs={expandedDirs}
-          onToggleDir={onToggleDir}
-          loadingDir={loadingDir}
+          openContextMenu={openContextMenu}
+          hoverDir={hoverDir}
+          setHoverDir={setHoverDir}
          depth={depth}
+          {...childCallbacks}
        />
      ))}
+      {menu && (
+        <FileTreeContextMenu
+          x={menu.x}
+          y={menu.y}
+          items={menu.items}
+          onClose={() => setMenu(null)}
+        />
+      )}
    </div>
  );
 }
@@ -45,22 +133,81 @@ function TreeItem({
  selectedPath,
  onSelect,
  onDelete,
+  onDownload,
+  canDelete,
+  onDropToTarget,
  expandedDirs,
  onToggleDir,
  loadingDir,
  depth,
-}: TreeCallbacks & { node: TreeNode; depth: number }) {
+  openContextMenu,
+  hoverDir,
+  setHoverDir,
+}: TreeCallbacks & {
+  node: TreeNode;
+  depth: number;
+  openContextMenu: (e: React.MouseEvent, node: TreeNode) => void;
+  hoverDir: string | null;
+  setHoverDir: (p: string | null) => void;
+}) {
  const isSelected = selectedPath === node.path;
  const expanded = expandedDirs.has(node.path);
  const isLoading = loadingDir === node.path;
+  const isDropTarget = node.isDir && hoverDir === node.path;
+
+  // PR-D drag handlers — only directory rows are valid drop targets
+  // (dropping a file ON another file is ambiguous; treat it as
+  // dropping in the parent dir, which the root area handles). When a
+  // drag enters a directory row, mark it the hover target. When the
+  // cursor leaves to a non-child element, clear it. drop fires the
+  // upload callback with the row's path.
+  const dragProps = node.isDir && onDropToTarget
+    ? {
+        onDragOver: (e: React.DragEvent) => {
+          // preventDefault is REQUIRED to opt this element into the
+          // drop target list — without it, browsers refuse to fire
+          // the drop event regardless of the drop handler.
+          e.preventDefault();
+          e.dataTransfer.dropEffect = "copy";
+        },
+        onDragEnter: (e: React.DragEvent) => {
+          e.preventDefault();
+          setHoverDir(node.path);
+        },
+        onDragLeave: (e: React.DragEvent) => {
+          // Only clear hover when leaving to an element OUTSIDE this
+          // row — bare leave-events fire for every child crossed
+          // (the icon, the label, the ✕ button). Without the
+          // contains() check the highlight flickers.
+          const next = e.relatedTarget as Node | null;
+          if (!next || !(e.currentTarget as HTMLElement).contains(next)) {
+            setHoverDir(null);
+          }
+        },
+        onDrop: (e: React.DragEvent) => {
+          e.preventDefault();
+          e.stopPropagation();
+          setHoverDir(null);
+          if (e.dataTransfer.items?.length) {
+            onDropToTarget(node.path, e.dataTransfer.items);
+          }
+        },
+      }
+    : {};

  if (node.isDir) {
    return (
      <div>
        <div
-          className="group w-full flex items-center gap-1 px-2 py-0.5 text-left hover:bg-surface-card/40 transition-colors cursor-pointer"
+          className={`group w-full flex items-center gap-1 px-2 py-0.5 text-left transition-colors cursor-pointer ${
+            isDropTarget
+              ? "bg-accent/20 outline outline-1 outline-accent/60"
+              : "hover:bg-surface-card/40"
+          }`}
          style={{ paddingLeft: `${depth * 12 + 8}px` }}
          onClick={() => onToggleDir(node.path)}
+          onContextMenu={(e) => openContextMenu(e, node)}
+          {...dragProps}
        >
          <span className="text-[9px] text-ink-soft w-3">{isLoading ? "…" : expanded ? "▼" : "▶"}</span>
          <span className="text-[10px]">📁</span>
@@ -82,6 +229,9 @@ function TreeItem({
            selectedPath={selectedPath}
            onSelect={onSelect}
            onDelete={onDelete}
+            onDownload={onDownload}
+            canDelete={canDelete}
+            onDropToTarget={onDropToTarget}
            expandedDirs={expandedDirs}
            onToggleDir={onToggleDir}
            loadingDir={loadingDir}
@@ -99,6 +249,7 @@ function TreeItem({
      }`}
      style={{ paddingLeft: `${depth * 12 + 20}px` }}
      onClick={() => onSelect(node.path)}
+      onContextMenu={(e) => openContextMenu(e, node)}
    >
      <span className="text-[9px]">{getIcon(node.name, false)}</span>
      <span className="text-[10px] flex-1 truncate font-mono">{node.name}</span>
@@ -0,0 +1,141 @@
+"use client";
+
+import { useEffect, useRef } from "react";
+
+/**
+ * FileTreeContextMenu — VSCode-style right-click menu for a single
+ * file-tree row. Pops at the cursor's viewport coords; dismisses on
+ * outside-click, Esc, or scroll.
+ *
+ * Why a custom component (no library): the menu is one of several
+ * "small popovers" in canvas; pulling in a dnd / popover lib for one
+ * surface adds 10x the bytes of this implementation. The patterns
+ * (outside-click + Esc + portal-free fixed position) match the
+ * ContextMenu used in canvas/Toolbar so the keyboard-nav muscle
+ * memory is uniform.
+ *
+ * Items are rendered from a `MenuItem[]` so callers can add/remove
+ * actions without touching this component (e.g. PR-D will add an
+ * "Upload to this folder" item for directory rows).
+ *
+ * Accessibility:
+ * - role="menu" + role="menuitem" so screen readers announce the
+ *   surface as a menu, not a generic div.
+ * - First item gets autofocus so keyboard users can ↓/↑/Enter without
+ *   reaching for the mouse.
+ * - Esc + outside-click dismiss the menu, like every other menu the
+ *   user has touched on the canvas. Tab / Shift+Tab are not
+ *   intercepted: they move focus out of the menu without closing it.
+ */
+export interface MenuItem {
+  /** Stable identifier for testing + analytics. */
+  id: string;
+  label: string;
+  /** Optional left icon glyph; not load-bearing. */
+  icon?: string;
+  /** Destructive (rendered in red) — for Delete-class actions. */
+  destructive?: boolean;
+  /** Item-specific click handler. The menu auto-closes after onClick
+   *  fires so handlers don't have to call onClose themselves. */
+  onClick: () => void;
+  /** Disabled items render (dimmed) but don't fire onClick, and the
+   *  menu stays open when they are clicked. FileTree sets this on
+   *  Delete when the active root does not permit deletion, so the
+   *  action stays visible but inert. */
+  disabled?: boolean;
+}
+
+interface Props {
+  /** Viewport-coordinate position of the cursor that opened the menu. */
+  x: number;
+  y: number;
+  items: MenuItem[];
+  onClose: () => void;
+}
+
+/** Matches the menu buttons that can currently take focus. */
+const ENABLED_ITEM_SELECTOR = "[role='menuitem']:not([disabled])";
+
+/**
+ * Fixed-position context menu rendered at viewport coords (x, y).
+ *
+ * - On mount, focus moves to the first enabled item (if any).
+ * - Esc, a mousedown outside the menu, or any scroll calls `onClose`.
+ * - ArrowDown / ArrowUp move focus between enabled items, wrapping at
+ *   both ends. When nothing in the menu is focused, ArrowDown goes to
+ *   the first enabled item and ArrowUp to the last.
+ * - Clicking an enabled item runs its `onClick`, then `onClose`.
+ *   Clicking a disabled item does nothing and leaves the menu open.
+ */
+export function FileTreeContextMenu({ x, y, items, onClose }: Props) {
+  const ref = useRef<HTMLDivElement>(null);
+
+  // Focus the first ENABLED item. Calling focus() on a disabled
+  // button is a no-op, so unconditionally targeting index 0 would
+  // leave keyboard users with nothing focused whenever the first
+  // item is disabled.
+  useEffect(() => {
+    ref.current
+      ?.querySelector<HTMLButtonElement>(ENABLED_ITEM_SELECTOR)
+      ?.focus();
+  }, []);
+
+  // Outside-click + Esc dismiss. Per memory
+  // (feedback_abort_controller_for_rerendered_listeners), use an
+  // AbortController so re-mounts (caller toggles the menu) don't leak
+  // listeners.
+  useEffect(() => {
+    const ctrl = new AbortController();
+    const onPointerDown = (e: MouseEvent) => {
+      if (ref.current && !ref.current.contains(e.target as Node)) onClose();
+    };
+    const onKeyDown = (e: KeyboardEvent) => {
+      if (e.key === "Escape") {
+        e.preventDefault();
+        onClose();
+        return;
+      }
+      if (e.key !== "ArrowDown" && e.key !== "ArrowUp") return;
+      // Roving focus across the enabled menuitem buttons. Tab /
+      // Shift+Tab are deliberately left alone so the user can move
+      // focus out of the menu.
+      e.preventDefault();
+      const enabled = Array.from(
+        ref.current?.querySelectorAll<HTMLButtonElement>(
+          ENABLED_ITEM_SELECTOR,
+        ) ?? [],
+      );
+      if (enabled.length === 0) return;
+      const goingDown = e.key === "ArrowDown";
+      const cur = enabled.indexOf(document.activeElement as HTMLButtonElement);
+      let next: number;
+      if (cur === -1) {
+        // Nothing in the menu is focused yet. The plain modular
+        // arithmetic below would land ArrowUp on the second-to-last
+        // item, so enter the list from the matching end instead.
+        next = goingDown ? 0 : enabled.length - 1;
+      } else if (goingDown) {
+        next = (cur + 1) % enabled.length;
+      } else {
+        next = (cur - 1 + enabled.length) % enabled.length;
+      }
+      enabled[next].focus();
+    };
+    // `mousedown` (not `click`) so the menu dismisses BEFORE the
+    // tree-row's click handler would fire — otherwise clicking
+    // outside also selects a different row, which is not what the
+    // user expected when "outside-click closes the menu".
+    document.addEventListener("mousedown", onPointerDown, { signal: ctrl.signal });
+    document.addEventListener("keydown", onKeyDown, { signal: ctrl.signal });
+    // Scroll inside any ancestor also dismisses — the fixed-position
+    // menu would otherwise stay anchored to viewport coords while the
+    // row it points at scrolled away. Use capture so we catch scroll
+    // on inner panels (FileTree's overflow-y-auto wrapper).
+    document.addEventListener("scroll", onClose, { signal: ctrl.signal, capture: true });
+    return () => ctrl.abort();
+  }, [onClose]);
+
+  return (
+    <div
+      ref={ref}
+      role="menu"
+      aria-label="File actions"
+      className="fixed z-[1000] min-w-[140px] py-1 bg-surface-elevated border border-line/60 rounded-md shadow-xl shadow-black/30 text-[11px]"
+      style={{ left: x, top: y }}
+    >
+      {items.map((item) => (
+        <button
+          key={item.id}
+          type="button"
+          role="menuitem"
+          disabled={item.disabled}
+          onClick={() => {
+            // Belt and braces: a disabled <button> does not dispatch
+            // click, but keep the gate explicit.
+            if (item.disabled) return;
+            item.onClick();
+            onClose();
+          }}
+          className={
+            item.destructive
+              ? "w-full text-left px-3 py-1 text-bad hover:bg-red-900/30 focus:bg-red-900/30 focus:outline-none disabled:opacity-40 disabled:pointer-events-none transition-colors"
+              : "w-full text-left px-3 py-1 text-ink-mid hover:bg-surface-card hover:text-ink focus:bg-surface-card focus:text-ink focus:outline-none disabled:opacity-40 disabled:pointer-events-none transition-colors"
+          }
+        >
+          {item.icon && <span className="inline-block w-4 mr-1.5 text-ink-soft">{item.icon}</span>}
+          {item.label}
+        </button>
+      ))}
+    </div>
+  );
+}
@@ -0,0 +1,58 @@
+"use client";
+
+/**
+ * NotAvailablePanel — full-tab placeholder for runtimes whose filesystem
+ * the platform doesn't own (today: runtime === "external").
+ *
+ * Pre-fix the FilesTab tried to GET /workspaces/<id>/files for these
+ * workspaces. The platform answered with [] (no rows in workspace_files
+ * for an external workspace by definition), but the canvas rendered
+ * "0 files / No config files yet" which reads identically to the SaaS
+ * empty-listing bug fixed in PR-A. Showing an explicit placeholder
+ * makes the absence intentional and routes the user toward the
+ * supported surface (Chat) for these workspaces.
+ *
+ * Mirrors the same affordance TerminalTab adopted for runtimes without
+ * a TTY in PR #2830 — uniform "feature-not-applicable" UX across tabs.
+ */
+export function NotAvailablePanel({ runtime }: { runtime: string }) {
+  // Centered, full-height placeholder: icon, heading, then one
+  // sentence that names the runtime (monospace) and points the user
+  // at the Chat tab.
+  return (
+    <div className="flex flex-col items-center justify-center h-full p-8 text-center bg-surface-sunken/30">
+      <FolderSlashIcon />
+      <h3 className="text-sm font-medium text-ink mb-1.5">Files not available</h3>
+      <p className="text-[11px] text-ink-soft max-w-xs leading-relaxed">
+        This workspace runs the{" "}
+        <span className="font-mono text-ink-mid">{runtime}</span> runtime,
+        whose filesystem isn't owned by the platform. Use the Chat tab to
+        interact with the agent directly.
+      </p>
+    </div>
+  );
+}
+
+/**
+ * Folder outline crossed by a diagonal slash. Inline SVG so the panel
+ * doesn't depend on an icon set being present at canvas build-time.
+ * Decorative only (aria-hidden); it takes its colour from
+ * `currentColor`.
+ */
+function FolderSlashIcon() {
+  return (
+    <svg
+      width="72"
+      height="72"
+      viewBox="0 0 72 72"
+      fill="none"
+      aria-hidden="true"
+      className="text-ink-soft mb-4"
+    >
+      {/* Folder body */}
+      <path
+        d="M10 22 L10 56 a4 4 0 0 0 4 4 L58 60 a4 4 0 0 0 4 -4 L62 26 a4 4 0 0 0 -4 -4 L34 22 L28 16 L14 16 a4 4 0 0 0 -4 4 Z"
+        stroke="currentColor"
+        strokeWidth="2.5"
+        strokeLinejoin="round"
+        fill="none"
+        opacity="0.6"
+      />
+      {/* Diagonal cancel slash */}
+      <path
+        d="M14 14 L58 58"
+        stroke="currentColor"
+        strokeWidth="3"
+        strokeLinecap="round"
+      />
+    </svg>
+  );
+}
@@ -0,0 +1,136 @@
+// @vitest-environment jsdom
+//
+// Pins the right-click context menu added in PR-C of issue #2999.
+// VSCode-style affordance: Open / Download / Delete on file rows,
+// Delete on directory rows. Delete is gated by `canDelete` (parent
+// only enables on /configs root, matching the toolbar's gate).
+//
+// Pinned branches:
+//   1. Right-click on a file row opens the menu at the click coords
+//      with Open + Download + Delete items.
+//   2. Right-click on a directory row opens the menu with Delete
+//      only (no Open/Download — directories don't have one-click
+//      semantics in this surface).
+//   3. Clicking Download fires the onDownload callback with the
+//      row's path.
+//   4. Clicking Delete fires onDelete with the row's path (when
+//      canDelete=true).
+//   5. Delete is disabled in the rendered menu when canDelete=false
+//      and clicking it does NOT fire onDelete (gate is real).
+//   6. Esc dismisses the menu.
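+//   7. Click outside the menu dismisses it.
+//   8. Opening a second context menu replaces the first (only one
+//      menu is in the DOM at a time).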
+
+import { describe, it, expect, vi, afterEach } from "vitest";
+import { render, screen, cleanup, fireEvent, act } from "@testing-library/react";
+import React from "react";
+import { FileTree } from "../FileTree";
+import type { TreeNode } from "../tree";
+
+afterEach(cleanup);
+
+const file: TreeNode = { name: "config.yaml", path: "config.yaml", isDir: false, children: [], size: 0 };
+const dir: TreeNode = {
+  name: "skills",
+  path: "skills",
+  isDir: true,
+  children: [],
+  size: 0,
+};
+
+/**
+ * Renders <FileTree> with one file row and one directory row, using
+ * fresh vi.fn() spies for every callback. Any key in `props` replaces
+ * the matching default. Returns the Testing Library render result
+ * plus the final `props` object so tests can assert on the spies.
+ */
+function renderTree(props: Partial<React.ComponentProps<typeof FileTree>> = {}) {
+  const merged = {
+    nodes: [file, dir],
+    selectedPath: null,
+    onSelect: vi.fn(),
+    onDelete: vi.fn(),
+    onDownload: vi.fn(),
+    canDelete: true,
+    expandedDirs: new Set<string>(),
+    onToggleDir: vi.fn(),
+    loadingDir: null,
+    // Caller overrides go last so they win over the defaults above.
+    ...props,
+  };
+  const rendered = render(<FileTree {...merged} />);
+  return { ...rendered, props: merged };
+}
+
+describe("FileTree right-click context menu", () => {
+  it("right-click on a file row opens menu with Open/Download/Delete", () => {
+    renderTree();
+    fireEvent.contextMenu(screen.getByText("config.yaml"), {
+      clientX: 50,
+      clientY: 100,
+    });
+    // getByRole throws if the menu is missing, so this also pins that
+    // the menu opened.
+    const menu = screen.getByRole("menu");
+    // The menu is placed via inline left/top taken from the event's
+    // clientX/clientY; pin the coordinates so a regression that opens
+    // the menu at the wrong position is caught here.
+    expect(menu.style.left).toBe("50px");
+    expect(menu.style.top).toBe("100px");
+    expect(screen.getByRole("menuitem", { name: /Open/i })).not.toBeNull();
+    expect(screen.getByRole("menuitem", { name: /Download/i })).not.toBeNull();
+    expect(screen.getByRole("menuitem", { name: /Delete/i })).not.toBeNull();
+  });
+
+  it("right-click on a directory row opens menu with Delete only (no Open/Download)", () => {
+    renderTree();
+    fireEvent.contextMenu(screen.getByText("skills"), { clientX: 60, clientY: 120 });
+    expect(screen.getByRole("menu")).not.toBeNull();
+    expect(screen.queryByRole("menuitem", { name: /Open/i })).toBeNull();
+    expect(screen.queryByRole("menuitem", { name: /Download/i })).toBeNull();
+    expect(screen.getByRole("menuitem", { name: /Delete/i })).not.toBeNull();
+  });
+
+  it("clicking Download fires onDownload with the row's path", () => {
+    const { props } = renderTree();
+    fireEvent.contextMenu(screen.getByText("config.yaml"), { clientX: 0, clientY: 0 });
+    fireEvent.click(screen.getByRole("menuitem", { name: /Download/i }));
+    expect(props.onDownload).toHaveBeenCalledWith("config.yaml");
+    // Menu auto-closes after click.
+    expect(screen.queryByRole("menu")).toBeNull();
+  });
+
+  it("clicking Delete fires onDelete with the row's path when canDelete=true", () => {
+    const { props } = renderTree({ canDelete: true });
+    fireEvent.contextMenu(screen.getByText("config.yaml"), { clientX: 0, clientY: 0 });
+    fireEvent.click(screen.getByRole("menuitem", { name: /Delete/i }));
+    expect(props.onDelete).toHaveBeenCalledWith("config.yaml");
+  });
+
+  it("Delete is disabled when canDelete=false; clicking does not fire onDelete", () => {
+    const { props } = renderTree({ canDelete: false });
+    fireEvent.contextMenu(screen.getByText("config.yaml"), { clientX: 0, clientY: 0 });
+    const del = screen.getByRole("menuitem", { name: /Delete/i }) as HTMLButtonElement;
+    expect(del.disabled).toBe(true);
+    fireEvent.click(del);
+    expect(props.onDelete).not.toHaveBeenCalled();
+    // Menu stays open on disabled click — same as VSCode (the user
+    // can read the disabled-state hint without losing the menu).
+    expect(screen.getByRole("menu")).not.toBeNull();
+  });
+
+  it("Esc dismisses the menu", () => {
+    renderTree();
+    fireEvent.contextMenu(screen.getByText("config.yaml"), { clientX: 0, clientY: 0 });
+    expect(screen.getByRole("menu")).not.toBeNull();
+    act(() => {
+      fireEvent.keyDown(document, { key: "Escape" });
+    });
+    expect(screen.queryByRole("menu")).toBeNull();
+  });
+
+  it("click outside the menu dismisses it", () => {
+    renderTree();
+    fireEvent.contextMenu(screen.getByText("config.yaml"), { clientX: 0, clientY: 0 });
+    expect(screen.getByRole("menu")).not.toBeNull();
+    // mousedown on document.body — outside the menu.
+    act(() => {
+      fireEvent.mouseDown(document.body);
+    });
+    expect(screen.queryByRole("menu")).toBeNull();
+  });
+
+  it("opening a second context menu replaces the first (only one open at a time)", () => {
+    renderTree();
+    fireEvent.contextMenu(screen.getByText("config.yaml"), { clientX: 10, clientY: 10 });
+    fireEvent.contextMenu(screen.getByText("skills"), { clientX: 20, clientY: 20 });
+    // Only one menu in the DOM. The second open replaced the first
+    // because the menu state is lifted to the FileTree, not per-row.
+    const menus = screen.getAllByRole("menu");
+    expect(menus.length).toBe(1);
+  });
+});
@@ -0,0 +1,212 @@
+// @vitest-environment jsdom
+//
+// Pins the drag-drop upload added in PR-D of issue #2999.
+// Two layers of coverage:
+//
+//  1. The pure walker (collectFileEntries / walkEntry) — pins the
+//     recursion shape against silent folder truncation. Browsers
+//     return up to ~100 entries per readEntries() call; if the loop
+//     stops early, large folder uploads silently drop files. We
+//     simulate a multi-batch reader to discriminate.
+//
+//  2. FileTree directory-row drop handlers — pins that dragover/drop
+//     events fire onDropToTarget with the directory's path + the
+//     drop's DataTransferItemList.
+
+import { describe, it, expect, vi, afterEach } from "vitest";
+import { render, screen, cleanup, fireEvent } from "@testing-library/react";
+import React from "react";
+import { FileTree } from "../FileTree";
+import type { TreeNode } from "../tree";
+import { __testables } from "../useFilesApi";
+
+afterEach(cleanup);
+
+// ---- Walker tests ----
+
+/**
+ * Fake file-type FileSystemEntry to hand to walkEntry. Mirrors the
+ * file half of what webkitGetAsEntry returns on a real OS drag: the
+ * entry exposes file(cb), which here calls cb synchronously with a
+ * text/plain File built from `content`.
+ */
+function fakeFileEntry(name: string, content = "x"): {
+  isFile: true;
+  isDirectory: false;
+  name: string;
+  fullPath: string;
+  file: (cb: (f: File) => void) => void;
+} {
+  const fullPath = "/" + name;
+  const file = (cb: (f: File) => void): void => {
+    cb(new File([content], name, { type: "text/plain" }));
+  };
+  return { isFile: true, isDirectory: false, name, fullPath, file };
+}
+
+// Fake directory-type entry. `childBatches` is the queue of batches
+// handed out by readEntries(), one batch per call.
+function fakeDirEntry(
+  name: string,
+  childBatches: ReturnType<typeof fakeFileEntry>[][],
+): {
+  isFile: false;
+  isDirectory: true;
+  name: string;
+  fullPath: string;
+  createReader: () => { readEntries: (cb: (entries: unknown[]) => void) => void };
+} {
+  // The cursor lives outside createReader, so every reader created
+  // from this entry shares one position in the batch queue.
+  let cursor = 0;
+  const readEntries = (cb: (entries: unknown[]) => void): void => {
+    // Browser semantics: one batch per call, then an empty array as
+    // the end-of-stream marker. A walker that calls readEntries only
+    // once would silently stop after the first batch.
+    const batch = cursor < childBatches.length ? childBatches[cursor++] : [];
+    cb(batch);
+  };
+  return {
+    isFile: false,
+    isDirectory: true,
+    name,
+    fullPath: "/" + name,
+    createReader: () => ({ readEntries }),
+  };
+}
+
+// Walker contract pinned by this suite: a lone file entry yields one
+// result; directory names accumulate into relativePath; readEntries
+// is drained until it returns an empty batch; nested directories
+// recurse. Fakes are cast `as never` because they implement only the
+// members the walker touches.
+describe("walkEntry — folder-recursion drop walker", () => {
+  it("collects a single dropped file", async () => {
+    const out: { file: File; relativePath: string }[] = [];
+    await __testables.walkEntry(fakeFileEntry("README.md") as never, "", out);
+    expect(out.length).toBe(1);
+    expect(out[0].relativePath).toBe("README.md");
+    expect(out[0].file.name).toBe("README.md");
+  });
+
+  it("walks a folder and preserves the relative path under the folder name", async () => {
+    const out: { file: File; relativePath: string }[] = [];
+    const folder = fakeDirEntry("skills", [
+      [fakeFileEntry("a.md"), fakeFileEntry("b.md")],
+    ]);
+    await __testables.walkEntry(folder as never, "", out);
+    expect(out.map((e) => e.relativePath).sort()).toEqual([
+      "skills/a.md",
+      "skills/b.md",
+    ]);
+  });
+
+  it("loops readEntries until empty so a multi-batch folder isn't truncated", async () => {
+    // Browsers limit each readEntries() call to ~100 entries. Our
+    // walker MUST call it again until an empty batch is returned.
+    // Fake reader emits two batches of 2 + an implicit empty → 4
+    // total. A buggy walker that only takes the first batch would
+    // see only 2.
+    const out: { file: File; relativePath: string }[] = [];
+    const folder = fakeDirEntry("big", [
+      [fakeFileEntry("1.txt"), fakeFileEntry("2.txt")],
+      [fakeFileEntry("3.txt"), fakeFileEntry("4.txt")],
+    ]);
+    await __testables.walkEntry(folder as never, "", out);
+    expect(out.length).toBe(4);
+  });
+
+  it("walks nested directories and accumulates the full path", async () => {
+    const out: { file: File; relativePath: string }[] = [];
+    const inner = fakeDirEntry("web-search", [[fakeFileEntry("SKILL.md")]]);
+    // Outer dir whose first batch contains a sub-dir entry.
+    const outer = {
+      isFile: false,
+      isDirectory: true,
+      name: "skills",
+      fullPath: "/skills",
+      createReader: () => {
+        let i = 0;
+        return {
+          readEntries: (cb: (entries: unknown[]) => void) => {
+            if (i++ === 0) cb([inner]);
+            else cb([]);
+          },
+        };
+      },
+    };
+    await __testables.walkEntry(outer as never, "", out);
+    expect(out.length).toBe(1);
+    expect(out[0].relativePath).toBe("skills/web-search/SKILL.md");
+  });
+});
+
+// ---- FileTree drag-drop wiring ----
+
+const file: TreeNode = { name: "config.yaml", path: "config.yaml", isDir: false, children: [], size: 0 };
+const skillsDir: TreeNode = { name: "skills", path: "skills", isDir: true, children: [], size: 0 };
+
+/**
+ * Render a FileTree over a two-node tree (one file, one directory)
+ * with vi.fn() stubs for every callback. `props` overrides any
+ * default. Returns the testing-library render result plus the
+ * effective props under `props`, so tests can assert on the stubs.
+ */
+function renderTree(props: Partial<React.ComponentProps<typeof FileTree>> = {}) {
+  // onDownload + canDelete come from PR-C and are required on the
+  // TreeCallbacks shape, so the PR-D defaults have to carry them too
+  // (the rebase surfaced this — the merged tree needs both sets).
+  const baseline: React.ComponentProps<typeof FileTree> = {
+    nodes: [file, skillsDir],
+    selectedPath: null,
+    onSelect: vi.fn(),
+    onDelete: vi.fn(),
+    onDownload: vi.fn(),
+    canDelete: true,
+    onDropToTarget: vi.fn(),
+    expandedDirs: new Set<string>(),
+    onToggleDir: vi.fn(),
+    loadingDir: null,
+  };
+  const effective = { ...baseline, ...props };
+  const view = render(<FileTree {...effective} />);
+  return { ...view, props: effective };
+}
+
+// Every event here targets the parent element of a row's text node
+// (getByText(...).parentElement) — presumably the element that
+// carries the drag handlers; confirm against FileTree if the row
+// markup changes.
+describe("FileTree directory-row drag-drop", () => {
+  it("dragover on a directory row preventDefault's so the drop will fire", () => {
+    renderTree();
+    const row = screen.getByText("skills");
+    const dragOver = new Event("dragover", { bubbles: true, cancelable: true });
+    Object.defineProperty(dragOver, "dataTransfer", {
+      value: { dropEffect: "" },
+    });
+    row.parentElement!.dispatchEvent(dragOver);
+    // preventDefault registers via the React handler — without it
+    // the drop event would never fire, so this assertion is the
+    // load-bearing one.
+    expect(dragOver.defaultPrevented).toBe(true);
+  });
+
+  it("drop on a directory row fires onDropToTarget with that path + the items list", () => {
+    const { props } = renderTree();
+    const row = screen.getByText("skills").parentElement!;
+    const fakeItems = { length: 1, 0: { kind: "file" } } as unknown as DataTransferItemList;
+    fireEvent.drop(row, { dataTransfer: { items: fakeItems } });
+    expect(props.onDropToTarget).toHaveBeenCalledWith("skills", fakeItems);
+  });
+
+  it("drop on a FILE row does NOT fire onDropToTarget (only directories are valid targets)", () => {
+    const { props } = renderTree();
+    const fileRow = screen.getByText("config.yaml").parentElement!;
+    const fakeItems = { length: 1, 0: { kind: "file" } } as unknown as DataTransferItemList;
+    fireEvent.drop(fileRow, { dataTransfer: { items: fakeItems } });
+    expect(props.onDropToTarget).not.toHaveBeenCalled();
+  });
+
+  it("drop with no DataTransferItems does NOT fire onDropToTarget", () => {
+    const { props } = renderTree();
+    const row = screen.getByText("skills").parentElement!;
+    fireEvent.drop(row, { dataTransfer: { items: { length: 0 } } });
+    expect(props.onDropToTarget).not.toHaveBeenCalled();
+  });
+
+  it("dragenter sets the drop-target highlight on the directory row", () => {
+    renderTree();
+    const row = screen.getByText("skills").parentElement!;
+    fireEvent.dragEnter(row, { dataTransfer: {} });
+    // Highlight class is the discriminator — without dragenter
+    // wiring the row stays in its hover-only style.
+    expect(row.className).toMatch(/bg-accent|outline-accent/);
+  });
+});
@@ -90,6 +90,43 @@ export function useFilesApi(workspaceId: string, root: string) {
    [workspaceId]
  );

+  /**
+   * Download a file straight from the server by its tree path. Backs
+   * the right-click "Download" context-menu item (PR-C of issue
+   * #2999). Not the same as `handleDownloadFile` in FilesTab, which
+   * saves the CURRENTLY-OPEN-IN-EDITOR file out of the in-memory
+   * `editContent` buffer (so unsaved edits round-trip to disk); this
+   * helper fetches the on-server content, so it works for tree rows
+   * the user never opened. Success and failure are both reported via
+   * toast; nothing is thrown to the caller.
+   */
+  const downloadFileByPath = useCallback(
+    async (path: string) => {
+      try {
+        const endpoint = `/workspaces/${workspaceId}/files/${path}?root=${encodeURIComponent(root)}`;
+        const payload = await api.get<{ content: string }>(endpoint);
+        // Last path segment becomes the suggested filename.
+        const filename = path.split("/").pop() || "file";
+        // text/plain suits the canvas's text-only file surface
+        // (config.yaml, prompts, skill markdown). The API returns a
+        // string today, so this matches the wire shape; binary files
+        // would need an Accept-arraybuffer path instead.
+        const objectUrl = URL.createObjectURL(
+          new Blob([payload.content], { type: "text/plain" }),
+        );
+        // Click a detached <a download> to hand the blob to the
+        // browser's download flow, then release the object URL.
+        const anchor = document.createElement("a");
+        anchor.href = objectUrl;
+        anchor.download = filename;
+        anchor.click();
+        URL.revokeObjectURL(objectUrl);
+        showToast(`Downloaded ${filename}`, "success");
+      } catch (e) {
+        const reason = e instanceof Error ? e.message : "unknown error";
+        showToast(`Download failed: ${reason}`, "error");
+      }
+    },
+    [workspaceId, root],
+  );
+
  const downloadAllFiles = useCallback(async () => {
    const fileEntries = files.filter((f) => !f.dir);
    const results = await Promise.allSettled(
@@ -114,16 +151,20 @@ export function useFilesApi(workspaceId: string, root: string) {
  }, [files, workspaceId]);

  const uploadFiles = useCallback(
-    async (fileList: FileList) => {
+    async (fileList: FileList, targetDir = "") => {
      let uploaded = 0;
      for (const file of Array.from(fileList)) {
        const path = file.webkitRelativePath || file.name;
        const parts = path.split("/");
+        // For folder picker: webkitRelativePath is "<picked-folder>/a/b.txt"
+        // — strip the picked-folder prefix so files land flat under the
+        // workspace's target dir, not under a redundant outer folder.
        const relPath = parts.length > 1 ? parts.slice(1).join("/") : parts[0];
+        const finalPath = targetDir ? `${targetDir}/${relPath}` : relPath;
        if (file.size > 1_000_000) continue;
        try {
          const content = await file.text();
-          await api.put(`/workspaces/${workspaceId}/files/${relPath}`, { content });
+          await api.put(`/workspaces/${workspaceId}/files/${finalPath}`, { content });
          uploaded++;
        } catch {
          /* skip binary */
@@ -131,7 +172,7 @@ export function useFilesApi(workspaceId: string, root: string) {
      }
      if (uploaded > 0) {
        useCanvasStore.getState().updateNodeData(workspaceId, { needsRestart: true });
-        showToast(`Uploaded ${uploaded} files`, "success");
+        showToast(`Uploaded ${uploaded} files${targetDir ? ` to ${targetDir}` : ""}`, "success");
        loadFiles();
      }
      return uploaded;
@@ -139,6 +180,58 @@ export function useFilesApi(workspaceId: string, root: string) {
    [workspaceId, loadFiles]
  );

+  /**
+   * Upload whatever was dragged in from the OS, given the drop's
+   * HTML5 DataTransferItemList. In contrast to the folder-picker path
+   * (uploadFiles), the dropped folder structure is kept intact under
+   * `targetDir` — drop a "skills/" folder onto the /configs/skills
+   * row and the result is /configs/skills/skills/* (the OUTER folder
+   * name survives because the user deliberately dropped a NAMED
+   * folder, whereas the folder-picker always wraps the picked dir).
+   *
+   * FileSystemDirectoryEntry trees are walked recursively through
+   * webkitGetAsEntry. VSCode/JupyterLab rely on the same primitive —
+   * no other portable browser API covers "drag a folder from OS".
+   * The `webkit*` naming is a Chromium relic; Firefox + Safari
+   * implement the same surface.
+   *
+   * Resolves to the number of files uploaded so the caller can show
+   * a tally / fail toast.
+   */
+  const uploadDataTransferItems = useCallback(
+    async (items: DataTransferItemList, targetDir = "") => {
+      const collected = await collectFileEntries(items);
+      let uploaded = 0;
+      for (const entry of collected) {
+        const { file, relativePath } = entry;
+        // Files above 1,000,000 bytes are skipped without a message.
+        if (file.size > 1_000_000) continue;
+        let finalPath = relativePath;
+        if (targetDir) {
+          finalPath = `${targetDir}/${relativePath}`;
+        }
+        try {
+          const content = await file.text();
+          await api.put(`/workspaces/${workspaceId}/files/${finalPath}`, {
+            content,
+          });
+          uploaded++;
+        } catch {
+          /* skip binary */
+        }
+      }
+      if (uploaded === 0) {
+        return uploaded;
+      }
+      const store = useCanvasStore.getState();
+      store.updateNodeData(workspaceId, { needsRestart: true });
+      const plural = uploaded === 1 ? "" : "s";
+      const destination = targetDir ? ` to ${targetDir}` : "";
+      showToast(`Uploaded ${uploaded} file${plural}${destination}`, "success");
+      loadFiles();
+      return uploaded;
+    },
+    [workspaceId, loadFiles],
+  );
+
  const deleteAllFiles = useCallback(async () => {
    let deleted = 0;
    for (const f of files) {
@@ -165,8 +258,98 @@ export function useFilesApi(workspaceId: string, root: string) {
    readFile,
    writeFile,
    deleteFile,
+    downloadFileByPath,
    downloadAllFiles,
    uploadFiles,
+    uploadDataTransferItems,
    deleteAllFiles,
  };
 }
+
+// ----- DataTransfer entry walker (PR-D) ---------------------------------
+
+/**
+ * Minimal subset of the FileSystem Entry API surface we use. The DOM
+ * lib types this as FileSystemEntry / FileSystemFileEntry /
+ * FileSystemDirectoryEntry but the relevant methods are callback-
+ * based. Keep the shape narrow + explicit so the recursion below
+ * type-checks without pulling in the full DOM lib types.
+ */
+interface FSEntry {
+  isFile: boolean;
+  isDirectory: boolean;
+  // Leaf name; walkEntry joins these to build relative paths.
+  name: string;
+  // Declared for shape parity; walkEntry does not read it.
+  fullPath: string;
+  // Callback-style accessor for the underlying File. Optional because
+  // walkEntry only calls it when isFile is true.
+  file?(success: (f: File) => void, fail?: (e: unknown) => void): void;
+  // Reader whose readEntries is called repeatedly until it yields an
+  // empty batch. No error callback is declared here, so a failed
+  // directory read cannot be surfaced through this type.
+  createReader?(): { readEntries(success: (entries: FSEntry[]) => void): void };
+}
+
+// One flattened result of the walk: a File plus where it sat in the drop.
+interface CollectedEntry {
+  file: File;
+  /** Path relative to the dropped root (e.g. "skills/web-search/SKILL.md"
+   *  for a dropped "skills/" folder containing web-search/SKILL.md). */
+  relativePath: string;
+}
+
+/**
+ * Walk a DataTransferItemList, returning every file entry as a flat
+ * array keyed by the path relative to the originally-dropped item.
+ * Folders dropped from the OS expand recursively; loose files
+ * passthrough with name as the relative path.
+ *
+ * Skips items where webkitGetAsEntry() returns null — that's how
+ * the browser signals a non-file payload (e.g. a dragged URL or
+ * text snippet).
+ *
+ * Every entry is resolved synchronously BEFORE the first await. The
+ * item list is only readable while the drop event is dispatching;
+ * once this function yields, `items` reads as empty, so resolving
+ * entries lazily inside the walk loop would silently drop every
+ * item after the first in a multi-item drop.
+ *
+ * Must therefore be called synchronously from the drop handler.
+ */
+async function collectFileEntries(
+  items: DataTransferItemList,
+): Promise<CollectedEntry[]> {
+  // Phase 1 (sync): snapshot the entries while `items` is still live.
+  const roots: FSEntry[] = [];
+  for (let i = 0; i < items.length; i++) {
+    const item = items[i];
+    if (item.kind !== "file") continue;
+    // webkitGetAsEntry is the name Chromium, Firefox and Safari all
+    // ship; there is no unprefixed alternative today. Optional call
+    // so an environment without it skips the item.
+    const entry = (item as DataTransferItem & {
+      webkitGetAsEntry?: () => FSEntry | null;
+    }).webkitGetAsEntry?.();
+    if (entry) roots.push(entry);
+  }
+  // Phase 2 (async): walk the snapshot; `items` is not touched again.
+  const out: CollectedEntry[] = [];
+  for (const root of roots) {
+    await walkEntry(root, "", out);
+  }
+  return out;
+}
+
+/**
+ * Recursively flatten one entry into `out`.
+ *
+ * `prefix` is the slash-joined path of the ancestors already walked
+ * ("" at the drop root). A file entry appends a single
+ * { file, relativePath } record; a directory entry recurses into its
+ * children with its own path as the new prefix. An entry that is
+ * neither, or lacks the matching accessor, contributes nothing.
+ */
+async function walkEntry(
+  entry: FSEntry,
+  prefix: string,
+  out: CollectedEntry[],
+): Promise<void> {
+  const here = prefix ? `${prefix}/${entry.name}` : entry.name;
+
+  if (entry.isFile && entry.file) {
+    const file = await new Promise<File>((ok, fail) => entry.file!(ok, fail));
+    out.push({ file, relativePath: here });
+    return;
+  }
+
+  if (!entry.isDirectory || !entry.createReader) return;
+
+  const reader = entry.createReader();
+  // One readEntries call may return only part of the directory (up to
+  // ~100 entries on Chromium), so keep asking until a call comes back
+  // empty — otherwise large folders would be truncated.
+  for (;;) {
+    const batch = await new Promise<FSEntry[]>((ok) => reader.readEntries(ok));
+    if (batch.length === 0) break;
+    for (const child of batch) {
+      await walkEntry(child, here, out);
+    }
+  }
+}
+
+// Exported for direct testing — the recursion + readEntries batching
+// is the part most likely to silently truncate a real folder upload.
+export const __testables = { collectFileEntries, walkEntry };
@@ -297,10 +297,49 @@ export function SkillsTab({ workspaceId, data }: Props) {
    }
  };

+  // Compact-empty pattern: when the workspace has zero plugins
+  // installed, the registry isn't open, AND `installedLoaded` is set,
+  // collapse the whole "Plugins" section into a single inline pill
+  // rather than rendering the full panel chrome. Reported on
+  // production 2026-05-05 (#2971):
+  // the empty state's panel-with-zero-list-rows layout gives the user
+  // a lot of vertical real estate for content that's just "0
+  // installed + Install button". The compact form keeps that
+  // affordance without the chrome.
+  //
+  // Expanded/full layout still fires when installed.length > 0 OR
+  // when the user opens the registry (clicked "+ Install Plugin").
+  // Once a plugin is installed the section auto-expands to surface
+  // the list.
+  const compactEmpty = installed.length === 0 && !showRegistry && installedLoaded;
+
+  if (compactEmpty) {
+    return (
+      <div className="p-4 space-y-4">
+        <div
+          className="flex items-center justify-between gap-2 rounded-full border border-line/60 bg-surface-sunken/70 px-3 py-1.5"
+          aria-label="Plugins (none installed)"
+        >
+          <div className="flex items-center gap-2">
+            <span className="text-[10px] uppercase tracking-[0.2em] text-ink-soft">Plugins</span>
+            <span className="text-[11px] text-ink-mid">0 installed</span>
+          </div>
+          <button
+            onClick={() => setShowRegistry(true)}
+            className="rounded-full border border-violet-700/50 bg-violet-950/30 px-3 py-0.5 text-[10px] text-violet-200 hover:bg-violet-900/40 transition-colors"
+            aria-expanded="false"
+            aria-controls="plugins-section"
+          >
+            + Install Plugin
+          </button>
+        </div>
+      </div>
+    );
+  }
+
  return (
    <div className="p-4 space-y-4">
      {/* Plugins section */}
-      <div className="rounded-xl border border-line bg-surface-sunken/70 p-3">
+      <div id="plugins-section" className="rounded-xl border border-line bg-surface-sunken/70 p-3">
        <div className="flex items-center justify-between gap-3">
          <div>
            <div className="text-[10px] uppercase tracking-[0.22em] text-ink-soft">Plugins</div>
@@ -311,6 +350,8 @@ export function SkillsTab({ workspaceId, data }: Props) {
          <button
            onClick={() => setShowRegistry(!showRegistry)}
            className="rounded-full border border-violet-700/50 bg-violet-950/30 px-3 py-1 text-[10px] text-violet-200 hover:bg-violet-900/40 transition-colors"
+            aria-expanded={showRegistry}
+            aria-controls="plugins-registry"
          >
            {showRegistry ? "Hide Registry" : "+ Install Plugin"}
          </button>
@@ -1,13 +1,11 @@
 // @vitest-environment jsdom
 //
-// Pins the lazy-loading chat-history pagination added 2026-05-05.
+// Pins the lazy-loading chat-history pagination.
 //
-// Pre-fix: ChatTab fetched the newest 50 messages on every mount and
-// scrolled to bottom, paying full DOM cost up-front even when the user
-// only wanted to read the last few bubbles. Post-fix: initial load is
-// bounded to 10 newest, and an IntersectionObserver on a top sentinel
-// triggers loadOlder() (batch of 20 with `before_ts` cursor) when the
-// user scrolls up.
+// PR-C-2 (RFC #2945): canvas was migrated from /activity?type=a2a_receive
+// to /chat-history. Server now returns typed ChatMessage[] in
+// display-ready oldest-first order. These tests guard the canvas-side
+// pagination invariants against the new endpoint surface.
 //
 // Pinned branches:
 //   1. Initial fetch carries `limit=10` and NO before_ts (newest-first
@@ -20,11 +18,10 @@
 //      asserting the rendered bubble count matches the full page).
 //   4. The retry button after a failed initial load uses the same
 //      INITIAL_HISTORY_LIMIT (10), not the legacy 50.
-//
-// IntersectionObserver / scroll-anchor restoration is exercised by the
-// E2E synth-canary suite — pinning it in jsdom would require mocking
-// the observer and faking layout, which is brittler than trusting a
-// live-DOM canary against the staging tenant.
+//   5. before_ts cursor is the OLDEST timestamp from the current page,
+//      passed verbatim to walk backward.
+//   6. Inflight guard rejects duplicate IO triggers while a loadOlder
+//      fetch is in flight.

 import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
 import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
@@ -33,24 +30,31 @@ import React from "react";
 afterEach(cleanup);

 // Both ChatTab sub-panels (MyChat + AgentComms) mount simultaneously so
-// keyboard tab order and aria-controls land on a real DOM. Both fire
-// /activity GETs on mount: MyChat's hits `type=a2a_receive&source=canvas`,
-// AgentComms's hits a different filter. Route the mock by URL so each
-// gets a sensible default and only MyChat's call is what the assertions
-// scrutinise.
-const myChatActivityCalls: string[] = [];
-let myChatNextResponse: { ok: true; rows: unknown[] } | { ok: false; err: Error } = {
-  ok: true,
-  rows: [],
-};
+// keyboard tab order and aria-controls land on a real DOM. MyChat's
+// loadMessagesFromDB hits /chat-history; AgentComms's polling hits a
+// different URL. Route the mock by URL so each gets a sensible default
+// and only MyChat's calls land in the assertion array.
+const myChatHistoryCalls: string[] = [];
+let myChatNextResponse:
+  | { ok: true; messages: unknown[]; reachedEnd?: boolean }
+  | { ok: false; err: Error } = { ok: true, messages: [] };
+
 const apiGet = vi.fn((path: string): Promise<unknown> => {
-  if (path.includes("type=a2a_receive") && path.includes("source=canvas")) {
-    myChatActivityCalls.push(path);
-    if (myChatNextResponse.ok) return Promise.resolve(myChatNextResponse.rows);
+  if (path.includes("/chat-history")) {
+    myChatHistoryCalls.push(path);
+    if (myChatNextResponse.ok) {
+      const reached_end =
+        myChatNextResponse.reachedEnd !== undefined
+          ? myChatNextResponse.reachedEnd
+          : myChatNextResponse.messages.length < 10;
+      return Promise.resolve({
+        messages: myChatNextResponse.messages,
+        reached_end,
+      });
+    }
    return Promise.reject(myChatNextResponse.err);
  }
-  // AgentComms / heartbeat / anything else — empty array is a safe
-  // default that won't blow up the corresponding component's .then().
+  // AgentComms / heartbeat / anything else — empty array safe default.
  return Promise.resolve([]);
 });
 const apiPost = vi.fn();
@@ -84,8 +88,8 @@ const ioInstances: IOInstance[] = [];
 beforeEach(() => {
  apiGet.mockClear();
  apiPost.mockReset();
-  myChatActivityCalls.length = 0;
-  myChatNextResponse = { ok: true, rows: [] };
+  myChatHistoryCalls.length = 0;
+  myChatNextResponse = { ok: true, messages: [] };
  ioInstances.length = 0;
  class FakeIO {
    private inst: IOInstance;
@@ -101,20 +105,12 @@ beforeEach(() => {
      this.inst.disconnected = true;
    }
  }
-  // Install on every reachable global — different bundlers / module
-  // graphs can resolve `IntersectionObserver` via `window`, `globalThis`,
-  // or the bare global. Without all three, jsdom's own (pre-existing)
-  // stub silently wins and ioInstances stays empty.
  (window as unknown as { IntersectionObserver: unknown }).IntersectionObserver = FakeIO;
  (globalThis as unknown as { IntersectionObserver: unknown }).IntersectionObserver = FakeIO;
-  // jsdom doesn't implement scrollIntoView; ChatTab calls it after every
-  // messages update.
  Element.prototype.scrollIntoView = vi.fn();
 });

 function triggerIntersection(instanceIdx = -1) {
-  // -1 → the latest observer (the live one). Tests targeting an old
-  // (disconnected) instance pass a positive index.
  const inst = ioInstances.at(instanceIdx);
  if (!inst) throw new Error(`no IO instance at ${instanceIdx}`);
  inst.callback(
@@ -125,25 +121,30 @@ function triggerIntersection(instanceIdx = -1) {

 import { ChatTab } from "../ChatTab";

-function makeActivityRow(seq: number): Record<string, unknown> {
-  // Zero-pad seq into the minute slot so "seq=10" doesn't produce
-  // the invalid timestamp "00:010:00Z" (caught by the loadOlder URL
-  // assertion below — first version of the helper used `0${seq}` and
-  // the test failed on `before_ts` having an extra digit).
+// makeMessagePair returns a (user, agent) pair sharing a timestamp,
+// matching the wire shape /chat-history emits per activity_logs row.
+// Server-side reverseRowChunks ensures the wire is oldest-first across
+// rows but [user, agent] within each row.
+function makeMessagePair(seq: number): unknown[] {
+  // Zero-pad seq into the minute slot so seq=10 produces a valid
+  // timestamp (00:10:00Z, not 00:010:00Z).
  const mm = String(seq).padStart(2, "0");
-  return {
-    activity_type: "a2a_receive",
-    status: "ok",
-    created_at: `2026-05-05T00:${mm}:00Z`,
-    request_body: { params: { message: { parts: [{ kind: "text", text: `user msg ${seq}` }] } } },
-    response_body: { result: `agent reply ${seq}` },
-  };
+  const ts = `2026-05-05T00:${mm}:00Z`;
+  return [
+    { id: `u-${seq}`, role: "user", content: `user msg ${seq}`, timestamp: ts },
+    { id: `a-${seq}`, role: "agent", content: `agent reply ${seq}`, timestamp: ts },
+  ];
 }

-// Server returns newest-first; the helper builds a server-shape page
-// so the order in the rendered messages array matches production.
-function newestFirstPage(start: number, count: number): unknown[] {
-  return Array.from({ length: count }, (_, i) => makeActivityRow(start + count - 1 - i));
+// pageOldestFirst builds a wire-shape page (oldest-first within page)
+// of `count` row-pairs starting at seq=`start`. Mirrors the server's
+// post-reverseRowChunks emission order.
+function pageOldestFirst(start: number, count: number): unknown[] {
+  const out: unknown[] = [];
+  for (let i = 0; i < count; i++) {
+    out.push(...makeMessagePair(start + i));
+  }
+  return out;
 }

 const minimalData = {
@@ -153,28 +154,30 @@ const minimalData = {
 } as unknown as Parameters<typeof ChatTab>[0]["data"];

 describe("ChatTab lazy history pagination", () => {
-  it("initial fetch carries limit=10 (not the legacy 50)", async () => {
-    myChatNextResponse = { ok: true, rows: [makeActivityRow(1)] };
+  it("initial fetch carries limit=10 (not the legacy 50) and hits /chat-history", async () => {
+    myChatNextResponse = { ok: true, messages: makeMessagePair(1) };
    render(<ChatTab workspaceId="ws-1" data={minimalData} />);
-    await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
-    const url = myChatActivityCalls[0];
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(1));
+    const url = myChatHistoryCalls[0];
+    expect(url).toContain("/chat-history");
    expect(url).toContain("limit=10");
    expect(url).not.toContain("limit=50");
    // before_ts should NOT be set on the initial fetch — that's the
    // newest-first slice the user lands on.
    expect(url).not.toContain("before_ts");
+    // /chat-history filters source-canvas server-side; client should
+    // NOT pass type/source params (they belonged to /activity).
+    expect(url).not.toContain("type=a2a_receive");
+    expect(url).not.toContain("source=canvas");
  });

  it("hides the top sentinel when initial fetch returns fewer than the limit", async () => {
    // 3 < 10 → server says "no more older history exists"; sentinel
    // should NOT mount and the "Loading older messages…" line should
-    // never appear (it can't, since the sentinel is what triggers it).
-    myChatNextResponse = {
-      ok: true,
-      rows: [makeActivityRow(1), makeActivityRow(2), makeActivityRow(3)],
-    };
+    // never appear.
+    myChatNextResponse = { ok: true, messages: pageOldestFirst(1, 3) };
    render(<ChatTab workspaceId="ws-2" data={minimalData} />);
-    await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(1));
    await waitFor(() => {
      expect(screen.queryByText(/Loading chat history/i)).toBeNull();
    });
@@ -182,15 +185,15 @@ describe("ChatTab lazy history pagination", () => {
  });

  it("renders all messages when initial fetch returns exactly the limit", async () => {
-    // 10 == limit → server might have more older rows; sentinel SHOULD
-    // mount so the IO observer can fire loadOlder() on scroll-up. We
-    // verify by checking the rendered bubble count — if hasMore stayed
-    // true the sentinel render path doesn't crash and all 10 rows
-    // produced their pair of bubbles.
-    const fullPage = Array.from({ length: 10 }, (_, i) => makeActivityRow(i + 1));
-    myChatNextResponse = { ok: true, rows: fullPage };
+    // limit=10 row-pairs → 20 ChatMessages. reachedEnd should be FALSE
+    // so the sentinel mounts. Verified by bubble counts.
+    myChatNextResponse = {
+      ok: true,
+      messages: pageOldestFirst(1, 10),
+      reachedEnd: false,
+    };
    render(<ChatTab workspaceId="ws-3" data={minimalData} />);
-    await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(1));
    await waitFor(() => {
      expect(screen.queryByText(/Loading chat history/i)).toBeNull();
    });
@@ -202,54 +205,67 @@ describe("ChatTab lazy history pagination", () => {
    myChatNextResponse = { ok: false, err: new Error("network down") };
    render(<ChatTab workspaceId="ws-4" data={minimalData} />);
    const retry = await screen.findByText(/Retry/);
-    myChatNextResponse = { ok: true, rows: [makeActivityRow(1)] };
+    myChatNextResponse = { ok: true, messages: makeMessagePair(1) };
    fireEvent.click(retry);
-    await waitFor(() => expect(myChatActivityCalls.length).toBe(2));
-    const retryUrl = myChatActivityCalls[1];
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(2));
+    const retryUrl = myChatHistoryCalls[1];
+    expect(retryUrl).toContain("/chat-history");
    expect(retryUrl).toContain("limit=10");
    expect(retryUrl).not.toContain("limit=50");
  });

  it("loadOlder fetches limit=20 with before_ts=oldest.timestamp", async () => {
-    // Initial page = 10 rows in newest-first order (seq 10..1). After
-    // the component reverses to oldest-first for display, messages[0]
-    // is built from seq=1 — the oldest — and its timestamp is what
-    // before_ts should carry.
-    myChatNextResponse = { ok: true, rows: newestFirstPage(1, 10) };
+    // Initial page = 10 row-pairs in oldest-first order (seq 1..10).
+    // The oldest (and so the cursor for loadOlder) is seq=1's
+    // timestamp 2026-05-05T00:01:00Z.
+    myChatNextResponse = {
+      ok: true,
+      messages: pageOldestFirst(1, 10),
+      reachedEnd: false,
+    };
    render(<ChatTab workspaceId="ws-load-older" data={minimalData} />);
-    await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(1));
    await waitFor(() => expect(ioInstances.length).toBeGreaterThan(0));

-    // Stage the older-batch response, then fire the IO callback.
-    myChatNextResponse = { ok: true, rows: newestFirstPage(0, 1) };
+    // Stage older-batch response, then fire IO callback.
+    myChatNextResponse = {
+      ok: true,
+      messages: pageOldestFirst(0, 1),
+      reachedEnd: true,
+    };
    triggerIntersection();

-    await waitFor(() => expect(myChatActivityCalls.length).toBe(2));
-    const olderUrl = myChatActivityCalls[1];
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(2));
+    const olderUrl = myChatHistoryCalls[1];
+    expect(olderUrl).toContain("/chat-history");
    expect(olderUrl).toContain("limit=20");
    expect(olderUrl).toContain("before_ts=");
    expect(decodeURIComponent(olderUrl)).toContain("before_ts=2026-05-05T00:01:00Z");
  });

  it("inflight guard rejects a second IO trigger while first loadOlder is in flight", async () => {
-    myChatNextResponse = { ok: true, rows: newestFirstPage(1, 10) };
+    myChatNextResponse = {
+      ok: true,
+      messages: pageOldestFirst(1, 10),
+      reachedEnd: false,
+    };
    render(<ChatTab workspaceId="ws-inflight" data={minimalData} />);
-    await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(1));
    await waitFor(() => expect(ioInstances.length).toBeGreaterThan(0));

    // Hold the next loadOlder fetch open with a manual deferred so we
    // can fire the second trigger while the first is in-flight.
-    let release!: (rows: unknown[]) => void;
-    const deferred = new Promise<unknown[]>((res) => {
+    let release!: (resp: unknown) => void;
+    const deferred = new Promise<unknown>((res) => {
      release = res;
    });
    apiGet.mockImplementationOnce((path: string): Promise<unknown> => {
-      myChatActivityCalls.push(path);
+      myChatHistoryCalls.push(path);
      return deferred;
    });

    triggerIntersection(); // start loadOlder #1
-    await waitFor(() => expect(myChatActivityCalls.length).toBe(2));
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(2));

    // Second IO trigger lands while #1 is still pending.
    triggerIntersection();
@@ -258,79 +274,62 @@ describe("ChatTab lazy history pagination", () => {
    // Without the inflight guard, each of these would have started a
    // new fetch. With the guard, none of them do — call count stays 2.
    await new Promise((r) => setTimeout(r, 10));
-    expect(myChatActivityCalls.length).toBe(2);
+    expect(myChatHistoryCalls.length).toBe(2);

-    // Release the first fetch. Inflight clears in the finally block;
-    // a subsequent IO trigger is permitted again (verified by checking
-    // we can fire a follow-up after release without hanging the test).
-    release([]);
-    await waitFor(() => expect(myChatActivityCalls.length).toBe(2));
+    // Release the first fetch with a valid wire response shape.
+    release({ messages: [], reached_end: true });
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(2));
  });

  it("empty older response clears the scroll anchor and unmounts the sentinel", async () => {
-    // The bug we're pinning: if loadOlder returns 0 rows, the
-    // scrollAnchorRef must be cleared so the next paint doesn't try to
-    // restore against a no-op prepend (which would fight the natural
-    // bottom-pin for any subsequent live message). hasMore flipping to
-    // false is the same flag-flip path; sentinel disappearing is the
-    // observable proxy.
-    myChatNextResponse = { ok: true, rows: newestFirstPage(1, 10) };
+    myChatNextResponse = {
+      ok: true,
+      messages: pageOldestFirst(1, 10),
+      reachedEnd: false,
+    };
    render(<ChatTab workspaceId="ws-anchor" data={minimalData} />);
-    await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(1));
    await waitFor(() => expect(ioInstances.length).toBeGreaterThan(0));

-    myChatNextResponse = { ok: true, rows: [] }; // empty → reachedEnd
+    myChatNextResponse = {
+      ok: true,
+      messages: [],
+      reachedEnd: true,
+    };
    triggerIntersection();
-    await waitFor(() => expect(myChatActivityCalls.length).toBe(2));
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(2));

-    // After reachedEnd the sentinel unmounts (hasMore=false). We can't
-    // peek scrollAnchorRef directly, but we can assert the consequence:
-    // scrollIntoView (the bottom-pin for live appends) is not blocked
-    // by a stale anchor. Trigger a re-render via an unrelated state
-    // change… in practice the safest assertion here is that the
-    // sentinel disappeared (proving the empty response propagated to
-    // hasMore correctly, which is the same flag-flip path as anchor
-    // clearing).
    await waitFor(() => {
      expect(screen.queryByText(/Loading older messages/i)).toBeNull();
    });
  });

  it("IntersectionObserver does not churn when older messages prepend", async () => {
-    // Whole-PR perf invariant: prepending older history (the load-bearing
-    // user gesture) must NOT tear down + re-arm the IO observer.
-    // Triggering loadOlder is the cleanest way to drive a messages
-    // mutation from inside the test, since live agent push goes through
-    // a Zustand store that's harder to drive reliably from jsdom.
-    //
-    // Pre-fix, loadOlder depended on `messages`, so every prepend
-    // recreated loadOlder → re-ran the IO effect → new observer. Each
-    // call to triggerIntersection() produced a fresh disconnected
-    // observer + a new live one. Post-fix, the observer survives.
-    myChatNextResponse = { ok: true, rows: newestFirstPage(1, 10) };
+    myChatNextResponse = {
+      ok: true,
+      messages: pageOldestFirst(1, 10),
+      reachedEnd: false,
+    };
    render(<ChatTab workspaceId="ws-stable-io" data={minimalData} />);
-    await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
+    await waitFor(() => expect(myChatHistoryCalls.length).toBe(1));
    await waitFor(() => expect(ioInstances.length).toBeGreaterThan(0));

-    // Snapshot the observer instance after first paint stabilises.
    const observerBefore = ioInstances.at(-1);
    expect(observerBefore).toBeDefined();
    expect(observerBefore!.disconnected).toBe(false);

    // Trigger three older-batch prepends. Each batch returns the full
-    // OLDER_HISTORY_BATCH (20 rows) so reachedEnd stays false and the
-    // sentinel keeps mounting. Pre-fix, each prepend mutated `messages`
-    // → recreated loadOlder → re-ran the IO effect → new observer.
+    // OLDER_HISTORY_BATCH (20 row-pairs = 40 messages) so reachedEnd
+    // stays false and the sentinel keeps mounting.
    for (let batch = 0; batch < 3; batch++) {
      myChatNextResponse = {
        ok: true,
-        rows: newestFirstPage(-(batch + 1) * 20, 20),
+        messages: pageOldestFirst(-(batch + 1) * 20, 20),
+        reachedEnd: false,
      };
-      const callsBefore = myChatActivityCalls.length;
+      const callsBefore = myChatHistoryCalls.length;
      triggerIntersection();
-      await waitFor(() =>
-        expect(myChatActivityCalls.length).toBe(callsBefore + 1),
-      );
+      await waitFor(() => expect(myChatHistoryCalls.length).toBe(callsBefore + 1));
    }

    // The original observer is still the live one — no churn.
@@ -0,0 +1,119 @@
+// @vitest-environment jsdom
+//
+// Pins the "Files not available" early-return for runtimes whose
+// filesystem the platform doesn't own (today: runtime === "external").
+//
+// Pre-fix: FilesTab issued a GET /workspaces/<id>/files for every
+// workspace. The platform's response for an external workspace is
+// always [] (no rows in workspace_files), but the canvas rendered
+// "0 files / No config files yet" — visually identical to the SaaS
+// empty-listing bug fixed in PR-A. The placeholder makes the absence
+// intentional.
+//
+// Pinned branches:
+//   1. external runtime → "Files not available" banner renders,
+//      runtime name surfaces in the body so user knows WHY.
+//   2. external runtime → useFilesApi is NOT invoked. Verified by
+//      asserting the mocked api.get was never called.
+//   3. claude-code (or any other runtime) → no banner, normal mount
+//      proceeds (`/configs` toolbar visible). Pre-fix regression cover.
+//   4. data prop omitted (legacy callers) → no early-return, falls
+//      through to normal mount.
+
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { render, screen, cleanup, waitFor } from "@testing-library/react";
+import React from "react";
+
+afterEach(cleanup);
+
+// Mock the api module so the normal-mount branches don't try to
+// fetch against a real backend — and so we can assert the
+// external-runtime branch never fires a request.
+//
+// `apiCalls` records every path handed to api.get; each GET resolves
+// to an empty array, while put/del resolve with no value.
+const apiCalls: string[] = [];
+vi.mock("@/lib/api", () => ({
+  api: {
+    get: vi.fn((path: string) => {
+      apiCalls.push(path);
+      return Promise.resolve([]);
+    }),
+    put: vi.fn(() => Promise.resolve()),
+    del: vi.fn(() => Promise.resolve()),
+  },
+}));
+
+// useCanvasStore is referenced by useFilesApi for the needsRestart
+// flag. The Toaster import inside FilesTab also pulls the store
+// indirectly. Stub minimally to satisfy the import chain.
+//
+// The real module's other exports are kept (spread of `actual`); only
+// useCanvasStore is swapped for an object whose getState() hands back
+// a fresh updateNodeData spy on every call.
+vi.mock("@/store/canvas", async () => {
+  const actual = await vi.importActual<typeof import("@/store/canvas")>(
+    "@/store/canvas",
+  );
+  return {
+    ...actual,
+    useCanvasStore: {
+      getState: () => ({
+        updateNodeData: vi.fn(),
+      }),
+    },
+  };
+});
+
+// Replace the toast helper with a spy.
+vi.mock("../Toaster", () => ({
+  showToast: vi.fn(),
+}));
+
+// Empty the recorder in place before each test; the api.get mock above
+// keeps pushing into this same array.
+beforeEach(() => {
+  apiCalls.length = 0;
+});
+
+import { FilesTab } from "../FilesTab";
+
+// Shape of FilesTab's `data` prop, derived from the component itself.
+type FilesTabData = Parameters<typeof FilesTab>[0]["data"];
+
+// Fixtures populate only `runtime` and `status`; the double cast
+// stands in for the rest of the prop type.
+const externalData = { runtime: "external", status: "online" } as unknown as FilesTabData;
+
+const claudeData = { runtime: "claude-code", status: "online" } as unknown as FilesTabData;
+
+// Covers both sides of the runtime gate: the "external" runtime must
+// short-circuit to the placeholder without touching the API, while any
+// other runtime (or no `data` prop at all) must mount the normal
+// toolbar.
+describe("FilesTab not-available early-return for runtimes without platform-owned filesystem", () => {
+  it("external runtime renders the not-available banner with runtime name", () => {
+    render(<FilesTab workspaceId="ws-ext" data={externalData} />);
+    expect(screen.getByText(/Files not available/i)).not.toBeNull();
+    // Runtime name must surface so the user understands WHY — without
+    // it the placeholder reads as a generic error.
+    expect(screen.getByText(/external/)).not.toBeNull();
+    // Chat tab is the recommended alternative — flagged in copy so the
+    // user knows where to go next instead of bouncing tabs.
+    expect(screen.getByText(/Chat tab/i)).not.toBeNull();
+  });
+
+  it("external runtime does NOT issue any /files API call", async () => {
+    render(<FilesTab workspaceId="ws-ext" data={externalData} />);
+    // Yield one timer tick (setTimeout 0) so a request scheduled from
+    // useEffect would have had the chance to start before we inspect
+    // the recorded calls.
+    await new Promise((r) => setTimeout(r, 0));
+    const filesCalls = apiCalls.filter((p) => p.includes("/files"));
+    expect(filesCalls).toEqual([]);
+  });
+
+  it("claude-code runtime does NOT render the banner (normal mount)", async () => {
+    render(<FilesTab workspaceId="ws-claude" data={claudeData} />);
+    // Wait on the POSITIVE condition: the toolbar's root selector
+    // appearing proves the platform-owned rendering path has mounted
+    // (useEffect → loadFiles → setLoading false). Waiting only for the
+    // banner to be absent would pass on the very first render and
+    // never actually wait for the mount to settle.
+    await waitFor(() => {
+      expect(screen.getByLabelText(/File root directory/i)).not.toBeNull();
+    });
+    // With the mount settled, the placeholder must not be on screen.
+    expect(screen.queryByText(/Files not available/i)).toBeNull();
+  });
+
+  it("data prop omitted falls through to normal mount (back-compat)", async () => {
+    render(<FilesTab workspaceId="ws-no-data" />);
+    // Without data we can't gate on runtime — must mount normally.
+    // Same positive wait as above so the check can't race the load.
+    await waitFor(() => {
+      expect(screen.getByLabelText(/File root directory/i)).not.toBeNull();
+    });
+    expect(screen.queryByText(/Files not available/i)).toBeNull();
+  });
+});
@@ -0,0 +1,141 @@
+// @vitest-environment jsdom
+//
+// Pins the compact-when-empty layout for the SkillsTab Plugins section
+// (issue #2971, reported on production 2026-05-05).
+//
+// Three states matter for layout:
+//   1. installed.length === 0 + registry closed + load completed → COMPACT pill
+//   2. installed.length > 0  → FULL panel + installed list
+//   3. registry open (showRegistry=true) → FULL panel + registry browser
+//
+// The compact-empty path is the new behavior; the other two were
+// pre-existing. This test pins all three so a future refactor that
+// over-collapses (showing compact when plugins are installed) or
+// over-expands (showing full panel on empty load) fails loudly.
+
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { render, screen, cleanup, fireEvent, waitFor } from "@testing-library/react";
+import React from "react";
+
+afterEach(cleanup);
+
+// Single spy behind api.get; each test installs its own implementation.
+// NOTE(review): the mock factory reaches apiGet through a wrapper arrow
+// instead of passing it directly — presumably so the hoisted vi.mock
+// factory never reads apiGet before this const is initialised. Confirm
+// before simplifying.
+const apiGet = vi.fn();
+vi.mock("@/lib/api", () => ({
+  api: {
+    get: (path: string, opts?: unknown) => apiGet(path, opts),
+    post: vi.fn(() => Promise.resolve({})),
+    del: vi.fn(),
+    patch: vi.fn(),
+    put: vi.fn(),
+  },
+}));
+
+beforeEach(() => {
+  // Drop the previous test's implementation and recorded calls.
+  apiGet.mockReset();
+  // NOTE(review): presumably stubbed because the jsdom environment
+  // does not provide scrollIntoView — confirm.
+  Element.prototype.scrollIntoView = vi.fn();
+});
+
+import { SkillsTab } from "../SkillsTab";
+
+// Shape of SkillsTab's `data` prop, derived from the component itself.
+type SkillsTabData = Parameters<typeof SkillsTab>[0]["data"];
+
+// Smallest fixture the tests pass as `data`; only these four fields
+// are populated and the double cast covers the rest of the prop type.
+const minimalData = {
+  status: "online" as const,
+  runtime: "claude-code",
+  currentTask: "",
+  agentCard: undefined,
+} as unknown as SkillsTabData;
+
+// Pins the three layout states of the Plugins section (compact pill,
+// full panel with installed plugins, full panel with registry open)
+// plus the "still loading" state, which must not show the pill.
+describe("SkillsTab Plugins compact-empty layout", () => {
+  it("renders compact pill when installed.length === 0 and registry closed", async () => {
+    // Every fetch returns an empty array — workspace is fresh, no
+    // plugins. No per-path routing is needed here: all endpoints get
+    // the same empty payload.
+    apiGet.mockImplementation(() => Promise.resolve([]));
+    render(<SkillsTab workspaceId="ws-fresh" data={minimalData} />);
+
+    // Wait for the installedLoaded gate to flip — without that the
+    // component renders a "loading" state, not the compact pill.
+    await waitFor(() => {
+      expect(screen.getByLabelText(/Plugins \(none installed\)/i)).toBeTruthy();
+    });
+
+    // Compact assertions: the rounded-xl panel chrome MUST NOT be in
+    // the DOM (we'd see two "Plugins" labels — one in the header,
+    // one in the pill — if the layout regressed to "always full
+    // panel"). The compact form has exactly one "Plugins" label.
+    const labels = screen.getAllByText("Plugins");
+    expect(labels).toHaveLength(1);
+
+    // The full-panel chrome's id="plugins-section" should NOT be
+    // rendered when we're in compact mode.
+    expect(document.getElementById("plugins-section")).toBeNull();
+  });
+
+  it("renders full panel when installed.length > 0", async () => {
+    // Only paths ending in "/plugins" report an installed plugin;
+    // every other request resolves empty.
+    apiGet.mockImplementation((path: string) => {
+      if (path.endsWith("/plugins")) {
+        return Promise.resolve([
+          { name: "memory-postgres", version: "1.0.0", description: "memory backend", supported_on_runtime: true },
+        ]);
+      }
+      return Promise.resolve([]);
+    });
+    render(<SkillsTab workspaceId="ws-installed" data={minimalData} />);
+
+    await waitFor(() => {
+      expect(screen.getByText(/1 installed/i)).toBeTruthy();
+    });
+
+    // Full-panel chrome MUST be present — id pin.
+    expect(document.getElementById("plugins-section")).not.toBeNull();
+    // Compact pill ariaLabel MUST NOT be present.
+    expect(screen.queryByLabelText(/Plugins \(none installed\)/i)).toBeNull();
+  });
+
+  it("expands to full panel when user clicks + Install Plugin from compact pill", async () => {
+    apiGet.mockImplementation(() => Promise.resolve([]));
+    render(<SkillsTab workspaceId="ws-expand" data={minimalData} />);
+
+    // Start compact — wait for the compact pill to settle so we click
+    // the right button (initial render before installedLoaded flips
+    // doesn't have either layout, and the post-load compact pill is
+    // what we want to interact with).
+    await waitFor(() => {
+      expect(screen.getByLabelText(/Plugins \(none installed\)/i)).toBeTruthy();
+    });
+    const installBtn = screen.getByRole("button", { name: /\+ Install Plugin/i });
+    expect(installBtn.getAttribute("aria-expanded")).toBe("false");
+
+    fireEvent.click(installBtn);
+
+    // After click, registry opens → full panel renders. The compact
+    // pill's aria-label should be gone; the full-panel id should
+    // appear. Generous waitFor — a registry fetch may also fire in
+    // the React effect chain, and we want to assert the compact →
+    // full transition without racing it.
+    await waitFor(
+      () => {
+        expect(document.getElementById("plugins-section")).not.toBeNull();
+      },
+      { timeout: 3000 },
+    );
+    expect(screen.queryByLabelText(/Plugins \(none installed\)/i)).toBeNull();
+  });
+
+  it("does NOT collapse to compact while initial load is pending (avoid flash)", () => {
+    // Returning a never-resolving promise means installedLoaded stays
+    // false. The compact pill MUST NOT render in this state — that
+    // would flash compact → full as the load completes, which looks
+    // janky. The component shows a loading shell instead (the
+    // existing pre-fix behavior).
+    apiGet.mockImplementation(() => new Promise(() => {}));
+    render(<SkillsTab workspaceId="ws-loading" data={minimalData} />);
+
+    // Synchronous assertion — no waitFor — since we want to confirm
+    // the compact pill is NOT rendered before any network round-trip
+    // finishes.
+    expect(screen.queryByLabelText(/Plugins \(none installed\)/i)).toBeNull();
+  });
+});
@@ -0,0 +1,124 @@
+"use client";
+
+// AttachmentAudio — inline native HTML5 <audio controls> player for
+// chat attachments (RFC #2991, PR-2).
+//
+// Same auth + Blob-URL pattern as AttachmentImage / AttachmentVideo.
+// Native audio control bar handles play/pause/scrub/volume/download,
+// and there's no fullscreen UI to worry about (audio doesn't need
+// AttachmentLightbox).
+
+import { useState, useEffect, useRef } from "react";
+import type { ChatAttachment } from "./types";
+import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
+import { AttachmentChip } from "./AttachmentViews";
+
+// Props for AttachmentAudio.
+interface Props {
+  // Passed to resolveAttachmentHref together with attachment.uri.
+  workspaceId: string;
+  // The attachment to play; `uri` drives the fetch, `name` is shown as
+  // the label and used in the loading aria-label.
+  attachment: ChatAttachment;
+  // Forwarded to the AttachmentChip fallback rendered on error.
+  onDownload: (a: ChatAttachment) => void;
+  // Selects the border/background classes and is forwarded to the chip.
+  tone: "user" | "agent";
+}
+
+// Load lifecycle of the audio source. `ready.src` is what the <audio>
+// element plays: a Blob ObjectURL for platform attachments, or the
+// resolved href for everything else.
+type FetchState =
+  | { kind: "idle" }
+  | { kind: "loading" }
+  | { kind: "ready"; src: string }
+  | { kind: "error" };
+
+// Renders a chat attachment as an inline <audio controls> player.
+//
+// Platform attachments are fetched with the JS-injected auth headers
+// and played from a Blob ObjectURL; any other URI is handed to the
+// browser via its resolved href. While loading, a fixed-size pulse
+// placeholder is shown; on any failure (non-OK response, thrown fetch,
+// or the <audio> element's own onError) the component falls back to
+// AttachmentChip so the download affordance is never lost.
+export function AttachmentAudio({ workspaceId, attachment, onDownload, tone }: Props) {
+  const [state, setState] = useState<FetchState>({ kind: "idle" });
+  // Holds the ObjectURL minted by the CURRENT effect run so cleanup
+  // revokes exactly that URL.
+  const blobUrlRef = useRef<string | null>(null);
+
+  useEffect(() => {
+    // Flipped by cleanup; a stale async continuation must not touch
+    // state or the shared ref once this is true.
+    let cancelled = false;
+    setState({ kind: "loading" });
+
+    // Non-platform URIs need no auth fetch — the browser loads the
+    // resolved href directly. Nothing to clean up on this path.
+    if (!isPlatformAttachment(attachment.uri)) {
+      setState({ kind: "ready", src: resolveAttachmentHref(workspaceId, attachment.uri) });
+      return;
+    }
+
+    void (async () => {
+      try {
+        const href = resolveAttachmentHref(workspaceId, attachment.uri);
+        const headers: Record<string, string> = {};
+        const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
+        if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
+        const slug = getTenantSlug();
+        if (slug) headers["X-Molecule-Org-Slug"] = slug;
+        const res = await fetch(href, {
+          headers,
+          credentials: "include",
+          signal: AbortSignal.timeout(60_000),
+        });
+        if (!res.ok) {
+          if (!cancelled) setState({ kind: "error" });
+          return;
+        }
+        const blob = await res.blob();
+        // Check BEFORE minting the URL or writing the ref. A stale run
+        // that wrote blobUrlRef here could overwrite the URL owned by a
+        // newer run, leaving that newer URL unrevoked (leaked) and the
+        // ref pointing at an already-revoked one.
+        if (cancelled) return;
+        const url = URL.createObjectURL(blob);
+        blobUrlRef.current = url;
+        setState({ kind: "ready", src: url });
+      } catch {
+        if (!cancelled) setState({ kind: "error" });
+      }
+    })();
+
+    return () => {
+      cancelled = true;
+      // Free the ObjectURL this run created, if it got that far.
+      if (blobUrlRef.current) {
+        URL.revokeObjectURL(blobUrlRef.current);
+        blobUrlRef.current = null;
+      }
+    };
+  }, [workspaceId, attachment.uri]);
+
+  // Failure → the regular file chip, which still offers the download.
+  if (state.kind === "error") {
+    return <AttachmentChip attachment={attachment} onDownload={onDownload} tone={tone} />;
+  }
+  // Not ready yet → fixed-size placeholder so the bubble doesn't
+  // reflow when the player appears.
+  if (state.kind === "idle" || state.kind === "loading") {
+    return (
+      <div
+        className="rounded-md border border-line/50 bg-surface-card/40 animate-pulse"
+        style={{ width: 280, height: 40 }}
+        aria-label={`Loading ${attachment.name}`}
+      />
+    );
+  }
+
+  return (
+    <div
+      className={`inline-flex flex-col gap-1 rounded-md border px-2 py-1 ${
+        tone === "user" ? "border-blue-400/30 bg-accent-strong/10" : "border-line/50 bg-surface-card/40"
+      }`}
+    >
+      {/* Filename label so the user knows what they're hearing
+          before pressing play. Short, single-line, truncated. */}
+      <span className="text-[10px] text-ink-mid truncate max-w-[280px]" title={attachment.name}>
+        {attachment.name}
+      </span>
+      <audio
+        controls
+        preload="metadata"
+        src={state.src}
+        style={{ width: 280, height: 32 }}
+        // Undecodable bytes → same chip fallback as a failed fetch.
+        onError={() => setState({ kind: "error" })}
+      >
+        {attachment.name}
+      </audio>
+    </div>
+  );
+}
+
+// Extracts the tenant slug from a `<slug>.moleculesai.app` hostname.
+// Returns null when there is no `window` (non-browser context) or the
+// hostname is not exactly one label in front of `moleculesai.app`.
+function getTenantSlug(): string | null {
+  if (typeof window === "undefined") return null;
+  const match = /^([^.]+)\.moleculesai\.app$/.exec(window.location.hostname);
+  return match === null ? null : match[1];
+}
@@ -0,0 +1,198 @@
+"use client";
+
+// AttachmentImage — inline image thumbnail + click-to-fullscreen.
+// First "specialized renderer" landing under RFC #2991 PR-1.
+//
+// Auth model
+// ----------
+//
+// The Critical UX/Security trade-off (per RFC's hostile-self-review
+// item #2): the bytes live behind workspace auth. A bare
+// <img src="https://reno-stars.../chat/download?path=…"> WILL NOT
+// include our cookie + Origin headers when the browser loads it —
+// even for same-origin canvas-server, the auth chain (cookie + token
+// + X-Molecule-Org-Slug header) is JS-injected, not browser-default.
+//
+// Solution: same auth path the chip download uses. Fetch the bytes
+// with the JS auth headers, wrap in a Blob, hand the browser an
+// ObjectURL. The image renders from local memory; no second request,
+// no auth leakage, no CORS pain.
+//
+// That same blob URL is what the lightbox shows on click — single
+// fetch, cached for the lifetime of the message bubble.
+//
+// Failure modes
+// -------------
+//
+// - Fetch fails (404, 403, network) → fall back to AttachmentChip
+//   (the existing file-pill download flow). The user still gets a
+//   working download; we just lose the inline preview.
+// - Decoded as non-image (server returned wrong Content-Type, or
+//   bytes are corrupt) → onError handler swaps to AttachmentChip.
+// - Bytes too large — no enforcement here; the server caps at 25MB
+//   per file (chat_files.go), which is too big for a thumbnail but
+//   acceptable for a chat-attached image. If we hit pain we can
+//   downscale via canvas, but defer that to v2.
+
+import { useState, useEffect, useRef } from "react";
+import type { ChatAttachment } from "./types";
+import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
+import { AttachmentLightbox } from "./AttachmentLightbox";
+import { AttachmentChip } from "./AttachmentViews";
+
+// Props for AttachmentImage.
+interface Props {
+  // Passed to resolveAttachmentHref together with attachment.uri.
+  workspaceId: string;
+  // The attachment to preview; `uri` drives the fetch, `name` feeds the
+  // alt text, titles and aria-labels.
+  attachment: ChatAttachment;
+  // Forwarded to the AttachmentChip fallback rendered on error.
+  onDownload: (a: ChatAttachment) => void;
+  // Selects the thumbnail border class and is forwarded to the chip.
+  tone: "user" | "agent";
+}
+
+// Load lifecycle of the image source. Despite the name, `ready.blobUrl`
+// is a Blob ObjectURL only for platform attachments; for other URIs it
+// carries the resolved href.
+type FetchState =
+  | { kind: "idle" }
+  | { kind: "loading" }
+  | { kind: "ready"; blobUrl: string }
+  | { kind: "error" };
+
+// Renders a chat attachment as an inline thumbnail that opens a
+// fullscreen AttachmentLightbox on click.
+//
+// Platform attachments are fetched with the JS-injected auth headers
+// and shown from a Blob ObjectURL (shared by the thumbnail and the
+// lightbox); any other URI is loaded by the browser via its resolved
+// href. On any failure the component falls back to AttachmentChip.
+export function AttachmentImage({ workspaceId, attachment, onDownload, tone }: Props) {
+  const [state, setState] = useState<FetchState>({ kind: "idle" });
+  const [open, setOpen] = useState(false);
+  // Track the ObjectURL we created so cleanup runs on the exact value
+  // we minted (state could change between effect setup and effect
+  // cleanup if a new fetch fires).
+  const blobUrlRef = useRef<string | null>(null);
+
+  useEffect(() => {
+    // Flipped by cleanup; a stale async continuation must not touch
+    // state or the shared ref once this is true.
+    let cancelled = false;
+    setState({ kind: "loading" });
+
+    // For non-platform URIs (http/https external image hosts) we can
+    // skip the auth fetch — the browser loads them directly from the
+    // href returned by resolveAttachmentHref. Nothing to clean up here.
+    if (!isPlatformAttachment(attachment.uri)) {
+      setState({ kind: "ready", blobUrl: resolveAttachmentHref(workspaceId, attachment.uri) });
+      return;
+    }
+
+    // Platform-auth path. downloadChatFile can't be reused because it
+    // triggers a Save-As; we need the bytes kept in memory as a blob,
+    // so this is a separate fetch that sends the same headers.
+    void (async () => {
+      try {
+        const href = resolveAttachmentHref(workspaceId, attachment.uri);
+        const headers: Record<string, string> = {};
+        // Read the same env var downloadChatFile reads — single source
+        // of truth would be cleaner; refactor opportunity for PR-2 if
+        // we add the same path to AttachmentVideo.
+        const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
+        if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
+        const slug = getTenantSlug();
+        if (slug) headers["X-Molecule-Org-Slug"] = slug;
+        const res = await fetch(href, {
+          headers,
+          credentials: "include",
+          signal: AbortSignal.timeout(30_000),
+        });
+        if (!res.ok) {
+          if (!cancelled) setState({ kind: "error" });
+          return;
+        }
+        const blob = await res.blob();
+        // Check BEFORE minting the URL or writing the ref. A stale run
+        // that wrote blobUrlRef here could overwrite the URL owned by a
+        // newer run, leaving that newer URL unrevoked (leaked) and the
+        // ref pointing at an already-revoked one.
+        if (cancelled) return;
+        const url = URL.createObjectURL(blob);
+        blobUrlRef.current = url;
+        setState({ kind: "ready", blobUrl: url });
+      } catch {
+        if (!cancelled) setState({ kind: "error" });
+      }
+    })();
+
+    return () => {
+      cancelled = true;
+      // Free the ObjectURL when the bubble unmounts (or the inputs
+      // change) — keeps memory bounded across long chat histories.
+      if (blobUrlRef.current) {
+        URL.revokeObjectURL(blobUrlRef.current);
+        blobUrlRef.current = null;
+      }
+    };
+  }, [workspaceId, attachment.uri]);
+
+  // Failure → render the existing file chip. Maintains the download
+  // affordance even if preview fails; the user never gets stuck.
+  if (state.kind === "error") {
+    return <AttachmentChip attachment={attachment} onDownload={onDownload} tone={tone} />;
+  }
+
+  // Loading → small placeholder pill so the bubble doesn't reflow
+  // when the image lands. Sized to roughly the thumbnail's aspect
+  // ratio guess (a 240x180 box) so the layout is stable.
+  if (state.kind === "loading" || state.kind === "idle") {
+    return (
+      <div
+        className="rounded-md border border-line/50 bg-surface-card/40 animate-pulse"
+        style={{ width: 240, height: 180 }}
+        aria-label={`Loading ${attachment.name}`}
+      />
+    );
+  }
+
+  // Ready → inline thumbnail with click handler. The img has its
+  // own onError so a corrupt blob (server returned the right size
+  // but invalid bytes) falls through to the chip too.
+  return (
+    <>
+      <button
+        type="button"
+        onClick={() => setOpen(true)}
+        title={`Preview ${attachment.name}`}
+        className={`group relative inline-block max-w-full rounded-lg overflow-hidden border focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 ${
+          tone === "user" ? "border-blue-400/30" : "border-line/50"
+        }`}
+        aria-label={`Open ${attachment.name} preview`}
+      >
+        <img
+          src={state.blobUrl}
+          alt={attachment.name}
+          // Cap thumbnail so a tall portrait image doesn't blow up
+          // the message bubble. The lightbox shows the full size.
+          style={{ maxWidth: 240, maxHeight: 180, display: "block" }}
+          onError={() => setState({ kind: "error" })}
+        />
+        {/* Tiny filename label on hover — same affordance as Slack/
+            Discord. Helps when several images land in one bubble. */}
+        <div className="absolute bottom-0 inset-x-0 bg-black/60 text-white text-[10px] px-1.5 py-0.5 truncate opacity-0 group-hover:opacity-100 transition-opacity">
+          {attachment.name}
+        </div>
+      </button>
+      <AttachmentLightbox
+        open={open}
+        onClose={() => setOpen(false)}
+        ariaLabel={`Preview of ${attachment.name}`}
+      >
+        <img
+          src={state.blobUrl}
+          alt={attachment.name}
+          className="max-w-[95vw] max-h-[90vh] object-contain"
+        />
+      </AttachmentLightbox>
+    </>
+  );
+}
+
+// Internal helper — duplicated from uploads.ts (it's not exported
+// there). Kept local so this component doesn't reach into private
+// surface; if AttachmentVideo / AttachmentPDF in PR-2/PR-3 also need
+// it, lift to an exported helper at that point (the third-caller
+// rule).
+//
+// Returns the `<slug>` of a `<slug>.moleculesai.app` hostname, or null
+// when there is no `window` or the hostname has any other shape.
+function getTenantSlug(): string | null {
+  if (typeof window === "undefined") return null;
+  // Tenant subdomain shape: <slug>.moleculesai.app
+  const match = /^([^.]+)\.moleculesai\.app$/.exec(window.location.hostname);
+  return match === null ? null : match[1];
+}
@@ -0,0 +1,122 @@
+"use client";
+
+// AttachmentLightbox — shared fullscreen modal for image / PDF /
+// (future) any-fullscreen-renderable kind. Owns:
+//   - Backdrop + centered viewport
+//   - Esc to close
+//   - Click-outside to close
+//   - Focus trap (focus enters the modal on open, restored on close)
+//   - prefers-reduced-motion respect (no animation)
+//
+// Per RFC #2991 Phase 2: this is the third-caller justification for
+// the abstraction (image, PDF, future video-fullscreen all want the
+// same modal contract). Not invented for a single caller.
+//
+// Design choices:
+//
+// 1. Portals — we don't use ReactDOM.createPortal because the canvas
+//    chat surface already renders at a high z-index and the modal's
+//    fixed-position layout reaches the viewport regardless. Saves a
+//    portal mount in the common case + avoids the SSR warning (canvas
+//    is "use client" but the parent shell is server-rendered).
+//
+// 2. Focus trap — inline implementation (not a 3rd-party dep). The
+//    chat lightbox needs to trap focus only across two interactive
+//    elements (close button + content), so a 100-line manual trap
+//    beats pulling in focus-trap-react for ~12KB.
+//
+// 3. Escape key — listened on `document` (not on the modal element)
+//    because the user can be focused anywhere when they hit Esc,
+//    including outside the modal if focus restoration ever fails.
+//    The cleanup runs on unmount so leaked listeners don't persist.
+
+import { useEffect, useRef, useCallback, type ReactNode } from "react";
+
+/** Props for AttachmentLightbox. The component keeps no open/closed
+ *  state of its own — `open` and `onClose` fully drive it. */
+interface Props {
+  /** Render the lightbox when true. Caller controls open state. */
+  open: boolean;
+  /** Caller's handler for "close" — Esc, click-outside, X button. */
+  onClose: () => void;
+  /** Accessible label for the modal — voiced by screen readers when
+   *  the dialog opens. The caller knows what's inside (image alt
+   *  text, PDF filename) and supplies it. */
+  ariaLabel: string;
+  /** The thing being shown in fullscreen — <img>, <embed>, etc.
+   *  Caller is responsible for sizing it to fit the viewport; the
+   *  wrapper around it is capped at 95vw × 90vh via CSS. */
+  children: ReactNode;
+}
+
+/**
+ * Fullscreen modal that hosts an attachment preview.
+ *
+ * Renders nothing while `open` is false. While open it:
+ *   - moves focus to the close button and restores the previously
+ *     focused element when it closes or unmounts,
+ *   - closes on Escape, on backdrop click and on the close button,
+ *   - keeps Tab / Shift+Tab cycling inside the dialog so keyboard
+ *     focus cannot wander into the page behind an aria-modal dialog.
+ *
+ * @param open      Whether the dialog is rendered.
+ * @param onClose   Invoked for every close gesture; the caller owns state.
+ * @param ariaLabel Accessible name announced for the dialog.
+ * @param children  The fullscreen content (<img>, <embed>, …).
+ */
+export function AttachmentLightbox({ open, onClose, ariaLabel, children }: Props) {
+  const dialogRef = useRef<HTMLDivElement>(null);
+  const closeButtonRef = useRef<HTMLButtonElement>(null);
+  const previousFocusRef = useRef<HTMLElement | null>(null);
+
+  // Focus enters the close button on open + restores to whatever
+  // had focus when the modal closes. Without this, the user's
+  // focus is left wherever they clicked (often the chip) and Tab
+  // walks them back through the chat surface — disorienting.
+  useEffect(() => {
+    if (!open) return;
+    previousFocusRef.current = document.activeElement as HTMLElement | null;
+    closeButtonRef.current?.focus();
+    return () => {
+      previousFocusRef.current?.focus?.();
+    };
+  }, [open]);
+
+  // Keyboard handling, bound on document so it works regardless of
+  // where focus actually is:
+  //   - Esc closes.
+  //   - Tab / Shift+Tab wrap between the first and last focusable
+  //     element inside the dialog (the focus trap).
+  useEffect(() => {
+    if (!open) return;
+    const onKey = (e: KeyboardEvent) => {
+      if (e.key === "Escape") {
+        e.preventDefault();
+        onClose();
+        return;
+      }
+      if (e.key !== "Tab") return;
+      const dialog = dialogRef.current;
+      if (!dialog) return;
+      // Re-queried on every Tab because the children (and therefore
+      // the set of focusable elements) are caller-controlled.
+      const focusable = Array.from(
+        dialog.querySelectorAll<HTMLElement>(
+          'a[href], button:not([disabled]), input:not([disabled]), select:not([disabled]), textarea:not([disabled]), audio[controls], video[controls], iframe, embed, object, [tabindex]:not([tabindex="-1"])',
+        ),
+      );
+      if (focusable.length === 0) {
+        // Nothing to land on — just keep focus from leaving.
+        e.preventDefault();
+        return;
+      }
+      const first = focusable[0];
+      const last = focusable[focusable.length - 1];
+      const active = document.activeElement;
+      if (!dialog.contains(active)) {
+        // Focus escaped (e.g. tabbed out of an embedded viewer):
+        // pull it back into the dialog.
+        e.preventDefault();
+        first.focus();
+      } else if (e.shiftKey && active === first) {
+        e.preventDefault();
+        last.focus();
+      } else if (!e.shiftKey && active === last) {
+        e.preventDefault();
+        first.focus();
+      }
+    };
+    document.addEventListener("keydown", onKey);
+    return () => document.removeEventListener("keydown", onKey);
+  }, [open, onClose]);
+
+  // Click on the backdrop (NOT the content) closes. Content's own
+  // onClick stops propagation so the user can interact (e.g. native
+  // PDF viewer controls) without dismissing the modal.
+  const onBackdropClick = useCallback(
+    (e: React.MouseEvent) => {
+      if (e.target === e.currentTarget) onClose();
+    },
+    [onClose],
+  );
+
+  if (!open) return null;
+
+  return (
+    <div
+      ref={dialogRef}
+      role="dialog"
+      aria-modal="true"
+      aria-label={ariaLabel}
+      className="fixed inset-0 z-50 flex items-center justify-center bg-black/85 motion-reduce:transition-none transition-opacity"
+      onClick={onBackdropClick}
+    >
+      {/* Close button — top-right, large hit area, keyboard-focusable.
+          ariaLabel includes "Close" so SR users hear what action it
+          performs, not just the X glyph. type="button" keeps it from
+          acting as a submit button if the lightbox is ever rendered
+          inside a <form>. */}
+      <button
+        ref={closeButtonRef}
+        type="button"
+        onClick={onClose}
+        aria-label="Close preview"
+        className="absolute top-4 right-4 rounded-full bg-white/10 hover:bg-white/20 text-white p-2 focus:outline-none focus-visible:ring-2 focus-visible:ring-white"
+      >
+        <svg width="20" height="20" viewBox="0 0 24 24" fill="none" aria-hidden="true">
+          <path d="M5 5l14 14M19 5l-14 14" stroke="currentColor" strokeWidth="2" strokeLinecap="round" />
+        </svg>
+      </button>
+      <div
+        className="max-w-[95vw] max-h-[90vh] flex items-center justify-center"
+        onClick={(e) => e.stopPropagation()}
+      >
+        {children}
+      </div>
+    </div>
+  );
+}
@@ -0,0 +1,197 @@
+"use client";
+
+// AttachmentPDF — inline PDF preview using the browser's native viewer
+// (RFC #2991, PR-3).
+//
+// Why browser-native (not PDF.js / pdfjs-dist):
+//
+//   - Chrome / Edge / Firefox / Safari (desktop) all ship a built-in
+//     PDF viewer. <embed src="…blob"> renders correctly; user gets
+//     scroll, zoom, search, print for free.
+//   - PDF.js adds ~3 MB to the canvas bundle. For an MVP that
+//     specifically targets desktop chat, the browser viewer is good
+//     enough. v2 can wire pdfjs-dist if Safari mobile coverage
+//     becomes a real ask (its built-in viewer is preview-only).
+//
+// Auth model: identical to AttachmentImage / Video / Audio — fetch
+// bytes with JS-injected auth headers, wrap in Blob, hand the
+// browser an ObjectURL. <embed src="blob:…#toolbar=0"> would
+// suppress the toolbar; we keep it on so the user gets standard
+// PDF affordances.
+//
+// Fullscreen: AttachmentLightbox hosts the PDF at viewport size on
+// click. Same shared modal as image — third caller justifies the
+// abstraction (per RFC #2991 design).
+//
+// Failure modes:
+//
+//   - Fetch fail → AttachmentChip fallback (download still works)
+//   - Browser refuses to render the PDF (Safari mobile, plugin
+//     disabled, corrupt bytes) → not handled yet. <embed> doesn't
+//     fire onError reliably across browsers, so no handler is
+//     wired and the lightbox may show an empty viewer. Planned
+//     fallback: a load watchdog that swaps to the chip on timeout.
+
+import { useState, useEffect, useRef } from "react";
+import type { ChatAttachment } from "./types";
+import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
+import { AttachmentLightbox } from "./AttachmentLightbox";
+import { AttachmentChip } from "./AttachmentViews";
+
+/** Props for AttachmentPDF. */
+interface Props {
+  /** Workspace the attachment belongs to; passed to
+   *  resolveAttachmentHref to build the URL. */
+  workspaceId: string;
+  /** The attachment to preview (its `uri` and `name` are read). */
+  attachment: ChatAttachment;
+  /** Download handler handed to the AttachmentChip fallback that is
+   *  rendered when the fetch fails. */
+  onDownload: (a: ChatAttachment) => void;
+  /** Message-bubble role; selects the chip's color variant. */
+  tone: "user" | "agent";
+}
+
+// Lifecycle of the PDF load. The effect in AttachmentPDF moves it
+// from idle to loading and then to ready or error.
+type FetchState =
+  // Initial value before the effect has run.
+  | { kind: "idle" }
+  // Fetch (or href resolution) in progress.
+  | { kind: "loading" }
+  // `blobUrl` is an object URL for platform attachments, or the
+  // resolved href itself for non-platform URIs.
+  | { kind: "ready"; blobUrl: string }
+  // Non-OK response or thrown fetch error; the chip is rendered.
+  | { kind: "error" };
+
+/**
+ * PDF attachment renderer: a small chip in the bubble that opens the
+ * browser-native PDF viewer inside the shared AttachmentLightbox.
+ *
+ * Platform attachments are fetched with auth headers and exposed to
+ * the viewer through an object URL; other URIs are used as-is. While
+ * loading a pulsing placeholder is shown; on any fetch failure the
+ * component falls back to AttachmentChip so download still works.
+ *
+ * @param workspaceId Workspace used to resolve the attachment href.
+ * @param attachment  The attachment to preview.
+ * @param onDownload  Handler given to the chip fallback.
+ * @param tone        Bubble role; selects the color variant.
+ */
+export function AttachmentPDF({ workspaceId, attachment, onDownload, tone }: Props) {
+  const [state, setState] = useState<FetchState>({ kind: "idle" });
+  const [open, setOpen] = useState(false);
+  // Object URL owned by the *current* effect run, so cleanup can
+  // revoke it. Only ever written by a non-cancelled run.
+  const blobUrlRef = useRef<string | null>(null);
+
+  useEffect(() => {
+    let cancelled = false;
+    setState({ kind: "loading" });
+
+    if (!isPlatformAttachment(attachment.uri)) {
+      // Non-platform URI: skip the authenticated fetch and hand the
+      // resolved href straight to the viewer.
+      const href = resolveAttachmentHref(workspaceId, attachment.uri);
+      setState({ kind: "ready", blobUrl: href });
+      return;
+    }
+
+    void (async () => {
+      try {
+        const href = resolveAttachmentHref(workspaceId, attachment.uri);
+        const headers: Record<string, string> = {};
+        const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
+        if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
+        const slug = getTenantSlug();
+        if (slug) headers["X-Molecule-Org-Slug"] = slug;
+        const res = await fetch(href, {
+          headers,
+          credentials: "include",
+          signal: AbortSignal.timeout(60_000),
+        });
+        if (!res.ok) {
+          if (!cancelled) setState({ kind: "error" });
+          return;
+        }
+        const blob = await res.blob();
+        // Bail out BEFORE minting the object URL. A stale run that
+        // resolves after a newer one must not touch blobUrlRef:
+        // overwriting it would orphan the newer run's URL (never
+        // revoked) and leave the ref pointing at a dead one.
+        if (cancelled) return;
+        const url = URL.createObjectURL(blob);
+        blobUrlRef.current = url;
+        setState({ kind: "ready", blobUrl: url });
+      } catch {
+        if (!cancelled) setState({ kind: "error" });
+      }
+    })();
+
+    return () => {
+      cancelled = true;
+      if (blobUrlRef.current) {
+        URL.revokeObjectURL(blobUrlRef.current);
+        blobUrlRef.current = null;
+      }
+    };
+  }, [workspaceId, attachment.uri]);
+
+  if (state.kind === "error") {
+    return <AttachmentChip attachment={attachment} onDownload={onDownload} tone={tone} />;
+  }
+  if (state.kind === "idle" || state.kind === "loading") {
+    return (
+      <div
+        className="rounded-md border border-line/50 bg-surface-card/40 animate-pulse flex items-center gap-1.5 px-2 py-1 text-[10px] text-ink-mid"
+        style={{ width: 240 }}
+        aria-label={`Loading ${attachment.name}`}
+      >
+        <PdfGlyph />
+        Loading {attachment.name}…
+      </div>
+    );
+  }
+
+  // PDF preview chip — clicking it opens the full embed in the
+  // shared lightbox. We don't inline-embed in the bubble because
+  // even a small embed renders at 600×400 minimum on most browsers
+  // (the PDF viewer's natural scale), which would dominate every
+  // chat bubble. Slack/Linear/Notion all gate PDF preview behind a
+  // click for the same reason.
+  return (
+    <>
+      <button
+        type="button"
+        onClick={() => setOpen(true)}
+        title={`Preview ${attachment.name}`}
+        className={`inline-flex items-center gap-1.5 rounded-md border px-2 py-1 text-[10px] hover:bg-surface-card/70 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 ${
+          tone === "user"
+            ? "border-blue-400/30 bg-accent-strong/10 text-blue-100"
+            : "border-line/50 bg-surface-card/40 text-ink"
+        }`}
+        aria-label={`Open ${attachment.name} preview`}
+      >
+        <PdfGlyph />
+        <span className="truncate max-w-[200px]">{attachment.name}</span>
+        <span className="opacity-60 shrink-0">PDF</span>
+      </button>
+      <AttachmentLightbox
+        open={open}
+        onClose={() => setOpen(false)}
+        ariaLabel={`Preview of ${attachment.name}`}
+      >
+        <embed
+          src={state.blobUrl}
+          type="application/pdf"
+          // The lightbox's content slot caps at 95vw / 90vh, so fill
+          // exactly that box and let the user scroll inside the PDF
+          // viewer.
+          style={{ width: "95vw", height: "90vh" }}
+          aria-label={attachment.name}
+        />
+      </AttachmentLightbox>
+    </>
+  );
+}
+
+// Decorative 11×11 document icon (page outline, folded corner and
+// three short text strokes). Hidden from assistive tech; the label
+// comes from the surrounding chip.
+function PdfGlyph() {
+  return (
+    <svg width="11" height="11" viewBox="0 0 16 16" fill="none" aria-hidden="true" className="shrink-0 opacity-70">
+      {/* Page outline */}
+      <path d="M4 2h5l3 3v9a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V3a1 1 0 0 1 1-1Z" stroke="currentColor" strokeWidth="1.3" />
+      {/* Folded corner */}
+      <path d="M9 2v3h3" stroke="currentColor" strokeWidth="1.3" />
+      {/* Text strokes */}
+      <path d="M5.5 9.5h1m1 0h1m-3 2h2" stroke="currentColor" strokeWidth="1.1" strokeLinecap="round" />
+    </svg>
+  );
+}
+
+// Extracts the tenant slug from a <slug>.moleculesai.app hostname
+// for the X-Molecule-Org-Slug request header. Returns null when the
+// hostname has a different shape or no `window` global exists.
+function getTenantSlug(): string | null {
+  if (typeof window === "undefined") {
+    return null;
+  }
+  const tenantHost = /^([^.]+)\.moleculesai\.app$/.exec(window.location.hostname);
+  return tenantHost === null ? null : tenantHost[1];
+}
@@ -0,0 +1,90 @@
+"use client";
+
+// AttachmentPreview — the SSOT dispatch point for chat-attachment
+// rendering (RFC #2991, PR-1).
+//
+// Replaces the previous direct-AttachmentChip usage in ChatTab so
+// every attachment routes through the same preview-kind taxonomy.
+// Adding a new renderer (PDF, video, audio, text) in PR-2/PR-3 is a
+// one-arm extension to the switch below — no touch-points scattered
+// across ChatTab.tsx, AgentCommsPanel.tsx, or other chat consumers.
+//
+// Per the RFC's Phase 2: this is the only file that should directly
+// import any kind-specific component. ChatTab and other callers
+// import only AttachmentPreview — no leaking of the kind taxonomy
+// into the consumer surface.
+
+import type { ChatAttachment } from "./types";
+import { getAttachmentPreviewKind } from "./preview-kind";
+import { AttachmentImage } from "./AttachmentImage";
+import { AttachmentVideo } from "./AttachmentVideo";
+import { AttachmentAudio } from "./AttachmentAudio";
+import { AttachmentPDF } from "./AttachmentPDF";
+import { AttachmentTextPreview } from "./AttachmentTextPreview";
+import { AttachmentChip } from "./AttachmentViews";
+
+/** Props for AttachmentPreview. All four are forwarded unchanged to
+ *  whichever kind-specific renderer is selected (the chip fallback
+ *  receives everything except `workspaceId`). */
+interface Props {
+  /** Workspace the attachment belongs to. */
+  workspaceId: string;
+  /** The attachment to render; its mimeType, uri and name decide
+   *  the preview kind. */
+  attachment: ChatAttachment;
+  /** Caller's download handler — used for the kind=file fallback
+   *  and as the kind-specific renderers' fallback when their own
+   *  preview fails (e.g. image fetch errored). */
+  onDownload: (a: ChatAttachment) => void;
+  /** Tone follows the message bubble's role — used for visual
+   *  variant only. */
+  tone: "user" | "agent";
+}
+
+/**
+ * Single dispatch point for attachment rendering: classifies the
+ * attachment via getAttachmentPreviewKind and mounts the matching
+ * renderer. Unknown kinds and kind "file" get the download chip.
+ */
+export function AttachmentPreview({ workspaceId, attachment, onDownload, tone }: Props) {
+  // Every kind-specific renderer takes the same four props.
+  const rendererProps = { workspaceId, attachment, onDownload, tone };
+
+  switch (getAttachmentPreviewKind(attachment.mimeType, attachment.uri, attachment.name)) {
+    case "image":
+      return <AttachmentImage {...rendererProps} />;
+    case "video":
+      return <AttachmentVideo {...rendererProps} />;
+    case "audio":
+      return <AttachmentAudio {...rendererProps} />;
+    case "pdf":
+      return <AttachmentPDF {...rendererProps} />;
+    case "text":
+      return <AttachmentTextPreview {...rendererProps} />;
+    case "file":
+    default:
+      // The chip has no use for workspaceId, so it is not spread.
+      return <AttachmentChip attachment={attachment} onDownload={onDownload} tone={tone} />;
+  }
+}
@@ -0,0 +1,190 @@
+"use client";
+
+// AttachmentTextPreview — inline preview for text/code/JSON/YAML/etc
+// (RFC #2991, PR-3).
+//
+// Shape: render first N lines (~10) in monospace inside the bubble.
+// Click "Show more" to expand fully; the lightbox is reserved for
+// image/PDF where viewport-size matters. For text, the bubble itself
+// can host the full content.
+//
+// Why no syntax highlighting (yet):
+//
+//   - Pulling in shiki / highlight.js / prism adds 200-500KB to the
+//     bundle for a feature that's nice-to-have. MVP uses plain
+//     <pre><code>.
+//   - Future: lazy-load shiki on first text-attachment render. v2
+//     if the user reports the gap.
+//
+// Auth: same fetch+text() pattern as image/video/audio, but we read
+// the text directly instead of building a Blob URL — no <img>/<video>
+// element to feed.
+//
+// Memory: text files are usually small. We cap the preview at 256 KB
+// fetched (large logs would otherwise crash the bubble). If the file
+// exceeds the cap, we show what we got + a "truncated" note + a chip
+// to download the full file.
+
+import { useState, useEffect } from "react";
+import type { ChatAttachment } from "./types";
+import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
+import { AttachmentChip } from "./AttachmentViews";
+
+/** Props for AttachmentTextPreview. */
+interface Props {
+  /** Workspace the attachment belongs to; passed to
+   *  resolveAttachmentHref to build the fetch URL. */
+  workspaceId: string;
+  /** The attachment to preview (its `uri` and `name` are read). */
+  attachment: ChatAttachment;
+  /** Download handler — wired to the header download button, the
+   *  "download full file" link, and the chip fallback on error. */
+  onDownload: (a: ChatAttachment) => void;
+  /** Message-bubble role; selects the border/background variant. */
+  tone: "user" | "agent";
+}
+
+// Lifecycle of the text fetch. The effect in AttachmentTextPreview
+// moves it from idle to loading and then to ready or error.
+type FetchState =
+  // Initial value before the effect has run.
+  | { kind: "idle" }
+  // Fetch in progress.
+  | { kind: "loading" }
+  // `text` is the decoded content that was read; `truncated` drives
+  // the "Preview truncated" note in the rendered bubble.
+  | { kind: "ready"; text: string; truncated: boolean }
+  // Non-OK response or thrown fetch error; the chip is rendered.
+  | { kind: "error" };
+
+// Number of leading lines shown while the preview is collapsed.
+const PREVIEW_LINE_COUNT = 10;
+// Byte budget for the preview fetch: reading stops around this size
+// and the UI reports the cap (in KB) when content was cut off.
+const MAX_FETCH_BYTES = 256 * 1024; // 256 KB
+
+/**
+ * Inline preview for text-like attachments.
+ *
+ * Fetches at most MAX_FETCH_BYTES of the file, shows the first
+ * PREVIEW_LINE_COUNT lines in a <pre>, and offers "Show all" to
+ * expand in place. When the file is larger than the byte cap a
+ * "truncated" note with a download link is shown. Any fetch failure
+ * falls back to AttachmentChip so download still works.
+ *
+ * @param workspaceId Workspace used to resolve the attachment href.
+ * @param attachment  The attachment to preview.
+ * @param onDownload  Handler for the download button/link and chip.
+ * @param tone        Bubble role; selects the color variant.
+ */
+export function AttachmentTextPreview({ workspaceId, attachment, onDownload, tone }: Props) {
+  const [state, setState] = useState<FetchState>({ kind: "idle" });
+  const [expanded, setExpanded] = useState(false);
+
+  useEffect(() => {
+    let cancelled = false;
+    setState({ kind: "loading" });
+
+    void (async () => {
+      try {
+        const href = resolveAttachmentHref(workspaceId, attachment.uri);
+        const headers: Record<string, string> = {};
+        // Auth headers are only attached for platform attachments.
+        if (isPlatformAttachment(attachment.uri)) {
+          const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
+          if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
+          const slug = getTenantSlug();
+          if (slug) headers["X-Molecule-Org-Slug"] = slug;
+        }
+        const res = await fetch(href, {
+          headers,
+          credentials: "include",
+          signal: AbortSignal.timeout(30_000),
+        });
+        if (!res.ok) {
+          if (!cancelled) setState({ kind: "error" });
+          return;
+        }
+        // Read up to MAX_FETCH_BYTES. Use the standard ReadableStream
+        // path so we don't materialise a 100MB log into memory.
+        const reader = res.body?.getReader();
+        if (!reader) {
+          // Fallback: no stream available, just .text() it. The cap
+          // here is applied in UTF-16 code units, not bytes.
+          const text = await res.text();
+          if (cancelled) return;
+          setState({
+            kind: "ready",
+            text: text.slice(0, MAX_FETCH_BYTES),
+            truncated: text.length > MAX_FETCH_BYTES,
+          });
+          return;
+        }
+        // Keep reading until we hold strictly MORE than the cap or
+        // the stream ends. Going one byte past the cap is how we
+        // know there really is more content — a file of exactly
+        // MAX_FETCH_BYTES must not be reported as truncated.
+        let received = 0;
+        const chunks: BlobPart[] = [];
+        while (received <= MAX_FETCH_BYTES) {
+          const { value, done } = await reader.read();
+          if (done) break;
+          if (cancelled) {
+            // Component went away mid-download: stop pulling bytes.
+            void reader.cancel().catch(() => {});
+            return;
+          }
+          // Copy into a fresh ArrayBuffer-backed view — TS in lib.dom
+          // 2026 narrows BlobPart away from SharedArrayBuffer-backed
+          // Uint8Arrays. Blob() accepts the copy fine at runtime.
+          const copy = new Uint8Array(value.byteLength);
+          copy.set(value);
+          chunks.push(copy.buffer);
+          received += value.byteLength;
+        }
+        const truncated = received > MAX_FETCH_BYTES;
+        // Best-effort cancel; a rejection here must not surface as an
+        // unhandled promise rejection.
+        if (truncated) void reader.cancel().catch(() => {});
+        // The last chunk can overshoot the cap, so trim to exactly
+        // MAX_FETCH_BYTES. A multi-byte character split by the cut
+        // decodes as U+FFFD, which is acceptable for a preview.
+        const blob = new Blob(chunks).slice(0, MAX_FETCH_BYTES);
+        const text = await blob.text();
+        if (cancelled) return;
+        setState({ kind: "ready", text, truncated });
+      } catch {
+        if (!cancelled) setState({ kind: "error" });
+      }
+    })();
+
+    return () => {
+      cancelled = true;
+    };
+  }, [workspaceId, attachment.uri]);
+
+  if (state.kind === "error") {
+    return <AttachmentChip attachment={attachment} onDownload={onDownload} tone={tone} />;
+  }
+  if (state.kind === "idle" || state.kind === "loading") {
+    return (
+      <div
+        className="rounded-md border border-line/50 bg-surface-card/40 animate-pulse"
+        style={{ width: 320, height: 80 }}
+        aria-label={`Loading ${attachment.name}`}
+      />
+    );
+  }
+
+  const lines = state.text.split("\n");
+  const preview = expanded ? state.text : lines.slice(0, PREVIEW_LINE_COUNT).join("\n");
+  const showExpandButton = !expanded && lines.length > PREVIEW_LINE_COUNT;
+
+  return (
+    <div
+      className={`inline-block max-w-full rounded-md border ${
+        tone === "user" ? "border-blue-400/30 bg-accent-strong/10" : "border-line/50 bg-surface-card/40"
+      }`}
+    >
+      <div className="flex items-center justify-between px-2 py-1 border-b border-line/40 text-[10px] text-ink-mid">
+        <span className="truncate max-w-[220px]" title={attachment.name}>
+          {attachment.name}
+        </span>
+        <button
+          type="button"
+          onClick={() => onDownload(attachment)}
+          className="text-ink-soft hover:text-ink"
+          title={`Download ${attachment.name}`}
+          aria-label={`Download ${attachment.name}`}
+        >
+          ⬇
+        </button>
+      </div>
+      <pre className="overflow-x-auto px-2 py-1.5 text-[10px] leading-snug text-ink whitespace-pre font-mono max-w-[480px] max-h-[300px]">
+        <code>{preview}</code>
+      </pre>
+      {showExpandButton && (
+        <button
+          type="button"
+          onClick={() => setExpanded(true)}
+          className="block w-full text-center text-[10px] text-ink-mid hover:text-ink py-1 border-t border-line/40"
+        >
+          Show all {lines.length} lines
+        </button>
+      )}
+      {state.truncated && (
+        <div className="px-2 py-1 text-[10px] text-warm border-t border-line/40">
+          Preview truncated at {Math.round(MAX_FETCH_BYTES / 1024)} KB —{" "}
+          <button
+            type="button"
+            onClick={() => onDownload(attachment)}
+            className="underline"
+          >
+            download full file
+          </button>
+        </div>
+      )}
+    </div>
+  );
+}
+
+// Extracts the tenant slug from a <slug>.moleculesai.app hostname
+// for the X-Molecule-Org-Slug request header. Returns null when the
+// hostname has a different shape or no `window` global exists.
+function getTenantSlug(): string | null {
+  if (typeof window === "undefined") {
+    return null;
+  }
+  const tenantHost = /^([^.]+)\.moleculesai\.app$/.exec(window.location.hostname);
+  return tenantHost === null ? null : tenantHost[1];
+}
@@ -0,0 +1,157 @@
+"use client";
+
+// AttachmentVideo — inline native HTML5 <video controls> player for
+// chat attachments (RFC #2991, PR-2).
+//
+// Why HTML5-native (vs custom JS player):
+//
+//   - Browser vendors ship hardware-accelerated decoders, captions,
+//     and fullscreen UI. We get all of it for free.
+//   - Native fullscreen via the <video> element's built-in button
+//     (no AttachmentLightbox needed for video — the browser does it).
+//   - Mobile-friendly: iOS / Android Safari + Chrome handle the
+//     pinch + scrub UX the user already knows.
+//
+// Auth model — identical to AttachmentImage:
+// platform-auth URIs need our cookie/token, so we fetch the bytes,
+// wrap in a Blob, hand the browser an ObjectURL via <video src=>.
+// External (http/https) URIs skip the fetch and use the raw URL.
+//
+// Memory caveat: a Blob holds the entire video in JS memory until
+// the bubble unmounts. For multi-hundred-MB videos this is bad. The
+// server caps single-file uploads at 25MB (chat_files.go), so we're
+// bounded; if larger files become a real shape, switch to streaming
+// via MediaSource or just `<video src=…>` with a credentials-aware
+// fetch via service worker. v2 if measured-needed.
+
+import { useState, useEffect, useRef } from "react";
+import type { ChatAttachment } from "./types";
+import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
+import { AttachmentChip } from "./AttachmentViews";
+
+/** Props for AttachmentVideo. */
+interface Props {
+  /** Workspace the attachment belongs to; passed to
+   *  resolveAttachmentHref to build the URL. */
+  workspaceId: string;
+  /** The attachment to play (its `uri` and `name` are read). */
+  attachment: ChatAttachment;
+  /** Download handler handed to the AttachmentChip fallback that is
+   *  rendered when the fetch or the <video> load fails. */
+  onDownload: (a: ChatAttachment) => void;
+  /** Message-bubble role; selects the border color variant. */
+  tone: "user" | "agent";
+}
+
+// Lifecycle of the video load. The effect in AttachmentVideo moves
+// it from idle to loading and then to ready or error.
+type FetchState =
+  // Initial value before the effect has run.
+  | { kind: "idle" }
+  // Fetch (or href resolution) in progress.
+  | { kind: "loading" }
+  // `src` is an object URL for platform attachments, or the resolved
+  // href itself for non-platform URIs.
+  | { kind: "ready"; src: string }
+  // Fetch failed or the <video> element reported a load error; the
+  // chip is rendered.
+  | { kind: "error" };
+
+/**
+ * Inline native <video controls> player for a chat attachment.
+ *
+ * Platform attachments are fetched with auth headers and played from
+ * an object URL; other URIs are given to the browser as-is. While
+ * loading a pulsing placeholder is shown; a failed fetch or a media
+ * load error falls back to AttachmentChip so download still works.
+ *
+ * @param workspaceId Workspace used to resolve the attachment href.
+ * @param attachment  The attachment to play.
+ * @param onDownload  Handler given to the chip fallback.
+ * @param tone        Bubble role; selects the border variant.
+ */
+export function AttachmentVideo({ workspaceId, attachment, onDownload, tone }: Props) {
+  const [state, setState] = useState<FetchState>({ kind: "idle" });
+  // Object URL owned by the *current* effect run, so cleanup can
+  // revoke it. Only ever written by a non-cancelled run.
+  const blobUrlRef = useRef<string | null>(null);
+
+  useEffect(() => {
+    let cancelled = false;
+    setState({ kind: "loading" });
+
+    if (!isPlatformAttachment(attachment.uri)) {
+      // External video (http/https) — let the browser stream it
+      // natively without the JS-blob detour.
+      const href = resolveAttachmentHref(workspaceId, attachment.uri);
+      setState({ kind: "ready", src: href });
+      return;
+    }
+
+    void (async () => {
+      try {
+        const href = resolveAttachmentHref(workspaceId, attachment.uri);
+        const headers: Record<string, string> = {};
+        const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
+        if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
+        const slug = getTenantSlug();
+        if (slug) headers["X-Molecule-Org-Slug"] = slug;
+        const res = await fetch(href, {
+          headers,
+          credentials: "include",
+          // Videos are larger than images on average; give the request
+          // more headroom. The server's per-request body cap (50MB) is
+          // still the actual ceiling.
+          signal: AbortSignal.timeout(120_000),
+        });
+        if (!res.ok) {
+          if (!cancelled) setState({ kind: "error" });
+          return;
+        }
+        const blob = await res.blob();
+        // Bail out BEFORE minting the object URL. A stale run that
+        // resolves after a newer one must not touch blobUrlRef:
+        // overwriting it would orphan the newer run's URL (never
+        // revoked) and leave the ref pointing at a dead one.
+        if (cancelled) return;
+        const url = URL.createObjectURL(blob);
+        blobUrlRef.current = url;
+        setState({ kind: "ready", src: url });
+      } catch {
+        if (!cancelled) setState({ kind: "error" });
+      }
+    })();
+
+    return () => {
+      cancelled = true;
+      if (blobUrlRef.current) {
+        URL.revokeObjectURL(blobUrlRef.current);
+        blobUrlRef.current = null;
+      }
+    };
+  }, [workspaceId, attachment.uri]);
+
+  if (state.kind === "error") {
+    return <AttachmentChip attachment={attachment} onDownload={onDownload} tone={tone} />;
+  }
+  if (state.kind === "idle" || state.kind === "loading") {
+    return (
+      <div
+        className="rounded-md border border-line/50 bg-surface-card/40 animate-pulse"
+        style={{ width: 320, height: 180 }}
+        aria-label={`Loading ${attachment.name}`}
+      />
+    );
+  }
+
+  return (
+    <div
+      className={`inline-block rounded-lg overflow-hidden border ${
+        tone === "user" ? "border-blue-400/30" : "border-line/50"
+      }`}
+    >
+      <video
+        controls
+        // preload="metadata" so the browser fetches just enough to
+        // show duration + first frame thumbnail without streaming
+        // the whole file before the user clicks play.
+        preload="metadata"
+        // playsInline keeps mobile Safari from auto-fullscreening
+        // on play; the user can still hit the native fullscreen
+        // button (or PiP on Chrome) if they want.
+        playsInline
+        // Native fullscreen via the <video> control bar; no
+        // AttachmentLightbox needed for video.
+        src={state.src}
+        // Cap thumbnail / inline display so the bubble doesn't blow
+        // up vertical layout for tall portrait clips. The native
+        // fullscreen button uses the original aspect ratio.
+        style={{ maxWidth: 320, maxHeight: 240, display: "block" }}
+        // Bytes that aren't actually a valid video (corrupt blob,
+        // wrong Content-Type) fail load → swap to chip.
+        onError={() => setState({ kind: "error" })}
+      >
+        <track kind="captions" />
+        {attachment.name}
+      </video>
+    </div>
+  );
+}
+
+// Internal helper — same shape as AttachmentImage's; a candidate for
+// a shared util (PR-2.5) once a third caller such as PDF or audio
+// needs it.
+//
+// Extracts the tenant slug from a <slug>.moleculesai.app hostname
+// for the X-Molecule-Org-Slug request header. Returns null when the
+// hostname has a different shape or no `window` global exists.
+function getTenantSlug(): string | null {
+  if (typeof window === "undefined") {
+    return null;
+  }
+  const tenantHost = /^([^.]+)\.moleculesai\.app$/.exec(window.location.hostname);
+  return tenantHost === null ? null : tenantHost[1];
+}
@@ -0,0 +1,317 @@
+// @vitest-environment jsdom
+//
+// AttachmentPreview component tests — pin the dispatch contract:
+// each kind goes to its dedicated renderer; kind=file falls back to
+// the chip; failure modes don't strand the user without a download.
+//
+// Per RFC #2991 Phase 4: every test must be able to fail. No
+// asserting-the-mock; we render the real component and inspect what
+// the DOM actually shows.
+
+import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
+import { render, screen, fireEvent, cleanup, waitFor, act } from "@testing-library/react";
+import React from "react";
+
+afterEach(cleanup);
+
+// Mock the auth-token env var so AttachmentImage's fetch doesn't
+// hit a real network. The fetch is itself mocked below.
+vi.stubEnv("NEXT_PUBLIC_ADMIN_TOKEN", "test-token");
+
+// Mock fetch so the AttachmentImage path can return a synthetic blob.
+// Tests override per-case to simulate success / 404 / network fail.
+const fetchMock = vi.fn();
+beforeEach(() => {
+  fetchMock.mockReset();
+  vi.stubGlobal("fetch", fetchMock);
+  // jsdom doesn't implement URL.createObjectURL — stub.
+  global.URL.createObjectURL = vi.fn(() => "blob:test-url");
+  global.URL.revokeObjectURL = vi.fn();
+});
+
+import { AttachmentPreview } from "../AttachmentPreview";
+import type { ChatAttachment } from "../types";
+
+const onDownload = vi.fn();
+
+function preview(att: ChatAttachment) {
+  // Every case renders with the same workspace id, tone and download
+  // spy; only the attachment under test varies.
+  const element = (
+    <AttachmentPreview workspaceId="ws-1" attachment={att} onDownload={onDownload} tone="agent" />
+  );
+  return render(element);
+}
+
+describe("AttachmentPreview dispatch", () => {
+  it("kind=file → renders the AttachmentChip download button (existing fallback)", () => {
+    preview({ uri: "workspace:/workspace/tmp/foo.zip", name: "foo.zip", mimeType: "application/zip" });
+    // The chip's button title is `Download <name>`. Pre-fix this was
+    // the only render path; now it's the kind=file fallback.
+    expect(screen.getByTitle(/Download foo\.zip/i)).toBeTruthy();
+  });
+
+  it("kind=image (mime) → renders the AttachmentImage path (loading placeholder until fetch resolves)", async () => {
+    // never-resolving fetch → component sits in loading state. Pin
+    // the loading placeholder shape.
+    fetchMock.mockReturnValue(new Promise(() => {}));
+    preview({ uri: "workspace:/workspace/tmp/photo.png", name: "photo.png", mimeType: "image/png" });
+    expect(await screen.findByLabelText(/Loading photo\.png/i)).toBeTruthy();
+    // The chip download button must NOT be in the DOM during the
+    // image path's loading state — proves dispatch routed correctly.
+    expect(screen.queryByTitle(/Download photo\.png/i)).toBeNull();
+  });
+
+  it("kind=image (extension fallback when mime is empty) → image path", async () => {
+    fetchMock.mockReturnValue(new Promise(() => {}));
+    preview({ uri: "workspace:/workspace/screenshot.jpg", name: "screenshot.jpg" /* no mime */ });
+    expect(await screen.findByLabelText(/Loading screenshot\.jpg/i)).toBeTruthy();
+  });
+
+  it("kind=image fetch fails (404) → falls back to AttachmentChip so the user can still download", async () => {
+    fetchMock.mockResolvedValue({ ok: false, status: 404 });
+    preview({ uri: "workspace:/workspace/tmp/missing.png", name: "missing.png", mimeType: "image/png" });
+    // The fallback chip shows up on error.
+    await waitFor(() => {
+      expect(screen.getByTitle(/Download missing\.png/i)).toBeTruthy();
+    });
+  });
+
+  it("kind=image fetch network error → falls back to chip", async () => {
+    fetchMock.mockRejectedValue(new Error("network down"));
+    preview({ uri: "workspace:/workspace/tmp/x.png", name: "x.png", mimeType: "image/png" });
+    await waitFor(() => {
+      expect(screen.getByTitle(/Download x\.png/i)).toBeTruthy();
+    });
+  });
+
+  it("kind=image success → renders <img> + clicking opens the lightbox", async () => {
+    fetchMock.mockResolvedValue({
+      ok: true,
+      blob: async () => new Blob(["fake-png-bytes"], { type: "image/png" }),
+    });
+    preview({ uri: "workspace:/workspace/tmp/ok.png", name: "ok.png", mimeType: "image/png" });
+
+    // Image element shows up after the fetch resolves.
+    const img = await screen.findByAltText(/ok\.png/);
+    expect(img).toBeTruthy();
+    expect((img as HTMLImageElement).src).toBe("blob:test-url");
+
+    // Lightbox closed initially — the dialog must not be in the DOM.
+    expect(screen.queryByRole("dialog")).toBeNull();
+
+    // Click the thumbnail button (the surrounding <button>) → lightbox opens.
+    const button = screen.getByLabelText(/Open ok\.png preview/i);
+    fireEvent.click(button);
+
+    expect(await screen.findByRole("dialog")).toBeTruthy();
+    expect(screen.getByLabelText(/Close preview/i)).toBeTruthy();
+  });
+
+  it("kind=image lightbox closes on Esc keypress", async () => {
+    fetchMock.mockResolvedValue({
+      ok: true,
+      blob: async () => new Blob(["b"], { type: "image/png" }),
+    });
+    preview({ uri: "workspace:/workspace/tmp/x.png", name: "x.png", mimeType: "image/png" });
+    await screen.findByAltText(/x\.png/);
+    fireEvent.click(screen.getByLabelText(/Open x\.png preview/i));
+    expect(await screen.findByRole("dialog")).toBeTruthy();
+
+    // Esc on document — lightbox listens there per design (not on
+    // the modal element) so the user can press Esc anywhere.
+    act(() => {
+      const event = new KeyboardEvent("keydown", { key: "Escape", bubbles: true });
+      document.dispatchEvent(event);
+    });
+    await waitFor(() => {
+      expect(screen.queryByRole("dialog")).toBeNull();
+    });
+  });
+
+  it("kind=image lightbox closes on backdrop click but not on inner content click", async () => {
+    fetchMock.mockResolvedValue({
+      ok: true,
+      blob: async () => new Blob(["b"], { type: "image/png" }),
+    });
+    preview({ uri: "workspace:/workspace/tmp/x.png", name: "x.png", mimeType: "image/png" });
+    await screen.findByAltText(/x\.png/);
+    fireEvent.click(screen.getByLabelText(/Open x\.png preview/i));
+    const dialog = await screen.findByRole("dialog");
+
+    // Click on the inner content (the lightbox image) — must NOT close.
+    const lightboxImg = dialog.querySelector("img");
+    if (!lightboxImg) throw new Error("lightbox img missing");
+    fireEvent.click(lightboxImg);
+    expect(screen.queryByRole("dialog")).toBeTruthy();
+
+    // Click on the backdrop (the dialog itself) — closes.
+    fireEvent.click(dialog);
+    await waitFor(() => {
+      expect(screen.queryByRole("dialog")).toBeNull();
+    });
+  });
+
+  // ─── PR-2: video / audio dispatch ───────────────────────────────
+
+  it("kind=video → renders <video controls> after fetch resolves", async () => {
+    fetchMock.mockResolvedValue({
+      ok: true,
+      blob: async () => new Blob(["fake-mp4"], { type: "video/mp4" }),
+    });
+    preview({ uri: "workspace:/workspace/clip.mp4", name: "clip.mp4", mimeType: "video/mp4" });
+    // Loading placeholder first.
+    expect(await screen.findByLabelText(/Loading clip\.mp4/i)).toBeTruthy();
+    // After the blob resolves, a <video> element with controls=true
+    // is in the DOM. Use a tag query — there's no built-in role for
+    // <video>, but the element is unambiguous in the bubble.
+    await waitFor(() => {
+      const v = document.querySelector("video");
+      expect(v).not.toBeNull();
+      // controls attribute pinned — without it the user can't play.
+      expect(v?.hasAttribute("controls")).toBe(true);
+      // src is the blob URL we minted.
+      expect((v as HTMLVideoElement).src).toBe("blob:test-url");
+    });
+    // Chip MUST NOT render — proves dispatch routed to video, not file.
+    expect(screen.queryByTitle(/Download clip\.mp4/i)).toBeNull();
+  });
+
+  it("kind=video fetch fails → falls back to AttachmentChip", async () => {
+    fetchMock.mockResolvedValue({ ok: false, status: 404 });
+    preview({ uri: "workspace:/workspace/missing.mp4", name: "missing.mp4", mimeType: "video/mp4" });
+    await waitFor(() => {
+      expect(screen.getByTitle(/Download missing\.mp4/i)).toBeTruthy();
+    });
+  });
+
+  it("kind=video by extension fallback (no mime) → video path", async () => {
+    fetchMock.mockReturnValue(new Promise(() => {}));
+    preview({ uri: "workspace:/workspace/recording.webm", name: "recording.webm" });
+    expect(await screen.findByLabelText(/Loading recording\.webm/i)).toBeTruthy();
+  });
+
+  it("kind=audio → renders <audio controls> with filename label", async () => {
+    fetchMock.mockResolvedValue({
+      ok: true,
+      blob: async () => new Blob(["fake-mp3"], { type: "audio/mpeg" }),
+    });
+    preview({ uri: "workspace:/workspace/song.mp3", name: "song.mp3", mimeType: "audio/mpeg" });
+    await waitFor(() => {
+      const a = document.querySelector("audio");
+      expect(a).not.toBeNull();
+      expect(a?.hasAttribute("controls")).toBe(true);
+      expect((a as HTMLAudioElement).src).toBe("blob:test-url");
+    });
+    // Filename label pinned: helps the user know what they're hearing
+    // BEFORE pressing play. Multiple matches — `<span>` text and the
+    // `<audio>`'s fallback `{name}` text node — so getAllByText.
+    expect(screen.getAllByText("song.mp3").length).toBeGreaterThan(0);
+  });
+
+  it("kind=audio fetch fails → falls back to chip", async () => {
+    fetchMock.mockResolvedValue({ ok: false, status: 403 });
+    preview({ uri: "workspace:/workspace/locked.wav", name: "locked.wav", mimeType: "audio/wav" });
+    await waitFor(() => {
+      expect(screen.getByTitle(/Download locked\.wav/i)).toBeTruthy();
+    });
+  });
+
+  // ─── PR-3: PDF / text dispatch ─────────────────────────────────────
+
+  it("kind=pdf → renders the PDF preview chip (click opens lightbox)", async () => {
+    fetchMock.mockResolvedValue({
+      ok: true,
+      blob: async () => new Blob(["%PDF-1.4..."], { type: "application/pdf" }),
+    });
+    preview({ uri: "workspace:/workspace/doc.pdf", name: "doc.pdf", mimeType: "application/pdf" });
+
+    // Loading placeholder first.
+    expect(await screen.findByLabelText(/Loading doc\.pdf/i)).toBeTruthy();
+
+    // After fetch, preview chip with "PDF" tag rendered.
+    await waitFor(() => {
+      // The button title is "Preview doc.pdf"; alongside is a "PDF" tag.
+      expect(screen.getByLabelText(/Open doc\.pdf preview/i)).toBeTruthy();
+    });
+
+    // Click → lightbox opens with <embed> inside.
+    fireEvent.click(screen.getByLabelText(/Open doc\.pdf preview/i));
+    const dialog = await screen.findByRole("dialog");
+    expect(dialog).toBeTruthy();
+    expect(dialog.querySelector("embed[type='application/pdf']")).not.toBeNull();
+  });
+
+  it("kind=pdf fetch fails → falls back to chip", async () => {
+    fetchMock.mockResolvedValue({ ok: false, status: 404 });
+    preview({ uri: "workspace:/workspace/missing.pdf", name: "missing.pdf", mimeType: "application/pdf" });
+    await waitFor(() => {
+      expect(screen.getByTitle(/Download missing\.pdf/i)).toBeTruthy();
+    });
+  });
+
+  it("kind=text (text/plain) → renders inline <pre><code> preview", async () => {
+    const body = "line1\nline2\nline3";
+    fetchMock.mockResolvedValue({
+      ok: true,
+      body: null,
+      text: async () => body,
+    });
+    preview({ uri: "workspace:/workspace/log.txt", name: "log.txt", mimeType: "text/plain" });
+
+    // testing-library normalizes whitespace by default. The <pre>
+    // contains the literal text node, so query the DOM directly.
+    await waitFor(() => {
+      const code = document.querySelector("pre code");
+      expect(code).not.toBeNull();
+      expect(code?.textContent).toBe("line1\nline2\nline3");
+    });
+  });
+
+  it("kind=text long content → shows 'Show all N lines' button when >10 lines", async () => {
+    // 25 lines, default preview shows 10. Button labels with full count.
+    const body = Array.from({ length: 25 }, (_, i) => `line ${i + 1}`).join("\n");
+    fetchMock.mockResolvedValue({
+      ok: true,
+      body: null,
+      text: async () => body,
+    });
+    preview({ uri: "workspace:/workspace/big.txt", name: "big.txt", mimeType: "text/plain" });
+
+    await waitFor(() => {
+      expect(screen.getByRole("button", { name: /Show all 25 lines/i })).toBeTruthy();
+    });
+    // Pre-expand: only first 10 lines in <code>; line 11+ absent.
+    let code = document.querySelector("pre code");
+    expect(code?.textContent?.includes("line 10")).toBe(true);
+    expect(code?.textContent?.includes("line 11")).toBe(false);
+
+    // After clicking expand, all 25 lines present.
+    fireEvent.click(screen.getByRole("button", { name: /Show all 25 lines/i }));
+    await waitFor(() => {
+      code = document.querySelector("pre code");
+      expect(code?.textContent?.includes("line 25")).toBe(true);
+    });
+  });
+
+  it("kind=text fetch fails → chip fallback", async () => {
+    fetchMock.mockResolvedValue({ ok: false, status: 404 });
+    preview({ uri: "workspace:/workspace/missing.json", name: "missing.json", mimeType: "application/json" });
+    await waitFor(() => {
+      expect(screen.getByTitle(/Download missing\.json/i)).toBeTruthy();
+    });
+  });
+
+  // ─── universal-fallback regression ─────────────────────────────────
+
+  it("kind=file is the universal fallback for unknown MIME (regression: don't try to preview a zip)", () => {
+    // Critical safety: an agent could attach a misnamed file. An
+    // explicit, non-generic MIME must win over the extension, so a
+    // zip named x.png goes to the chip, never the image renderer.
+    // The .png name is what lets this test fail: with an extension
+    // that has no renderer, the chip would show up even if dispatch
+    // wrongly trusted the extension. The assertion is synchronous on
+    // purpose — the image path would still be on its loading
+    // placeholder (no chip) at this point.
+    preview({ uri: "workspace:/workspace/tmp/x.png", name: "x.png", mimeType: "application/zip" });
+    expect(screen.getByTitle(/Download x\.png/i)).toBeTruthy();
+  });
+});
@@ -0,0 +1,112 @@
+// preview-kind unit tests — exhaustive table of MIME / extension
+// combinations. The kind helper is a pure function; this is the
+// regression line for "what renders as what" across the entire chat
+// surface.
+
+import { describe, it, expect } from "vitest";
+import { getAttachmentPreviewKind } from "../preview-kind";
+
+describe("getAttachmentPreviewKind", () => {
+  describe("strict MIME match", () => {
+    const cases: Array<[string, ReturnType<typeof getAttachmentPreviewKind>]> = [
+      // images
+      ["image/png", "image"],
+      ["image/jpeg", "image"],
+      ["image/gif", "image"],
+      ["image/webp", "image"],
+      ["image/svg+xml", "image"],
+      ["image/avif", "image"],
+      ["IMAGE/PNG", "image"], // case-insensitive
+      ["  image/png  ", "image"], // trim
+      // video
+      ["video/mp4", "video"],
+      ["video/webm", "video"],
+      ["video/quicktime", "video"],
+      // audio
+      ["audio/mpeg", "audio"],
+      ["audio/wav", "audio"],
+      ["audio/ogg", "audio"],
+      // pdf
+      ["application/pdf", "pdf"],
+      // text family
+      ["text/plain", "text"],
+      ["text/markdown", "text"],
+      ["text/html", "text"],
+      ["text/css", "text"],
+      ["text/javascript", "text"],
+      ["text/csv", "text"],
+      ["application/json", "text"],
+      ["application/yaml", "text"],
+      ["application/x-yaml", "text"],
+      ["application/javascript", "text"],
+      ["application/typescript", "text"],
+      // unknown / non-renderable → file
+      ["application/zip", "file"],
+      ["application/octet-stream", "file"],
+      ["application/x-tar", "file"],
+      ["application/vnd.ms-excel", "file"],
+      ["weird/unknown-thing", "file"],
+    ];
+    for (const [mime, expected] of cases) {
+      it(`mimeType=${JSON.stringify(mime)} → ${expected}`, () => {
+        expect(getAttachmentPreviewKind(mime)).toBe(expected);
+      });
+    }
+  });
+
+  describe("extension fallback when MIME is missing or generic", () => {
+    const cases: Array<[string | undefined, string | undefined, string | undefined, ReturnType<typeof getAttachmentPreviewKind>]> = [
+      // [mime, uri, name, expected]
+      [undefined, "workspace:/tmp/screenshot.png", "screenshot.png", "image"],
+      ["", "workspace:/tmp/photo.JPG", "photo.JPG", "image"],
+      ["application/octet-stream", "workspace:/tmp/clip.mp4", "clip.mp4", "video"],
+      [undefined, "workspace:/foo/song.mp3", "song.mp3", "audio"],
+      [undefined, "workspace:/docs/report.pdf", "report.pdf", "pdf"],
+      [undefined, "workspace:/code/main.py", "main.py", "text"],
+      [undefined, "workspace:/data/notes.md", "notes.md", "text"],
+      // No extension → file
+      [undefined, "workspace:/tmp/Dockerfile", "Dockerfile", "file"],
+      // Trailing dot → file
+      [undefined, "workspace:/tmp/weird.", "weird.", "file"],
+      // URL with query string + fragment → strip before parsing
+      [undefined, "https://example.com/foo.png?download=1#anchor", "", "image"],
+      // Unknown extension → file
+      [undefined, "workspace:/tmp/something.xyz", "something.xyz", "file"],
+      // Empty
+      [undefined, "", "", "file"],
+      [undefined, undefined, undefined, "file"],
+    ];
+    for (const [mime, uri, name, expected] of cases) {
+      it(`mime=${mime ?? "<undef>"} uri=${uri} name=${name} → ${expected}`, () => {
+        expect(getAttachmentPreviewKind(mime, uri, name)).toBe(expected);
+      });
+    }
+  });
+
+  describe("MIME wins over extension", () => {
+    it("explicit mime=application/zip + extension=.png → file (don't render zip as image)", () => {
+      // Critical safety: agent might attach a .png-named file that's
+      // actually a zip. The strict-MIME branch wins and we render
+      // the chip, not an <img> that 404s on broken bytes.
+      expect(getAttachmentPreviewKind("application/zip", "x.png", "x.png")).toBe("file");
+    });
+
+    it("explicit mime=text/plain + extension=.png → text", () => {
+      expect(getAttachmentPreviewKind("text/plain", "log.png", "log.png")).toBe("text");
+    });
+  });
+
+  describe("regression: hostile-reviewer cases", () => {
+    it("does NOT misclassify image/svg+xml as text (svg is image even though it has XML)", () => {
+      expect(getAttachmentPreviewKind("image/svg+xml")).toBe("image");
+    });
+
+    it("application/octet-stream + extension=.docx → file (no renderer, don't try)", () => {
+      expect(getAttachmentPreviewKind("application/octet-stream", "f.docx", "f.docx")).toBe("file");
+    });
+
+    it("non-canonical MIME application/json works", () => {
+      expect(getAttachmentPreviewKind("application/json")).toBe("text");
+    });
+  });
+});
@@ -0,0 +1,154 @@
+// preview-kind.ts — single source of truth for "what renderer should
+// this attachment use" (RFC #2991, PR-1).
+//
+// Per the RFC's Phase 2 design, MIME type is the dispatch axis. The
+// wire shape (ChatAttachment.mimeType) already carries it end-to-end
+// from the server's chat_files.go through agent_message_writer.go to
+// the canvas hydrater — we just need to map it to a render kind.
+//
+// Why a separate file from AttachmentPreview.tsx: the kind helper is
+// a pure function that's easier to unit-test in isolation than a
+// React component, and unit tests across MIME families are the
+// regression line for new types added later.
+
+/** The render-kind taxonomy. Each kind has a dedicated component:
+ *
+ *    image  → AttachmentImage (inline thumbnail + click → lightbox)
+ *    video  → AttachmentVideo (HTML5 <video controls>, native fullscreen)
+ *    audio  → AttachmentAudio (HTML5 <audio controls>)
+ *    pdf    → AttachmentPDF (browser-native <embed>, fullscreen modal)
+ *    text   → AttachmentTextPreview (monospace, first N lines, expand)
+ *    file   → AttachmentChip (existing fallback — generic file pill)
+ *
+ * NB: `text` includes JSON, YAML, source code, plain text — anything
+ * that renders sensibly as preformatted ASCII without a specialized
+ * viewer. PR-1 ships only `image` + `file`; PR-2 adds video/audio;
+ * PR-3 adds pdf + text. All routed through this same dispatch table
+ * so adding a new kind is a one-line registration. */
+export type AttachmentPreviewKind = "image" | "video" | "audio" | "pdf" | "text" | "file";
+
+/** Maps a MIME type to the render kind. Falls back to "file" for
+ *  any MIME we don't have a renderer for (current behavior — the
+ *  attachment chip is the universal fallback).
+ *
+ *  MIME parameters are ignored: a Content-Type style value such as
+ *  "application/json; charset=utf-8" is matched on its media type
+ *  ("application/json") only. Matching is case-insensitive and
+ *  tolerant of surrounding whitespace.
+ *
+ *  Filename-based fallback: when mimeType is missing or generic
+ *  (application/octet-stream), inspect the URI's extension. The
+ *  workspace-server's chat_files.go derives Content-Type from the
+ *  file extension, but agent-emitted attachments may not always
+ *  set mimeType, and the canvas should still preview a file named
+ *  `screenshot.png` even if the wire shape lacks the MIME.
+ *
+ *  Strict MIME match always wins; extension fallback only applies
+ *  to empty / generic. Unknown extension → "file".
+ *
+ *  @param mimeType MIME type from the wire shape; may be undefined or empty.
+ *  @param uri      Attachment URI, used only for the extension fallback.
+ *  @param name     Attachment file name, used only for the extension fallback.
+ *  @returns The render kind; "file" when nothing more specific applies. */
+export function getAttachmentPreviewKind(
+  mimeType: string | undefined,
+  uri?: string,
+  name?: string,
+): AttachmentPreviewKind {
+  // Keep only the media type itself. Parameters after ";" (charset,
+  // boundary, …) would otherwise defeat the exact-match comparisons
+  // below and push e.g. "application/json; charset=utf-8" to "file".
+  const mime = (mimeType ?? "").split(";")[0].trim().toLowerCase();
+
+  // Strict MIME match (preferred — set by server's Content-Type
+  // detection or by the agent's explicit mimeType field).
+  if (mime.startsWith("image/")) return "image";
+  if (mime.startsWith("video/")) return "video";
+  if (mime.startsWith("audio/")) return "audio";
+  if (mime === "application/pdf") return "pdf";
+  if (
+    mime.startsWith("text/") ||
+    mime === "application/json" ||
+    mime === "application/yaml" ||
+    mime === "application/x-yaml" ||
+    mime === "application/javascript" ||
+    mime === "application/typescript"
+  ) {
+    return "text";
+  }
+
+  // Extension-based fallback — only when MIME is missing or
+  // application/octet-stream (the server's "I don't know" default).
+  // Skip when MIME is set to something specific we just don't have
+  // a renderer for (e.g. application/zip → file is correct).
+  const looksGeneric = mime === "" || mime === "application/octet-stream";
+  if (looksGeneric) {
+    const ext = extractExtension(uri, name);
+    if (ext) {
+      const kind = EXTENSION_KIND.get(ext);
+      if (kind) return kind;
+    }
+  }
+
+  return "file";
+}
+
+// Extension → kind table for the fallback branch. Keep this list
+// short and curated — every entry is a UX commitment to render
+// inline, and a wrong inference (e.g. .doc rendered as text) is
+// worse than the generic file chip.
+const EXTENSION_KIND: ReadonlyMap<string, AttachmentPreviewKind> = (() => {
+  // One row per render kind; each extension is lowercase without the
+  // leading dot, which is the form the lookup key arrives in.
+  const extensionsByKind: Array<[AttachmentPreviewKind, string[]]> = [
+    ["image", ["png", "jpg", "jpeg", "gif", "webp", "svg", "avif", "bmp"]],
+    ["video", ["mp4", "webm", "mov", "mkv"]],
+    ["audio", ["mp3", "wav", "ogg", "m4a", "flac"]],
+    ["pdf", ["pdf"]],
+    // Text-ish (rendered as preformatted ASCII)
+    [
+      "text",
+      [
+        "txt", "md", "json", "yaml", "yml",
+        "js", "ts", "tsx", "jsx",
+        "py", "go", "rs", "java",
+        "c", "cpp", "h", "hpp",
+        "sh", "bash",
+        "html", "css", "sql", "toml", "ini", "xml", "csv", "log",
+      ],
+    ],
+  ];
+
+  // Flatten into the extension → kind lookup used by the fallback.
+  const table = new Map<string, AttachmentPreviewKind>();
+  for (const [kind, extensions] of extensionsByKind) {
+    for (const ext of extensions) {
+      table.set(ext, kind);
+    }
+  }
+  return table;
+})();
+
+/** Extracts the lowercased extension from a name or uri, without
+ *  the leading dot. The name is tried first; the uri is consulted
+ *  when the name is empty OR carries no extension. Returns "" when
+ *  neither yields an extension. */
+function extractExtension(uri: string | undefined, name: string | undefined): string {
+  // Prefer name (always a leaf path). Falling back to the uri also
+  // when name has no extension covers a display name like
+  // "Screenshot" attached from ".../shot.png".
+  for (const candidate of [name, uri]) {
+    if (!candidate) continue;
+    // Last path segment; accepts both "/" and "\" separators.
+    let leaf = candidate.split(/[\\/]/).pop() || "";
+    // Drop ?query and #fragment so a URI like
+    // "https://example.com/foo.png?download=1" still parses as png.
+    leaf = leaf.split(/[?#]/)[0];
+    const dot = leaf.lastIndexOf(".");
+    // No dot, or a trailing dot ("weird."), means this candidate has
+    // no extension — move on to the next one.
+    if (dot < 0 || dot === leaf.length - 1) continue;
+    return leaf.slice(dot + 1).toLowerCase();
+  }
+  return "";
+}
@@ -212,8 +212,8 @@ services:
    #   docker compose pull canvas && docker compose up -d canvas
    # First-time local setup or testing unreleased changes — build from source:
    #   docker compose build canvas && docker compose up -d canvas
-    # Note: GHCR images are private — `docker login ghcr.io` required before pull.
-    image: ghcr.io/molecule-ai/canvas:latest
+    # Note: ECR images require AWS auth — `aws ecr get-login-password --region us-east-2 | docker login --username AWS --password-stdin 153263036946.dkr.ecr.us-east-2.amazonaws.com` before pull.
+    image: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/canvas:latest
    build:
      context: ./canvas
      dockerfile: Dockerfile
@@ -4,7 +4,7 @@ How a workspace-server code change reaches the prod tenant fleet — and how to

 > **⚠️ State note (2026-04-22):** this doc describes the **intended design**. As of this write, the canary fleet described below is **not actually running** — no canary tenants are provisioned, `CANARY_TENANT_URLS` / `CANARY_ADMIN_TOKENS` / `CANARY_CP_SHARED_SECRET` are empty in repo secrets, and `canary-verify.yml` fails every run.
 >
-> Current merges gate on manual `promote-latest.yml` dispatches, not canary. See [molecule-controlplane/docs/canary-tenants.md](https://github.com/Molecule-AI/molecule-controlplane/blob/main/docs/canary-tenants.md) for the Phase 1 code work that's already shipped + the Phase 2 plan for actually standing up the fleet + a "should we even do this now?" decision framework.
+> Current merges gate on manual `promote-latest.yml` dispatches, not canary. See [molecule-controlplane/docs/canary-tenants.md](https://git.moleculesai.app/molecule-ai/molecule-controlplane/src/branch/main/docs/canary-tenants.md) for the Phase 1 code work that's already shipped + the Phase 2 plan for actually standing up the fleet + a "should we even do this now?" decision framework.
 >
 > **Account-specific identifiers (AWS account ID, IAM role name) referenced below in the original design have been redacted from this public doc.** The actual values — if they exist — are in `Molecule-AI/internal/runbooks/canary-fleet.md`. If you're implementing Phase 2, start there.
 >
@@ -1,7 +1,7 @@
 # Molecule AI — Comprehensive Technical Documentation

 > Definitive technical reference for the Molecule AI Agent Team platform.
-> Based on a full non-invasive scan of the [molecule-monorepo](https://github.com/Molecule-AI/molecule-monorepo) repository.
+> Based on a full non-invasive scan of the [molecule-monorepo](https://git.moleculesai.app/molecule-ai/molecule-monorepo) repository.

 ---

@@ -1149,11 +1149,11 @@ Molecule AI's workspace abstraction is **runtime-agnostic by design**. A workspa

 ## Links

- **GitHub**: https://github.com/Molecule-AI/molecule-monorepo
- **Architecture Docs**: https://github.com/Molecule-AI/molecule-monorepo/tree/main/docs/architecture
- **API Protocol**: https://github.com/Molecule-AI/molecule-monorepo/tree/main/docs/api-protocol
- **Agent Runtime**: https://github.com/Molecule-AI/molecule-monorepo/tree/main/docs/agent-runtime
- **Product Docs**: https://github.com/Molecule-AI/molecule-monorepo/tree/main/docs/product
+- **Source repository**: https://git.moleculesai.app/molecule-ai/molecule-monorepo
+- **Architecture Docs**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/architecture
+- **API Protocol**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/api-protocol
+- **Agent Runtime**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/agent-runtime
+- **Product Docs**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/product

 ---

@@ -79,7 +79,7 @@ For SOC2 / ISO 27001 / customer security questionnaires:

 ## Pointers

- KMS envelope code: [`molecule-controlplane/internal/crypto/kms.go`](https://github.com/Molecule-AI/molecule-controlplane/blob/main/internal/crypto/kms.go)
- Static-key fallback: [`molecule-controlplane/internal/crypto/aes.go`](https://github.com/Molecule-AI/molecule-controlplane/blob/main/internal/crypto/aes.go)
+- KMS envelope code: [`molecule-controlplane/internal/crypto/kms.go`](https://git.moleculesai.app/molecule-ai/molecule-controlplane/src/branch/main/internal/crypto/kms.go)
+- Static-key fallback: [`molecule-controlplane/internal/crypto/aes.go`](https://git.moleculesai.app/molecule-ai/molecule-controlplane/src/branch/main/internal/crypto/aes.go)
 - Tenant secrets handler: [`workspace-server/internal/crypto/aes.go`](../../workspace-server/internal/crypto/aes.go)
 - Tenant secrets schema: [database-schema.md](./database-schema.md#workspace_secrets)
@@ -0,0 +1,28 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64">
+  <style>
+    .bg { fill: #0a1120; }
+    .accent { fill: #7fe8d6; }
+    .accent-stroke { stroke: #7fe8d6; }
+    @media (prefers-color-scheme: light) {
+      .bg { fill: #f5f7fa; }
+      .accent { fill: #1a8a72; }
+      .accent-stroke { stroke: #1a8a72; }
+    }
+  </style>
+  <rect class="bg" width="64" height="64" rx="14"/>
+  <g class="accent-stroke" stroke-width="2.4" stroke-linecap="round" fill="none">
+    <line x1="32" y1="32" x2="12" y2="14"/>
+    <line x1="32" y1="32" x2="52" y2="18"/>
+    <line x1="32" y1="32" x2="10" y2="40"/>
+    <line x1="32" y1="32" x2="54" y2="44"/>
+    <line x1="32" y1="32" x2="32" y2="56"/>
+  </g>
+  <g class="accent">
+    <circle cx="32" cy="32" r="6.5"/>
+    <circle cx="12" cy="14" r="3.5"/>
+    <circle cx="52" cy="18" r="3.5"/>
+    <circle cx="10" cy="40" r="3.5"/>
+    <circle cx="54" cy="44" r="3.5"/>
+    <circle cx="32" cy="56" r="3.5"/>
+  </g>
+</svg>
@@ -0,0 +1,17 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" role="img" aria-label="Molecule AI">
+  <g stroke="#7fe8d6" stroke-width="2.6" stroke-linecap="round" fill="none">
+    <line x1="32" y1="32" x2="12" y2="14"/>
+    <line x1="32" y1="32" x2="52" y2="18"/>
+    <line x1="32" y1="32" x2="10" y2="40"/>
+    <line x1="32" y1="32" x2="54" y2="44"/>
+    <line x1="32" y1="32" x2="32" y2="56"/>
+  </g>
+  <g fill="#7fe8d6">
+    <circle cx="32" cy="32" r="7"/>
+    <circle cx="12" cy="14" r="3.6"/>
+    <circle cx="52" cy="18" r="3.6"/>
+    <circle cx="10" cy="40" r="3.6"/>
+    <circle cx="54" cy="44" r="3.6"/>
+    <circle cx="32" cy="56" r="3.6"/>
+  </g>
+</svg>
@@ -299,8 +299,8 @@ Or use the Canvas UI: Workspace → Config → MCP Servers → Add browser MCP s

 **Try it free** — Molecule AI is open source and self-hostable. Get a workspace running in under 5 minutes.

-→ [Get started on GitHub →](https://github.com/Molecule-AI/molecule-core)
+→ [Get started with molecule-core →](https://git.moleculesai.app/molecule-ai/molecule-core)

 ---

-*Have a browser automation use case you want to see covered? Open a discussion on [GitHub Discussions](https://github.com/Molecule-AI/molecule-core/discussions) — or file an issue with the `enhancement` label.*
+*Have a browser automation use case you want to see covered? File an issue with the `enhancement` label on the [molecule-core issue tracker](https://git.moleculesai.app/molecule-ai/molecule-core/issues).*
@@ -148,7 +148,7 @@ Then follow the [quick-start guide](/docs/guides/remote-workspaces.md).
 Or run the annotated example directly:

 ```bash
-git clone https://github.com/Molecule-AI/molecule-sdk-python
+git clone https://git.moleculesai.app/molecule-ai/molecule-sdk-python
 cd molecule-sdk-python/examples/remote-agent
 # Create workspace with runtime:external, grab the ID, then:
 WORKSPACE_ID=<your-id> PLATFORM_URL=https://acme.moleculesai.app python3 run.py
@@ -160,6 +160,6 @@ The agent appears on the canvas within seconds.

 → [Remote Workspaces Guide →](/docs/guides/remote-workspaces.md)
 → [External Agent Registration Reference →](/docs/guides/external-agent-registration.md)
-→ [molecule-sdk-python →](https://github.com/Molecule-AI/molecule-sdk-python)
+→ [molecule-sdk-python →](https://git.moleculesai.app/molecule-ai/molecule-sdk-python)

 *Phase 30 shipped in PRs #1075–#1083 and #1085–#1100 on `molecule-core`.*
@@ -133,4 +133,4 @@ With protocol-native A2A, you get:

 Molecule AI's external agent registration is production-ready. Documentation is live at [External Agent Registration Guide](https://docs.molecule.ai/docs/guides/external-agent-registration). The npm package for the MCP server is available at [`@molecule-ai/mcp-server`](https://www.npmjs.com/package/@molecule-ai/mcp-server).

-Read the full [A2A v1.0 protocol spec](https://github.com/Molecule-AI/molecule-core/blob/main/docs/api-protocol/a2a-protocol.md) on GitHub.
+Read the full [A2A v1.0 protocol spec](https://git.moleculesai.app/molecule-ai/molecule-core/src/branch/main/docs/api-protocol/a2a-protocol.md) in the molecule-core repository.
@@ -45,7 +45,7 @@ canonicalUrl: "https://docs.molecule.ai/blog/remote-workspaces"
  " proficiencyLevel": "Expert",
  "genre": ["technical documentation", "product announcement"],
  "sameAs": [
-    "https://github.com/Molecule-AI/molecule-core",
+    "https://git.moleculesai.app/molecule-ai/molecule-core",
    "https://molecule.ai"
  ]
 }
@@ -270,7 +270,7 @@ Configure it in your project's `.mcp.json` and any AI agent (Claude Code, Cursor

 → [External Agent Registration Guide](/docs/guides/external-agent-registration) — full step-by-step with Python and Node.js reference implementations

-→ [GitHub: molecule-core](https://github.com/Molecule-AI/molecule-core) — source and issues
+→ [molecule-core](https://git.moleculesai.app/molecule-ai/molecule-core) — source and issues

 → [Phase 30 Launch Thread on X](https://x.com) — follow for updates

@@ -170,4 +170,4 @@ The `staging` branch is now on `a2a-sdk` 1.0.0. The `main` branch still carries

 If you're running `a2a-sdk` 0.3.x and planning the 1.0.0 migration, this post is the reference. The four breaking changes are well-contained, the migration is a single PR, and the eight smoke scenarios above will tell you whether the upgrade is clean before you merge.

-Questions? The [A2A protocol spec](https://github.com/google-a2a/a2a-specification) is the authoritative source. For Molecule AI's production A2A implementation, see [External Agent Registration](https://docs.molecule.ai/docs/guides/external-agent-registration) or open an issue in the [molecule-core](https://github.com/Molecule-AI/molecule-core) repo.
+Questions? The [A2A protocol spec](https://github.com/google-a2a/a2a-specification) is the authoritative source. For Molecule AI's production A2A implementation, see [External Agent Registration](https://docs.molecule.ai/docs/guides/external-agent-registration) or open an issue in the [molecule-core](https://git.moleculesai.app/molecule-ai/molecule-core) repo.
@@ -215,7 +215,7 @@ Push mode (this guide) works today but requires an inbound-reachable URL — whi

 Your agent makes only outbound HTTPS calls to the platform, pulling messages from an inbox queue and posting replies back. Works behind any NAT/firewall, tolerates offline laptops, no tunnel needed.

-See the [design doc](https://github.com/Molecule-AI/internal/blob/main/product/external-workspaces-polling.md) (internal) and [implementation tracking issue](https://github.com/Molecule-AI/molecule-core/issues?q=polling+mode) once opened.
+See the [design doc](https://git.moleculesai.app/molecule-ai/internal/src/branch/main/product/external-workspaces-polling.md) (internal) and, once it is opened, the implementation tracking issue (search for `polling mode` on the [molecule-core issue tracker](https://git.moleculesai.app/molecule-ai/molecule-core/issues)).

 ---

@@ -143,5 +143,5 @@ The agent appears on the canvas with a **purple REMOTE badge** within seconds. F
 ## Next Steps

 - **[External Agent Registration Guide →](/docs/guides/external-agent-registration)** — full endpoint reference, Python + Node.js examples, troubleshooting
- **[molecule-sdk-python →](https://github.com/Molecule-AI/molecule-sdk-python)** — SDK source, `RemoteAgentClient` API docs
- **[SDK Examples →](https://github.com/Molecule-AI/molecule-sdk-python/tree/main/examples/remote-agent)** — `run.py` demo script, annotated walkthrough
+- **[molecule-sdk-python →](https://git.moleculesai.app/molecule-ai/molecule-sdk-python)** — SDK source, `RemoteAgentClient` API docs
+- **[SDK Examples →](https://git.moleculesai.app/molecule-ai/molecule-sdk-python/src/branch/main/examples/remote-agent)** — `run.py` demo script, annotated walkthrough
@@ -61,7 +61,7 @@ molecule skills install arxiv-research --from community

 Community skills are reviewed by the Molecule AI team before being
 listed. Submit a skill for review by opening a PR against
-[`molecule-ai/skills`](https://github.com/Molecule-AI/skills).
+[`molecule-ai/skills`](https://git.moleculesai.app/molecule-ai/skills).

 ## Installing via config.yaml

@@ -151,7 +151,7 @@ molecule skills bundle my-custom-skill --output ./org-templates/my-role/
 ```

 **Publishing to the community:** Open a PR against
-[`molecule-ai/skills`](https://github.com/Molecule-AI/skills) with a
+[`molecule-ai/skills`](https://git.moleculesai.app/molecule-ai/skills) with a
 complete skill package. Community skills are reviewed for security and
 correctness before listing.

@@ -96,7 +96,7 @@ fork needed in production.
  `resolve_platform_id` for plugin-platform-safe deserialization, and
  `self.adapters[adapter.platform]` keying fix (caught by real-subprocess
  test before merge — see below).
- **Plugin package**: [Molecule-AI/hermes-platform-molecule-a2a](https://github.com/Molecule-AI/hermes-platform-molecule-a2a)
+- **Plugin package**: [Molecule-AI/hermes-platform-molecule-a2a](https://git.moleculesai.app/molecule-ai/hermes-platform-molecule-a2a)
  v0.1.0 — public, MIT-licensed. 11 unit tests + 8 in-process E2E
  + 4 real-subprocess E2E checkpoints all green.
 - **Workspace template patch**: [Molecule-AI/molecule-ai-workspace-template-hermes#32](https://github.com/Molecule-AI/molecule-ai-workspace-template-hermes/pull/32)
@@ -154,7 +154,7 @@ intermediate shim earns its complexity.
 ## Codex (OpenAI Codex CLI)

 **Status:** Template SHIPPED. Repo live at
-[`Molecule-AI/molecule-ai-workspace-template-codex`](https://github.com/Molecule-AI/molecule-ai-workspace-template-codex)
+[`Molecule-AI/molecule-ai-workspace-template-codex`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-codex)
 (14 files, 1411 LOC, 12/12 tests). molecule-core registration in
 [PR #2512](https://github.com/Molecule-AI/molecule-core/pull/2512).
 E2E with real A2A traffic remains.
@@ -17,7 +17,7 @@ This path is aligned to the current repository and current UI. It gets you from
 ## The one-command path

 ```bash
-git clone https://github.com/Molecule-AI/molecule-monorepo.git
+git clone https://git.moleculesai.app/molecule-ai/molecule-monorepo.git
 cd molecule-monorepo
 ./scripts/dev-start.sh
 ```
@@ -42,7 +42,7 @@ If you'd rather run each component yourself — useful when you're iterating on
 ### Step 1: Clone the repository

 ```bash
-git clone https://github.com/Molecule-AI/molecule-monorepo.git
+git clone https://git.moleculesai.app/molecule-ai/molecule-monorepo.git
 cd molecule-monorepo
 ```

@@ -98,14 +98,14 @@ Each of the 8 adapter template repos contains:

 | Adapter | Repo |
 |---------|------|
-| claude-code | https://github.com/Molecule-AI/molecule-ai-workspace-template-claude-code |
-| langgraph | https://github.com/Molecule-AI/molecule-ai-workspace-template-langgraph |
-| crewai | https://github.com/Molecule-AI/molecule-ai-workspace-template-crewai |
-| autogen | https://github.com/Molecule-AI/molecule-ai-workspace-template-autogen |
-| deepagents | https://github.com/Molecule-AI/molecule-ai-workspace-template-deepagents |
-| hermes | https://github.com/Molecule-AI/molecule-ai-workspace-template-hermes |
-| gemini-cli | https://github.com/Molecule-AI/molecule-ai-workspace-template-gemini-cli |
-| openclaw | https://github.com/Molecule-AI/molecule-ai-workspace-template-openclaw |
+| claude-code | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-claude-code |
+| langgraph | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-langgraph |
+| crewai | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-crewai |
+| autogen | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-autogen |
+| deepagents | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-deepagents |
+| hermes | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-hermes |
+| gemini-cli | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-gemini-cli |
+| openclaw | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-openclaw |

 ## Adapter discovery (ADAPTER_MODULE)

@@ -244,7 +244,7 @@ correctness before pushing a `runtime-v*` tag.
 ## Writing a new adapter

 Use the GitHub template repo
-[`Molecule-AI/molecule-ai-workspace-template-starter`](https://github.com/Molecule-AI/molecule-ai-workspace-template-starter)
+[`molecule-ai/molecule-ai-workspace-template-starter`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-starter) (note: the starter repo was not carried over in the migration that followed the 2026-05-06 GitHub org suspension; its recreation is tracked at internal#41)
 — it ships with the canonical Dockerfile + adapter.py skeleton + config.yaml
 schema + the `repository_dispatch: [runtime-published]` cascade receiver
 already wired up. No follow-up setup PR required.
@@ -256,7 +256,7 @@ gh repo create Molecule-AI/molecule-ai-workspace-template-<runtime> \
  --public \
  --description "Molecule AI workspace template: <runtime>"

-git clone https://github.com/Molecule-AI/molecule-ai-workspace-template-<runtime>
+git clone https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-<runtime>.git
 cd molecule-ai-workspace-template-<runtime>
 ```

@@ -286,7 +286,7 @@ After `git push`:
 If the canonical shape changes (e.g. `config.yaml` schema gets a new field,
 the `BaseAdapter` interface adds a method, the reusable CI workflow
 signature changes), update the
-[starter](https://github.com/Molecule-AI/molecule-ai-workspace-template-starter)
+[starter](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-starter) (recreation pending — see note above)
 **first**. Existing templates can either migrate at their own pace or be
 touched in a coordinated cleanup PR. Either way, future templates pick up
 the new shape from day one.
@@ -11,7 +11,7 @@ There are three related scripts; pick the right one:
 |---|---|---|
 | `measure-coordinator-task-bounds.sh` | **Canonical** v1 harness for the RFC #2251 / Issue 4 reproduction. Provisions a PM coordinator + Researcher child via `claude-code-default` + `langgraph` templates, sends a synthesis-heavy A2A kickoff, observes elapsed time + activity trace. | OSS-shape platform — localhost or any `/workspaces`-shaped endpoint. Has tenant/admin-token guards for non-localhost runs. |
 | `measure-coordinator-task-bounds-runner.sh` | Generalised runner for the same measurement contract but with **arbitrary template + secret + model combinations** (Hermes/MiniMax, etc.). Useful for cross-runtime variants without modifying the canonical harness. | Same as above (local or SaaS via `MODE=saas`). |
-| `measure-coordinator-task-bounds.sh` (in [molecule-controlplane](https://github.com/Molecule-AI/molecule-controlplane)) | **Production-shape** variant that bootstraps a real staging tenant via `POST /cp/admin/orgs`, then runs the same measurement against `<slug>.staging.moleculesai.app`. | Staging controlplane only — refuses to run against production. |
+| `measure-coordinator-task-bounds.sh` (in [molecule-controlplane](https://git.moleculesai.app/molecule-ai/molecule-controlplane)) | **Production-shape** variant that bootstraps a real staging tenant via `POST /cp/admin/orgs`, then runs the same measurement against `<slug>.staging.moleculesai.app`. | Staging controlplane only — refuses to run against production. |

 See `reference_harness_pair_pattern` (auto-memory) for when to use which
 and the cross-repo design rationale.
@@ -278,7 +278,7 @@ include = ["molecule_runtime*"]
 README_TEMPLATE = """\
 # molecule-ai-workspace-runtime

-Shared workspace runtime for [Molecule AI](https://github.com/Molecule-AI/molecule-core)
+Shared workspace runtime for [Molecule AI](https://git.moleculesai.app/molecule-ai/molecule-core)
 agent adapters. Installed by every workspace template image
 (`workspace-template-claude-code`, `-langgraph`, `-hermes`, etc.) to provide
 A2A delegation, heartbeat, memory, plugin loading, and skill management.
@@ -396,7 +396,7 @@ If you don't need real-time push, the default poll path works
 universally with no extra setup; both modes converge on the same
 `inbox_pop` ack so messages never duplicate.

-See [`docs/workspace-runtime-package.md`](https://github.com/Molecule-AI/molecule-core/blob/main/docs/workspace-runtime-package.md)
+See [`docs/workspace-runtime-package.md`](https://git.moleculesai.app/molecule-ai/molecule-core/src/branch/main/docs/workspace-runtime-package.md)
 for the publish flow and architecture.
 """

@@ -10,11 +10,11 @@
 #           → PyPI auto-bumps molecule-ai-workspace-runtime patch version
 #           → repository_dispatch fans out to 8 workspace-template-* repos
 #           → each template repo rebuilds and re-tags
-#             ghcr.io/molecule-ai/workspace-template-<runtime>:latest
+#             153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/workspace-template-<runtime>:latest
 #
 #   PATH 2: any merge to a workspace-template-* repo's main branch
 #           → that repo's publish-image.yml fires
-#           → ghcr.io/molecule-ai/workspace-template-<runtime>:latest
+#           → 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/workspace-template-<runtime>:latest
 #             gets re-tagged
 #
 #   provisioner.go:296 RuntimeImages[runtime] reads `:latest` at every
@@ -51,7 +51,7 @@ log "pulling latest images for: ${RUNTIMES[*]}"
 PULLED=()
 FAILED=()
 for rt in "${RUNTIMES[@]}"; do
-  IMG="ghcr.io/molecule-ai/workspace-template-$rt:latest"
+  IMG="153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/workspace-template-$rt:latest"
  if docker pull "$IMG" >/dev/null 2>&1; then
    log "  ✓ $rt"
    PULLED+=("$rt")
@@ -1,9 +1,10 @@
 #!/bin/bash
-# rollback-latest.sh — moves the :latest tag on ghcr.io/molecule-ai/platform
-# (and the matching tenant image) back to a prior :staging-<sha> digest
-# without rebuilding anything. Prod tenants auto-pull :latest every 5
-# min, so this is the fast path when a canary-verified image turns out
-# to have a runtime regression that canary didn't catch.
+# rollback-latest.sh — moves the :latest tag on the platform image
+# (and the matching tenant image) on AWS ECR back to a prior
+# :staging-<sha> digest without rebuilding anything. Prod tenants
+# auto-pull :latest every 5 min, so this is the fast path when a
+# canary-verified image turns out to have a runtime regression that
+# canary didn't catch.
 #
 # Usage:
 #   scripts/rollback-latest.sh <sha>
@@ -12,12 +13,14 @@
 # Prereqs:
 #   - crane on $PATH (brew install crane OR download from
 #     https://github.com/google/go-containerregistry/releases)
-#   - GHCR token exported as GITHUB_TOKEN with write:packages scope
+#   - aws CLI authenticated for region us-east-2 with ECR pull/push
+#     access to the molecule-ai/platform + platform-tenant repositories.
+#     `aws sts get-caller-identity` should succeed.
 #
 # What it does (per image — platform + tenant):
-#   crane digest ghcr.io/…:<sha>         # verify the target sha exists
-#   crane tag    ghcr.io/…:<sha> latest  # retag remotely, single API call
-#   crane digest ghcr.io/…:latest        # confirm the move
+#   crane digest <ecr>:<sha>         # verify the target sha exists
+#   crane tag    <ecr>:<sha> latest  # retag remotely, single API call
+#   crane digest <ecr>:latest        # confirm the move
 #
 # Exit codes: 0 = both retagged, 1 = tag missing / crane error, 2 = bad args.

@@ -30,21 +33,23 @@ if [ "${1:-}" = "" ]; then
 fi

 TARGET_SHA="$1"
-PLATFORM=ghcr.io/molecule-ai/platform
-TENANT=ghcr.io/molecule-ai/platform-tenant
+ECR_HOST=153263036946.dkr.ecr.us-east-2.amazonaws.com
+PLATFORM=$ECR_HOST/molecule-ai/platform
+TENANT=$ECR_HOST/molecule-ai/platform-tenant

 if ! command -v crane >/dev/null; then
  echo "ERROR: crane not installed. brew install crane" >&2
  exit 1
 fi
-if [ -z "${GITHUB_TOKEN:-}" ]; then
-  echo "ERROR: GITHUB_TOKEN unset. export it with write:packages scope." >&2
+if ! command -v aws >/dev/null; then
+  echo "ERROR: aws CLI not installed. brew install awscli" >&2
  exit 1
 fi

-# Log in once. crane stores creds in a config file keyed by registry;
-# re-running is cheap.
-printf '%s\n' "$GITHUB_TOKEN" | crane auth login ghcr.io -u "${GITHUB_ACTOR:-$(whoami)}" --password-stdin >/dev/null
+# Log in once. ECR auth is via short-lived password from `aws ecr
+# get-login-password`. crane stores creds in a config file keyed by
+# registry; re-running is cheap.
+aws ecr get-login-password --region us-east-2 | crane auth login "$ECR_HOST" -u AWS --password-stdin >/dev/null

 roll() {
  local image="$1"
@@ -18,7 +18,7 @@
 #
 # Or inline via curl:
 #
-#     bash <(curl -fsSL https://raw.githubusercontent.com/Molecule-AI/molecule-core/main/tools/check-template-parity.sh) \
+#     bash <(curl -fsSL https://git.moleculesai.app/molecule-ai/molecule-core/raw/branch/main/tools/check-template-parity.sh) \
 #          install.sh start.sh
 #
 # Exit codes:
@@ -5,15 +5,11 @@

 FROM golang:1.25-alpine AS builder
 WORKDIR /app
-# Plugin source for replace directive in go.mod
-COPY molecule-ai-plugin-github-app-auth/ /plugin/
 COPY workspace-server/go.mod workspace-server/go.sum ./
-# Add replace directives for Docker builds:
-# 1. Platform → plugin (plugin source at /plugin/)
-# 2. Plugin → platform (plugin's go.mod has a relative replace that doesn't
-#    work in Docker; fix it to point at /app where the platform source lives)
-RUN echo 'replace github.com/Molecule-AI/molecule-ai-plugin-github-app-auth => /plugin' >> go.mod
-RUN sed -i 's|replace github.com/Molecule-AI/molecule-monorepo/platform => .*|replace github.com/Molecule-AI/molecule-monorepo/platform => /app|' /plugin/go.mod
+# github-app-auth plugin removed 2026-05-07 (#157): per-agent Gitea
+# identities replaced the GitHub-App-installation token flow after the
+# 2026-05-06 suspension. Pre-removal this stage COPY'd the sibling
+# plugin repo + injected a `replace` directive; both are gone.
 RUN go mod download
 COPY workspace-server/ .
 # GIT_SHA mirror of Dockerfile.tenant — see that file for the rationale.
@@ -16,9 +16,10 @@
 # ── Stage 1: Go platform binary ──────────────────────────────────────
 FROM golang:1.25-alpine AS go-builder
 WORKDIR /app
-COPY molecule-ai-plugin-github-app-auth/ /plugin/
 COPY workspace-server/go.mod workspace-server/go.sum ./
-RUN echo 'replace github.com/Molecule-AI/molecule-ai-plugin-github-app-auth => /plugin' >> go.mod
+# github-app-auth plugin removed 2026-05-07 (#157): per-agent Gitea
+# identities replaced GitHub-App tokens post-suspension. The sibling
+# COPY + replace directive are gone.
 RUN go mod download
 COPY workspace-server/ .

@@ -21,6 +21,7 @@ import (
 	"os"

 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 )

 // verifyConfig is the typed dependency bundle for verifyParity.
@@ -121,7 +122,7 @@ func verifyParity(ctx context.Context, cfg verifyConfig, stdout *os.File) (*veri
 		matched := true
 		for _, c := range legacy {
 			if pluginContents[c] == 0 {
-				fmt.Fprintf(stdout, "[mismatch] workspace=%s missing-from-plugin content=%q\n", wsID, truncate(c, 80))
+				fmt.Fprintf(stdout, "[mismatch] workspace=%s missing-from-plugin content=%q\n", wsID, textutil.TruncateBytes(c, 80))
 				matched = false
 				break
 			}
@@ -192,9 +193,4 @@ func queryLegacyMemories(ctx context.Context, db *sql.DB, workspaceID string) ([
 	return out, rows.Err()
 }

-func truncate(s string, n int) string {
-	if len(s) <= n {
-		return s
-	}
-	return s[:n] + "…"
-}
+// truncation moved to internal/textutil.TruncateBytes (#2962 SSOT).
@@ -349,16 +349,8 @@ func TestVerifyParity_PickSampleError(t *testing.T) {
 	}
 }

-// --- Truncate ---
-
-func TestVerifyTruncate(t *testing.T) {
-	if got := truncate("short", 10); got != "short" {
-		t.Errorf("got %q", got)
-	}
-	if got := truncate(strings.Repeat("a", 200), 10); !strings.HasSuffix(got, "…") {
-		t.Errorf("expected ellipsis: %q", got)
-	}
-}
+// Truncate moved to internal/textutil — coverage in
+// internal/textutil/truncate_test.go (TestTruncateBytes_RuneBoundary).

 // --- CLI: -verify mode ---

@@ -29,8 +29,7 @@ import (

 	// External plugins — each registers EnvMutator(s) that run at workspace
 	// provision time. Loaded via soft-dep gates in main() so self-hosters
-	// without the App or without per-agent identity configured keep working.
-	githubappauth "github.com/Molecule-AI/molecule-ai-plugin-github-app-auth/pluginloader"
+	// without per-agent identity configured keep working.
 	ghidentity "github.com/Molecule-AI/molecule-ai-plugin-gh-identity/pluginloader"

 	"github.com/Molecule-AI/molecule-monorepo/platform/pkg/provisionhook"
@@ -179,12 +178,15 @@ func main() {
 	}

 	// External-plugin env mutators — each plugin contributes 0+ mutators
-	// onto a shared registry. Order matters: gh-identity populates
-	// MOLECULE_AGENT_ROLE-derived attribution env vars that downstream
-	// mutators and the workspace's install.sh can then read. Keep
-	// github-app-auth last because it fails loudly on misconfig and its
-	// failure mode is "no GITHUB_TOKEN" — worth surfacing after the
-	// cheaper mutators already ran.
+	// onto a shared registry. gh-identity populates MOLECULE_AGENT_ROLE-
+	// derived attribution env vars that the workspace's install.sh can
+	// then read.
+	//
+	// github-app-auth was dropped 2026-05-07 (closes #157): per-agent
+	// Gitea identities (this gh-identity plugin's role-derived path)
+	// replaced GitHub-App-installation tokens after the 2026-05-06
+	// suspension. Workspaces now provision with a per-persona Gitea PAT
+	// from .env instead of an App-rotated GITHUB_TOKEN.
 	envReg := provisionhook.NewRegistry()

 	// gh-identity plugin — per-agent attribution via env injection + gh
@@ -198,26 +200,6 @@ func main() {
 		log.Printf("gh-identity: registered (config file=%q)", os.Getenv("MOLECULE_GH_IDENTITY_CONFIG_FILE"))
 	}

-	// github-app-auth plugin — injects GITHUB_TOKEN + GH_TOKEN into every
-	// workspace env using the App's installation access token (rotates ~hourly).
-	// Soft-skip when GITHUB_APP_* env vars are absent so dev/self-hosters
-	// without an App configured keep working; fail-loud only on MISCONFIG
-	// (e.g. APP_ID set but key file missing), not on unset.
-	if os.Getenv("GITHUB_APP_ID") != "" {
-		if reg, err := githubappauth.BuildRegistry(); err != nil {
-			log.Fatalf("github-app-auth plugin: %v", err)
-		} else {
-			// Copy the plugin's mutators onto the shared registry so the
-			// TokenProvider probe (FirstTokenProvider) still finds them.
-			for _, m := range reg.Mutators() {
-				envReg.Register(m)
-			}
-			log.Printf("github-app-auth: registered, %d mutator(s) added to chain", reg.Len())
-		}
-	} else {
-		log.Println("github-app-auth: GITHUB_APP_ID unset — skipping plugin registration (agents will use any PAT from .env)")
-	}
-
 	wh.SetEnvMutators(envReg)
 	log.Printf("env-mutator chain: %v", envReg.Names())

@@ -266,6 +248,19 @@ func main() {
 		})
 	}

+	// CP-mode orphan sweeper — SaaS counterpart to the Docker sweeper
+	// above. Re-issues cpProv.Stop for any workspace at status='removed'
+	// with a non-NULL instance_id, healing the deprovision split-write
+	// race documented in #2989: tenant marks status='removed' BEFORE
+	// calling CP DELETE, so a transient CP failure leaves the EC2
+	// running with no retry path. cpProv.Stop is idempotent against
+	// already-terminated instances; on success we clear instance_id.
+	if cpProv != nil {
+		go supervised.RunWithRecover(ctx, "cp-orphan-sweeper", func(c context.Context) {
+			registry.StartCPOrphanSweeper(c, cpProv)
+		})
+	}
+
 	// Pending-uploads GC sweep — deletes acked rows past their retention
 	// window plus unacked rows past expires_at. Without this the
 	// pending_uploads table grows unbounded; even with the 24h hard TTL,
@@ -5,7 +5,6 @@ go 1.25.0
 require (
 	github.com/DATA-DOG/go-sqlmock v1.5.2
 	github.com/Molecule-AI/molecule-ai-plugin-gh-identity v0.0.0-20260424033845-4fd5ac7be30f
-	github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260421064811-7d98ae51e31d
 	github.com/alicebob/miniredis/v2 v2.37.0
 	github.com/creack/pty v1.1.24
 	github.com/docker/docker v28.5.2+incompatible
@@ -6,8 +6,6 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
 github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
 github.com/Molecule-AI/molecule-ai-plugin-gh-identity v0.0.0-20260424033845-4fd5ac7be30f h1:YkLRhUg+9qr9OV9N8dG1Hj0Ml7TThHlRwh5F//oUJVs=
 github.com/Molecule-AI/molecule-ai-plugin-gh-identity v0.0.0-20260424033845-4fd5ac7be30f/go.mod h1:NqdtlWZDJvpXNJRHnMkPhTKHdA1LZTNH+63TB66JSOU=
-github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260421064811-7d98ae51e31d h1:GpYhP6FxaJZc1Ljy5/YJ9ZIVGvfOqZBmDolNr2S5x2g=
-github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260421064811-7d98ae51e31d/go.mod h1:3a6LR/zd7FjR9ZwLTbytwYlWuCBsbCOVFlEg0WnoYiM=
 github.com/alicebob/miniredis/v2 v2.37.0 h1:RheObYW32G1aiJIj81XVt78ZHJpHonHLHW7OLIshq68=
 github.com/alicebob/miniredis/v2 v2.37.0/go.mod h1:TcL7YfarKPGDAthEtl5NBeHZfeUQj6OXMm/+iu5cLMM=
 github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
@@ -51,7 +51,7 @@ func Import(
 		return result
 	}

-	_ = broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISIONING", wsID, map[string]interface{}{
+	_ = broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisioning), wsID, map[string]interface{}{
 		"name":             b.Name,
 		"tier":             b.Tier,
 		"source_bundle_id": b.ID,
@@ -142,7 +142,7 @@ func markFailed(ctx context.Context, wsID string, broadcaster *events.Broadcaste
 	db.DB.ExecContext(ctx,
 		`UPDATE workspaces SET status = $1, last_sample_error = $2, updated_at = now() WHERE id = $3`,
 		models.StatusFailed, msg, wsID)
-	broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISION_FAILED", wsID, map[string]interface{}{
+	broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
 		"error": msg,
 	})
 }
@@ -10,6 +10,7 @@ import (
 	"time"

 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
 )

 const (
@@ -304,14 +305,14 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound
 				"parts":     []map[string]interface{}{{"kind": "text", "text": msg.Text}},
 			},
 			"metadata": map[string]interface{}{
-				"source":       ch.ChannelType,
-				"channel_id":   ch.ID,
-				"chat_id":      msg.ChatID,
-				"user_id":      msg.UserID,
-				"username":     msg.Username,
-				"message_id":   msg.MessageID,
-				"history":      history,
-				"extra":        msg.Metadata,
+				"source":     ch.ChannelType,
+				"channel_id": ch.ID,
+				"chat_id":    msg.ChatID,
+				"user_id":    msg.UserID,
+				"username":   msg.Username,
+				"message_id": msg.MessageID,
+				"history":    history,
+				"extra":      msg.Metadata,
 			},
 		},
 	})
@@ -383,7 +384,7 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound

 	// Broadcast event
 	if m.broadcaster != nil {
-		m.broadcaster.RecordAndBroadcast(ctx, "CHANNEL_MESSAGE", ch.WorkspaceID, map[string]interface{}{
+		m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
 			"channel_id":   ch.ID,
 			"channel_type": ch.ChannelType,
 			"username":     msg.Username,
@@ -427,7 +428,7 @@ func (m *Manager) SendOutbound(ctx context.Context, channelID string, text strin
 	}

 	if m.broadcaster != nil {
-		m.broadcaster.RecordAndBroadcast(ctx, "CHANNEL_MESSAGE", ch.WorkspaceID, map[string]interface{}{
+		m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
 			"channel_id":   ch.ID,
 			"channel_type": ch.ChannelType,
 			"direction":    "outbound",
@@ -14,10 +14,12 @@ import (
 	"time"

 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
 	"github.com/gin-gonic/gin"
 )
+
 // proxyDispatchBuildError is a sentinel wrapper for failures inside
 // http.NewRequestWithContext. handleA2ADispatchError unwraps it to emit the
 // "failed to create proxy request" 500 instead of the standard 502/503 paths.
@@ -90,10 +92,10 @@ func (h *WorkspaceHandler) handleA2ADispatchError(ctx context.Context, workspace
 				Status:  http.StatusServiceUnavailable,
 				Headers: map[string]string{"Retry-After": strconv.Itoa(busyRetryAfterSeconds)},
 				Response: gin.H{
-					"error":           "workspace agent busy — adapter handles retry (native_session)",
-					"busy":            true,
-					"retry_after":     busyRetryAfterSeconds,
-					"native_session":  true,
+					"error":          "workspace agent busy — adapter handles retry (native_session)",
+					"busy":           true,
+					"retry_after":    busyRetryAfterSeconds,
+					"native_session": true,
 				},
 			}
 		}
@@ -149,7 +151,7 @@ func (h *WorkspaceHandler) handleA2ADispatchError(ctx context.Context, workspace
 // Provisioner selection (mutually exclusive in production):
 //   - h.provisioner != nil  → local Docker deployment; IsRunning does docker inspect.
 //   - h.cpProv != nil       → SaaS / EC2 deployment; IsRunning calls CP's
-//                              /cp/workspaces/:id/status to read the EC2 state.
+//     /cp/workspaces/:id/status to read the EC2 state.
 //
 // Pre-fix this function ONLY consulted h.provisioner — for SaaS tenants
 // (h.provisioner=nil, h.cpProv=set) it short-circuited to false on every
@@ -191,7 +193,7 @@ func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspace
 		log.Printf("ProxyA2A: failed to mark workspace %s offline: %v", workspaceID, err)
 	}
 	db.ClearWorkspaceKeys(ctx, workspaceID)
-	h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_OFFLINE", workspaceID, map[string]interface{}{})
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOffline), workspaceID, map[string]interface{}{})
 	go h.RestartByID(workspaceID)
 	return true
 }
@@ -272,7 +274,7 @@ func (h *WorkspaceHandler) logA2ASuccess(ctx context.Context, workspaceID, calle
 	}(ctx)

 	if callerID == "" && statusCode < 400 {
-		h.broadcaster.BroadcastOnly(workspaceID, "A2A_RESPONSE", map[string]interface{}{
+		h.broadcaster.BroadcastOnly(workspaceID, string(events.EventA2AResponse), map[string]interface{}{
 			"response_body": json.RawMessage(respBody),
 			"method":        a2aMethod,
 			"duration_ms":   durationMs,
@@ -21,6 +21,8 @@ import (
 	"time"

 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 )

 // extractIdempotencyKey pulls params.message.messageId out of an A2A JSON-RPC
@@ -419,7 +421,7 @@ func (h *WorkspaceHandler) stitchDrainResponseToDelegation(ctx context.Context,
 		   AND method         = 'delegate_result'
 		   AND target_id      = $4
 		   AND response_body->>'delegation_id' = $5
-	`, "Delegation completed ("+truncate(responseText, 80)+")", string(respJSON),
+	`, "Delegation completed ("+textutil.TruncateBytes(responseText, 80)+")", string(respJSON),
 		sourceID, targetID, delegationID)
 	if err != nil {
 		log.Printf("A2AQueue drain stitch: update failed for delegation %s: %v", delegationID, err)
@@ -435,10 +437,10 @@ func (h *WorkspaceHandler) stitchDrainResponseToDelegation(ctx context.Context,
 	// "⏸ queued" line to "✓ completed" in real time. Without this the
 	// transition only surfaces after the user reloads or polls activity.
 	if h.broadcaster != nil {
-		h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_COMPLETE", sourceID, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{
 			"delegation_id":    delegationID,
 			"target_id":        targetID,
-			"response_preview": truncate(responseText, 200),
+			"response_preview": textutil.TruncateBytes(responseText, 200),
 			"via":              "queue_drain",
 		})
 	}
@@ -55,7 +55,7 @@ func NewActivityHandler(b *events.Broadcaster) *ActivityHandler {
 func (h *ActivityHandler) List(c *gin.Context) {
 	workspaceID := c.Param("id")
 	activityType := c.Query("type")
-	source := c.Query("source") // "canvas" = source_id IS NULL, "agent" = source_id IS NOT NULL
+	source := c.Query("source")  // "canvas" = source_id IS NULL, "agent" = source_id IS NOT NULL
 	peerID := c.Query("peer_id") // optional UUID — restrict to rows where this peer is sender OR target
 	limitStr := c.DefaultQuery("limit", "100")
 	sinceSecsStr := c.Query("since_secs")
@@ -580,7 +580,45 @@ func (h *ActivityHandler) Report(c *gin.Context) {
 // LogActivity inserts an activity log and optionally broadcasts via WebSocket.
 // Takes events.EventEmitter (#1814) so callers passing a stub broadcaster
 // in tests no longer need to construct the full *events.Broadcaster.
+//
+// Errors are logged and swallowed, and no broadcast fires — this is the
+// fire-and-forget contract most callers expect. For writes that must be
+// atomic with sibling writes, use LogActivityTx and propagate its error.
 func LogActivity(ctx context.Context, broadcaster events.EventEmitter, params ActivityParams) {
+	hook, err := logActivityExec(ctx, db.DB, broadcaster, params)
+	if err != nil {
+		log.Printf("LogActivity insert error: %v", err)
+		return
+	}
+	hook() // insert ran directly on db.DB (no tx to wait for) — emit the broadcast now
+}
+
+// LogActivityTx inserts the activity row inside the caller-provided tx
+// and returns a commitHook that fires the post-commit ACTIVITY_LOGGED
+// broadcast. Caller MUST invoke commitHook AFTER tx.Commit() — firing
+// it before commit can leak a WebSocket event for a row that ends up
+// rolled back: the canvas's optimistic UI shows it, then loses it.
+//
+// Returns a nil hook and an error if tx is nil or the INSERT fails —
+// caller should Rollback. Caller also owns BeginTx and Commit/Rollback.
+// Used by chat_files uploadPollMode so PutBatchTx + N activity rows
+// commit atomically; if any activity row fails, the pending_uploads rows
+// roll back too and the client retries the whole multipart upload cleanly.
+func LogActivityTx(ctx context.Context, tx *sql.Tx, broadcaster events.EventEmitter, params ActivityParams) (commitHook func(), err error) {
+	if tx == nil { // guard here: per TestLogActivityTx_NilTx_Errors, a nil tx would otherwise panic inside ExecContext
+		return nil, errors.New("LogActivityTx: tx is nil")
+	}
+	return logActivityExec(ctx, tx, broadcaster, params)
+}
+
+// activityExecutor is the SQL surface LogActivity[Tx] needs. *sql.Tx
+// and *sql.DB both satisfy it, so the same insert path serves the
+// fire-and-forget caller (db.DB) and the Tx-aware caller (*sql.Tx).
+type activityExecutor interface {
+	ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error)
+}
+
+func logActivityExec(ctx context.Context, exec activityExecutor, broadcaster events.EventEmitter, params ActivityParams) (commitHook func(), err error) {
 	reqJSON, reqErr := json.Marshal(params.RequestBody)
 	if reqErr != nil {
 		log.Printf("LogActivity: failed to marshal request_body for %s: %v", params.WorkspaceID, reqErr)
@@ -606,20 +644,21 @@ func LogActivity(ctx context.Context, broadcaster events.EventEmitter, params Ac
 		traceStr = &s
 	}

-	_, err := db.DB.ExecContext(ctx, `
+	if _, err := exec.ExecContext(ctx, `
 		INSERT INTO activity_logs (workspace_id, activity_type, source_id, target_id, method, summary, request_body, response_body, tool_trace, duration_ms, status, error_detail)
 		VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8::jsonb, $9::jsonb, $10, $11, $12)
 	`, params.WorkspaceID, params.ActivityType, params.SourceID, params.TargetID,
 		params.Method, params.Summary, reqStr, respStr, traceStr,
-		params.DurationMs, params.Status, params.ErrorDetail)
-	if err != nil {
-		log.Printf("LogActivity insert error: %v", err)
-		return
+		params.DurationMs, params.Status, params.ErrorDetail); err != nil {
+		return nil, err
 	}

-	// Broadcast ACTIVITY_LOGGED event
+	// Build the broadcast payload up-front so the post-commit hook is a
+	// pure in-memory call — no JSON marshaling between commit and emit
+	// where a panic would leak the row without an event.
+	var payload map[string]interface{}
 	if broadcaster != nil {
-		payload := map[string]interface{}{
+		payload = map[string]interface{}{
 			"activity_type": params.ActivityType,
 			"method":        params.Method,
 			"summary":       params.Summary,
@@ -650,8 +689,13 @@ func LogActivity(ctx context.Context, broadcaster events.EventEmitter, params Ac
 		if respStr != nil {
 			payload["response_body"] = json.RawMessage(respJSON)
 		}
-		broadcaster.BroadcastOnly(params.WorkspaceID, "ACTIVITY_LOGGED", payload)
 	}
+
+	return func() {
+		if broadcaster != nil {
+			broadcaster.BroadcastOnly(params.WorkspaceID, string(events.EventActivityLogged), payload)
+		}
+	}, nil
 }

 type ActivityParams struct {
@@ -5,6 +5,7 @@ import (
 	"context"
 	"database/sql/driver"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"net/http"
 	"net/http/httptest"
@@ -909,6 +910,114 @@ func TestLogActivity_Broadcast_IncludesRequestAndResponseBodies(t *testing.T) {
 	}
 }

+// TestLogActivityTx_DefersBroadcastUntilCommitHook pins the #149
+// contract: LogActivityTx returns a commitHook that the caller MUST
+// invoke after tx.Commit(); the broadcast MUST NOT fire from inside
+// LogActivityTx itself. Firing inside would leak a websocket event
+// for a row that the caller may roll back, painting a ghost message
+// into the canvas's optimistic UI that disappears on the next refresh.
+func TestLogActivityTx_DefersBroadcastUntilCommitHook(t *testing.T) {
+	mock := setupTestDB(t)
+	defer mock.ExpectationsWereMet() // NOTE(review): the returned error is discarded, so unmet expectations never fail this test
+
+	mock.ExpectBegin()
+	mock.ExpectExec("INSERT INTO activity_logs").
+		WillReturnResult(sqlmock.NewResult(1, 1))
+	mock.ExpectCommit()
+
+	tx, err := db.DB.BeginTx(context.Background(), nil)
+	if err != nil {
+		t.Fatalf("BeginTx: %v", err)
+	}
+
+	cb := &recordingBroadcaster{}
+	method := "chat_upload_receive"
+	hook, err := LogActivityTx(context.Background(), tx, cb, ActivityParams{
+		WorkspaceID:  "ws-123",
+		ActivityType: "a2a_receive",
+		Method:       &method,
+		Status:       "ok",
+	})
+	if err != nil {
+		t.Fatalf("LogActivityTx: %v", err)
+	}
+	if len(cb.calls) != 0 { // insert done, commit not yet — nothing may have been broadcast
+		t.Errorf("broadcast leaked before commitHook: got %d calls", len(cb.calls))
+	}
+	if err := tx.Commit(); err != nil {
+		t.Fatalf("Commit: %v", err)
+	}
+	hook() // post-commit: the only point where the broadcast is allowed to fire
+	if len(cb.calls) != 1 {
+		t.Fatalf("commitHook must broadcast exactly once, got %d", len(cb.calls))
+	}
+	if cb.calls[0].eventType != "ACTIVITY_LOGGED" {
+		t.Errorf("event type = %q, want ACTIVITY_LOGGED", cb.calls[0].eventType)
+	}
+}
+
+// TestLogActivityTx_InsertError_NoHook_NoBroadcast — when the INSERT
+// fails inside the Tx, LogActivityTx returns an error and a nil
+// commitHook. The caller is expected to Rollback; no broadcast can
+// possibly fire because the hook never exists.
+func TestLogActivityTx_InsertError_NoHook_NoBroadcast(t *testing.T) {
+	mock := setupTestDB(t)
+	defer mock.ExpectationsWereMet() // NOTE(review): the returned error is discarded, so unmet expectations never fail this test
+
+	mock.ExpectBegin()
+	mock.ExpectExec("INSERT INTO activity_logs").
+		WillReturnError(errors.New("constraint violation simulated"))
+	mock.ExpectRollback()
+
+	tx, err := db.DB.BeginTx(context.Background(), nil)
+	if err != nil {
+		t.Fatalf("BeginTx: %v", err)
+	}
+
+	cb := &recordingBroadcaster{}
+	method := "chat_upload_receive"
+	hook, err := LogActivityTx(context.Background(), tx, cb, ActivityParams{
+		WorkspaceID:  "ws-123",
+		ActivityType: "a2a_receive",
+		Method:       &method,
+		Status:       "ok",
+	})
+	if err == nil {
+		t.Fatal("expected error on INSERT failure, got nil")
+	}
+	if hook != nil {
+		t.Errorf("commitHook must be nil on insert error, got non-nil hook")
+	}
+	if err := tx.Rollback(); err != nil { // plays the caller's part of the contract: roll back after the error
+		t.Fatalf("Rollback: %v", err)
+	}
+	if len(cb.calls) != 0 {
+		t.Errorf("broadcast must NOT fire on insert error, got %d calls", len(cb.calls))
+	}
+}
+
+// TestLogActivityTx_NilTx_Errors — passing a nil tx is caller misuse.
+// Return an error rather than panicking on the nil receiver inside
+// ExecContext (which would crash the request goroutine and surface as
+// a 500 with no log line tying it to the bad call site).
+func TestLogActivityTx_NilTx_Errors(t *testing.T) {
+	cb := &recordingBroadcaster{}
+	hook, err := LogActivityTx(context.Background(), nil, cb, ActivityParams{
+		WorkspaceID:  "ws-123",
+		ActivityType: "a2a_receive",
+		Status:       "ok",
+	})
+	if err == nil {
+		t.Fatal("nil tx must error, got nil")
+	}
+	if hook != nil {
+		t.Errorf("commitHook must be nil when tx is nil, got non-nil hook")
+	}
+	if len(cb.calls) != 0 {
+		t.Errorf("broadcast must NOT fire on nil-tx error, got %d", len(cb.calls))
+	}
+}
+
 func TestLogActivity_Broadcast_IncludesResponseBody(t *testing.T) {
 	mock := setupTestDB(t)
 	defer mock.ExpectationsWereMet()
@@ -56,10 +56,17 @@ type RefreshResult struct {
 	Recreated []string `json:"recreated"`
 }

-// TemplateImageRef returns the canonical GHCR ref for a runtime's template
-// image. Single source of truth shared with imagewatch.
+// TemplateImageRef returns the canonical image ref for a runtime's template,
+// using the configured registry (provisioner.RegistryPrefix()) and the
+// moving `:latest` tag. Single source of truth shared with imagewatch.
+//
+// Defaults to ghcr.io/molecule-ai/workspace-template-<runtime>:latest
+// (upstream OSS). When MOLECULE_IMAGE_REGISTRY is set in the environment
+// (typically the AWS ECR mirror in production), this returns the prefixed
+// equivalent so admin operations and image-watch checks hit the same
+// registry the provisioner pulls from.
 func TemplateImageRef(runtime string) string {
-	return fmt.Sprintf("ghcr.io/molecule-ai/workspace-template-%s:latest", runtime)
+	return fmt.Sprintf("%s/workspace-template-%s:latest", provisioner.RegistryPrefix(), runtime)
 }

 // ghcrAuthHeader returns the base64-encoded JSON auth payload Docker's
@@ -69,7 +69,7 @@ func (h *AgentHandler) Assign(c *gin.Context) {
 		return
 	}

-	h.broadcaster.RecordAndBroadcast(ctx, "AGENT_ASSIGNED", workspaceID, map[string]interface{}{
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventAgentAssigned), workspaceID, map[string]interface{}{
 		"agent_id": agentID,
 		"model":    body.Model,
 	})
@@ -118,7 +118,7 @@ func (h *AgentHandler) Replace(c *gin.Context) {
 		return
 	}

-	h.broadcaster.RecordAndBroadcast(ctx, "AGENT_REPLACED", workspaceID, map[string]interface{}{
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventAgentReplaced), workspaceID, map[string]interface{}{
 		"agent_id":  agentID,
 		"model":     body.Model,
 		"old_model": oldModel,
@@ -148,7 +148,7 @@ func (h *AgentHandler) Remove(c *gin.Context) {
 		return
 	}

-	h.broadcaster.RecordAndBroadcast(ctx, "AGENT_REMOVED", workspaceID, map[string]interface{}{
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventAgentRemoved), workspaceID, map[string]interface{}{
 		"agent_id": agentID,
 		"model":    model,
 	})
@@ -215,21 +215,21 @@ func (h *AgentHandler) Move(c *gin.Context) {
 	}

 	// Broadcast on both workspaces
-	h.broadcaster.RecordAndBroadcast(ctx, "AGENT_MOVED", sourceID, map[string]interface{}{
-		"agent_id":             agentID,
-		"model":                model,
-		"target_workspace_id":  body.TargetWorkspaceID,
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventAgentMoved), sourceID, map[string]interface{}{
+		"agent_id":            agentID,
+		"model":               model,
+		"target_workspace_id": body.TargetWorkspaceID,
 	})
-	h.broadcaster.RecordAndBroadcast(ctx, "AGENT_MOVED", body.TargetWorkspaceID, map[string]interface{}{
-		"agent_id":             agentID,
-		"model":                model,
-		"source_workspace_id":  sourceID,
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventAgentMoved), body.TargetWorkspaceID, map[string]interface{}{
+		"agent_id":            agentID,
+		"model":               model,
+		"source_workspace_id": sourceID,
 	})

 	c.JSON(http.StatusOK, gin.H{
-		"agent_id":            agentID,
-		"model":               model,
-		"from_workspace":      sourceID,
-		"to_workspace":        body.TargetWorkspaceID,
+		"agent_id":       agentID,
+		"model":          model,
+		"from_workspace": sourceID,
+		"to_workspace":   body.TargetWorkspaceID,
 	})
 }
@@ -42,9 +42,9 @@ import (
 	"errors"
 	"fmt"
 	"log"
-	"unicode/utf8"

 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 )

 // ErrWorkspaceNotFound is returned by AgentMessageWriter.Send when the
@@ -54,36 +54,6 @@ import (
 // timeout) surface as wrapped errors and should be treated as 503.
 var ErrWorkspaceNotFound = errors.New("agent_message: workspace not found")

-// truncatePreviewRunes returns at most maxRunes runes of s, plus an ellipsis
-// when truncated. Operates on the rune (codepoint) boundary instead of
-// byte indices — the previous byte-slice version produced invalid UTF-8
-// when maxRunes landed mid-codepoint (CJK, emoji, accented characters
-// in agent-authored chat messages), and Postgres JSONB rejects invalid
-// UTF-8, dropping the activity_log INSERT silently. The persistence
-// failure log fires but the message vanishes from chat history — the
-// exact regression class the SSOT consolidation was built to prevent.
-//
-// maxRunes is in runes, not bytes — `truncatePreviewRunes("你好", 1)` returns
-// `"你…"`, not `"\xe4…"`. Set the cap on a UI-friendly basis (visible
-// character count, not stored byte count); 80 runes covers the
-// activity_logs.summary column comfortably.
-func truncatePreviewRunes(s string, maxRunes int) string {
-	if utf8.RuneCountInString(s) <= maxRunes {
-		return s
-	}
-	// Walk runes until we've consumed maxRunes; cut at that byte index.
-	count := 0
-	cut := len(s)
-	for i := range s {
-		if count == maxRunes {
-			cut = i
-			break
-		}
-		count++
-	}
-	return s[:cut] + "…"
-}
-
 // AgentMessageAttachment is one file attached to an agent → user
 // message. Identical to handlers.NotifyAttachment in field set; kept
 // distinct so the writer's API doesn't import a handler type with HTTP
@@ -186,7 +156,7 @@ func (w *AgentMessageWriter) Send(
 		respPayload["parts"] = fileParts
 	}
 	respJSON, _ := json.Marshal(respPayload)
-	preview := truncatePreviewRunes(message, 80)
+	preview := textutil.TruncateRunes(message, 80)
 	if _, err := w.db.ExecContext(ctx, `
 		INSERT INTO activity_logs (workspace_id, activity_type, method, summary, response_body, status)
 		VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok')
@@ -331,45 +331,11 @@ func TestAgentMessageWriter_Send_DBErrorOnLookupReturnsWrapped(t *testing.T) {
 	}
 }

-// TestTruncatePreviewRunes_RuneBoundary pins the multi-byte-safe
-// truncation. The previous byte-slice version produced invalid UTF-8
-// when the cut landed mid-codepoint (CJK, emoji, accented), and
-// Postgres JSONB rejects invalid UTF-8 — INSERT fails, log.Printf
-// fires, message vanishes from chat history. Per memory
-// feedback_assert_exact_not_substring.md, pin the boundary cases
-// directly.
-func TestTruncatePreviewRunes_RuneBoundary(t *testing.T) {
-	cases := []struct {
-		name     string
-		in       string
-		max      int
-		want     string
-	}{
-		{"under-max ASCII", "hi", 80, "hi"},
-		{"under-max CJK", "你好", 80, "你好"},
-		{"exactly-at-max", "abcde", 5, "abcde"},
-		{"truncate ASCII", "abcdefghij", 5, "abcde…"},
-		{"truncate CJK at rune boundary", "你好世界你好世界", 4, "你好世界…"},
-		{"truncate emoji at rune boundary", "😀😀😀😀😀😀", 3, "😀😀😀…"},
-		// The pre-fix bug shape: byte-slice on non-ASCII would have
-		// mangled the codepoint here. With rune-boundary truncation
-		// the result is well-formed UTF-8.
-		{"non-zero with emoji prefix", "🚀abcdefghijk", 5, "🚀abcd…"},
-	}
-	for _, c := range cases {
-		t.Run(c.name, func(t *testing.T) {
-			got := truncatePreviewRunes(c.in, c.max)
-			if got != c.want {
-				t.Errorf("truncatePreviewRunes(%q, %d) = %q, want %q", c.in, c.max, got, c.want)
-			}
-			// Always-valid UTF-8 invariant. A byte-slice truncation
-			// could leave partial codepoints; this version must not.
-			if !utf8.ValidString(got) {
-				t.Errorf("truncatePreviewRunes(%q, %d) returned invalid UTF-8: %q", c.in, c.max, got)
-			}
-		})
-	}
-}
+// Helper-level truncate tests now live in
+// internal/textutil/truncate_test.go (TestTruncateRunes). The
+// integration-level coverage that exercises the agent_message_writer
+// path with non-ASCII content is TestAgentMessageWriter_Send_NonASCIIMessagePersists
+// below.

 // TestAgentMessageWriter_Send_NonASCIIMessagePersists pins the end-to-end
 // path for non-ASCII messages — the original reno-stars regression
@@ -51,7 +51,7 @@ func (h *ApprovalsHandler) Create(c *gin.Context) {
 		return
 	}

-	h.broadcaster.RecordAndBroadcast(ctx, "APPROVAL_REQUESTED", workspaceID, map[string]interface{}{
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
 		"approval_id": approvalID,
 		"action":      body.Action,
 		"reason":      body.Reason,
@@ -62,7 +62,7 @@ func (h *ApprovalsHandler) Create(c *gin.Context) {
 	var parentID *string
 	db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID)
 	if parentID != nil {
-		h.broadcaster.RecordAndBroadcast(ctx, "APPROVAL_ESCALATED", *parentID, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
 			"approval_id":       approvalID,
 			"from_workspace_id": workspaceID,
 			"action":            body.Action,
@@ -656,8 +656,28 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
 		})
 	}

-	// Phase 2: atomic batch insert. On failure no rows commit.
-	fileIDs, err := h.pendingUploads.PutBatch(ctx, wsUUID, items)
+	// Phase 2+3: PutBatch + N activity-row inserts run in ONE Tx so
+	// either every pending_uploads row + every activity_logs row commits,
+	// or none do. Per-file pre-validation already happened above so the
+	// only failure modes inside the Tx are DB-side; either way Rollback
+	// leaves the table state unchanged and the client retries the whole
+	// multipart upload cleanly. Broadcasts are deferred until after
+	// Commit — emitting an ACTIVITY_LOGGED event for a row that ends up
+	// rolled back would leak a ghost message into the canvas's
+	// optimistic UI.
+	tx, err := db.DB.BeginTx(ctx, nil)
+	if err != nil {
+		log.Printf("chat_files uploadPollMode: begin tx for %s: %v", workspaceID, err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage files"})
+		return
+	}
+	// Defer-rollback is safe even after a successful Commit — Rollback on
+	// a finished tx just returns sql.ErrTxDone, which is discarded here.
+	defer func() {
+		_ = tx.Rollback()
+	}()
+
+	fileIDs, err := h.pendingUploads.PutBatchTx(ctx, tx, wsUUID, items)
 	if err != nil {
 		if errors.Is(err, pendinguploads.ErrTooLarge) {
 			// Belt + suspenders: pre-validation above already caught
@@ -669,28 +689,20 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
 			})
 			return
 		}
-		log.Printf("chat_files uploadPollMode: storage.PutBatch failed for %s: %v",
+		log.Printf("chat_files uploadPollMode: storage.PutBatchTx failed for %s: %v",
 			workspaceID, err)
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage files"})
 		return
 	}

-	// Phase 3: write per-file activity rows and build the response. Activity
-	// rows are written individually (not part of the same Tx as PutBatch)
-	// because LogActivity is shared across many handlers and threading the
-	// Tx through would be a bigger refactor. The trade-off: if an activity
-	// write fails after the PutBatch commits, the pending_uploads rows
-	// orphan until the 24h TTL — significantly better than the previous
-	// "every multi-file upload could orphan" behavior, and the workspace's
-	// fetcher handles soft-404 cleanly when activity rows reference a row
-	// the platform later expired.
 	out := make([]uploadedFile, 0, len(prepReady))
+	broadcasts := make([]func(), 0, len(prepReady))
 	for i, p := range prepReady {
 		fileID := fileIDs[i]
 		uri := fmt.Sprintf("platform-pending:%s/%s", workspaceID, fileID)
 		summary := "chat_upload_receive: " + p.Sanitized
 		method := "chat_upload_receive"
-		LogActivity(ctx, h.broadcaster, ActivityParams{
+		hook, err := LogActivityTx(ctx, tx, h.broadcaster, ActivityParams{
 			WorkspaceID:  workspaceID,
 			ActivityType: "a2a_receive",
 			TargetID:     &workspaceID,
@@ -705,10 +717,13 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
 			},
 			Status: "ok",
 		})
-
-		log.Printf("chat_files uploadPollMode: staged %s/%s (file_id=%s size=%d mimetype=%q)",
-			workspaceID, p.Sanitized, fileID, len(p.Content), p.Mimetype)
-
+		if err != nil {
+			log.Printf("chat_files uploadPollMode: activity insert failed for %s/%s: %v",
+				workspaceID, p.Sanitized, err)
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "could not log upload activity"})
+			return
+		}
+		broadcasts = append(broadcasts, hook)
 		out = append(out, uploadedFile{
 			URI:      uri,
 			Name:     p.Sanitized,
@@ -717,6 +732,24 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
 		})
 	}

+	if err := tx.Commit(); err != nil {
+		log.Printf("chat_files uploadPollMode: commit failed for %s: %v", workspaceID, err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage files"})
+		return
+	}
+
+	// Post-commit: fire deferred broadcasts and emit the staged log
+	// lines now that the rows are durable. Broadcasts are pure in-memory
+	// (no I/O); a panic here cannot undo the commit — it would only skip the
+	// remaining broadcasts/log lines, so the order doesn't matter for correctness.
+	for _, b := range broadcasts {
+		b()
+	}
+	for i, p := range prepReady {
+		log.Printf("chat_files uploadPollMode: staged %s/%s (file_id=%s size=%d mimetype=%q)",
+			workspaceID, p.Sanitized, fileIDs[i], len(p.Content), p.Mimetype)
+	}
+
 	c.JSON(http.StatusOK, gin.H{"files": out})
 }

@@ -107,6 +107,16 @@ func (s *inMemStorage) PutBatch(_ context.Context, ws uuid.UUID, items []pending
 	return ids, nil
 }

+// PutBatchTx mirrors PutBatch for the Tx-aware caller path. The tx
+// argument is not consulted — production atomicity (PutBatch INSERTs +
+// activity_logs INSERTs in the same Tx) is verified by the dedicated
+// integration test against real Postgres. This in-mem fake records the
+// puts immediately; tests that exercise the rollback path use
+// putErr/sqlmock to simulate the failure.
+func (s *inMemStorage) PutBatchTx(ctx context.Context, _ *sql.Tx, ws uuid.UUID, items []pendinguploads.PutItem) ([]uuid.UUID, error) {
+	return s.PutBatch(ctx, ws, items)
+}
+
 func (s *inMemStorage) Get(context.Context, uuid.UUID) (pendinguploads.Record, error) {
 	return pendinguploads.Record{}, pendinguploads.ErrNotFound
 }
@@ -138,11 +148,37 @@ func expectPollDeliveryModeMissing(mock sqlmock.Sqlmock, workspaceID string) {

 // expectActivityInsert stubs the LogActivity INSERT so the poll branch's
 // per-file activity row write doesn't fail the sqlmock expectations.
+// In the post-#149 path this INSERT runs inside the BeginTx that wraps
+// PutBatchTx + N activity rows — pair it with expectUploadPollTxBegin
+// + expectUploadPollTxCommit (or Rollback) when the test exercises
+// uploadPollMode.
 func expectActivityInsert(mock sqlmock.Sqlmock) {
 	mock.ExpectExec(`INSERT INTO activity_logs`).
 		WillReturnResult(sqlmock.NewResult(1, 1))
 }

+// expectUploadPollTxBegin marks the start of the BeginTx that
+// uploadPollMode opens around PutBatchTx + per-file LogActivityTx.
+// inMemStorage doesn't drive sqlmock for the pending_uploads INSERTs
+// (it's a process-local fake), so the only Tx-scoped DB calls
+// sqlmock sees are the activity_logs INSERTs.
+func expectUploadPollTxBegin(mock sqlmock.Sqlmock) {
+	mock.ExpectBegin()
+}
+
+// expectUploadPollTxCommit pairs with expectUploadPollTxBegin on the
+// happy path — every activity row inserted, Tx committed.
+func expectUploadPollTxCommit(mock sqlmock.Sqlmock) {
+	mock.ExpectCommit()
+}
+
+// expectUploadPollTxRollback pairs with expectUploadPollTxBegin on a
+// failure path — PutBatchTx error, activity insert error, or any other
+// abort that triggers the deferred tx.Rollback() in uploadPollMode.
+func expectUploadPollTxRollback(mock sqlmock.Sqlmock) {
+	mock.ExpectRollback()
+}
+
 // expectActivityInsertWithTypeAndMethod is a strict variant that pins
 // the activity_type and method positional args. Used in the discriminator
 // regression test below — the workspace inbox poller filters
@@ -198,7 +234,9 @@ func TestPollUpload_HappyPath_OneFile_StagesAndLogs(t *testing.T) {

 	wsID := "11111111-2222-3333-4444-555555555555"
 	expectPollDeliveryMode(mock, wsID, "poll")
+	expectUploadPollTxBegin(mock)
 	expectActivityInsert(mock)
+	expectUploadPollTxCommit(mock)

 	store := newInMemStorage()
 	h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
@@ -254,9 +292,11 @@ func TestPollUpload_MultipleFiles_AllStagedAndLogged(t *testing.T) {

 	wsID := "11111111-aaaa-bbbb-cccc-555555555555"
 	expectPollDeliveryMode(mock, wsID, "poll")
+	expectUploadPollTxBegin(mock)
 	expectActivityInsert(mock)
 	expectActivityInsert(mock)
 	expectActivityInsert(mock)
+	expectUploadPollTxCommit(mock)

 	store := newInMemStorage()
 	h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
@@ -425,6 +465,8 @@ func TestPollUpload_StorageError_500(t *testing.T) {

 	wsID := "88888888-2222-3333-4444-555555555555"
 	expectPollDeliveryMode(mock, wsID, "poll")
+	expectUploadPollTxBegin(mock)
+	expectUploadPollTxRollback(mock)

 	store := newInMemStorage()
 	store.putErr = errors.New("disk full")
@@ -446,6 +488,8 @@ func TestPollUpload_StorageTooLarge_413(t *testing.T) {

 	wsID := "99999999-2222-3333-4444-555555555555"
 	expectPollDeliveryMode(mock, wsID, "poll")
+	expectUploadPollTxBegin(mock)
+	expectUploadPollTxRollback(mock)

 	store := newInMemStorage()
 	store.putErr = pendinguploads.ErrTooLarge
@@ -569,7 +613,9 @@ func TestPollUpload_SanitizesFilenameInResponse(t *testing.T) {

 	wsID := "bbbbbbbb-2222-3333-4444-555555555555"
 	expectPollDeliveryMode(mock, wsID, "poll")
+	expectUploadPollTxBegin(mock)
 	expectActivityInsert(mock)
+	expectUploadPollTxCommit(mock)

 	store := newInMemStorage()
 	h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
@@ -650,6 +696,8 @@ func TestPollUpload_AtomicRollbackOnPutBatchError(t *testing.T) {

 	wsID := "bbbbbbbb-3333-3333-4444-555555555555"
 	expectPollDeliveryMode(mock, wsID, "poll")
+	expectUploadPollTxBegin(mock)
+	expectUploadPollTxRollback(mock)

 	store := newInMemStorage()
 	store.putErr = errors.New("db down mid-batch")
@@ -672,6 +720,58 @@ func TestPollUpload_AtomicRollbackOnPutBatchError(t *testing.T) {
 	}
 }

+// TestPollUpload_AtomicRollbackOnActivityInsertFailure pins the #149
+// guarantee: if an activity_logs INSERT fails mid-loop (after some
+// rows have already been INSERTed in the same Tx), uploadPollMode
+// MUST Rollback so neither the pending_uploads nor the activity rows
+// commit. Pre-#149 the activity rows were written one-by-one outside
+// any Tx; a mid-loop failure left orphan pending_uploads rows the
+// 24h TTL would later sweep, but the user never saw the file in the
+// canvas. Post-#149 the contract is all-or-nothing.
+//
+// What this pins: the second activity insert errors → Tx rolls back
+// → response is 500 → no Commit. Pin via the sqlmock rollback
+// expectation; the inMemStorage will report puts=N (it doesn't model
+// Tx state), but at the SQL layer no rows committed.
+func TestPollUpload_AtomicRollbackOnActivityInsertFailure(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	wsID := "cccccccc-3333-3333-4444-555555555555"
+	expectPollDeliveryMode(mock, wsID, "poll")
+	expectUploadPollTxBegin(mock)
+	// File 1 inserts cleanly. File 2's INSERT fails. uploadPollMode
+	// must NOT call Commit and the deferred tx.Rollback() runs.
+	mock.ExpectExec(`INSERT INTO activity_logs`).
+		WillReturnResult(sqlmock.NewResult(1, 1))
+	mock.ExpectExec(`INSERT INTO activity_logs`).
+		WillReturnError(errors.New("constraint violation simulated"))
+	expectUploadPollTxRollback(mock)
+
+	store := newInMemStorage()
+	h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
+		WithPendingUploads(store, nil)
+
+	body, ct := pollUploadFixture(t, map[string][]byte{
+		"a.txt": []byte("aaa"),
+		"b.txt": []byte("bbb"),
+		"c.txt": []byte("ccc"),
+	})
+	c, w := makeUploadRequest(t, wsID, body, ct)
+	h.Upload(c)
+
+	if w.Code != http.StatusInternalServerError {
+		t.Fatalf("status=%d body=%s, want 500 on activity-insert mid-loop failure",
+			w.Code, w.Body.String())
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		// This is the load-bearing assertion: ExpectationsWereMet only
+		// passes if Rollback was called and Commit was NOT — the SQL-
+		// level proof of the all-or-nothing contract.
+		t.Errorf("Tx must rollback (and NOT commit) on activity-insert failure: %v", err)
+	}
+}
+
 // TestPollUpload_MimetypeWithCRLFInjectionStripped pins the safeMimetype
 // hardening: a multipart-supplied Content-Type header with CR/LF is
 // rewritten to application/octet-stream so the eventual /content
@@ -731,7 +831,9 @@ func TestPollUpload_ActivityRowDiscriminator(t *testing.T) {

 	wsID := "abc12345-6789-4abc-8def-000000000999"
 	expectPollDeliveryMode(mock, wsID, "poll")
+	expectUploadPollTxBegin(mock)
 	expectActivityInsertWithTypeAndMethod(mock, wsID, "a2a_receive", "chat_upload_receive")
+	expectUploadPollTxCommit(mock)

 	store := newInMemStorage()
 	h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
@@ -0,0 +1,113 @@
+package handlers
+
+// chat_history.go — HTTP-shape adapter over messagestore.MessageStore
+// (RFC #2945 PR-D).
+//
+// Pre-PR-D, this file owned the activity_logs query AND the parser
+// AND the HTTP plumbing. PR-D extracts the storage + parser into
+// internal/messagestore/ so OSS operators can plug in alternative
+// backends (S3-tiered, vector store, in-memory). The handler is now
+// a thin adapter: parse query params → call store → emit JSON.
+//
+// Endpoint: GET /workspaces/:id/chat-history?limit=N&before_ts=T
+// Auth: same wsAuth chain as /workspaces/:id/activity (tenant
+// ADMIN_TOKEN + X-Molecule-Org-Id header). No new trust boundary.
+//
+// Behavioral parity with canvas TS is enforced at the messagestore
+// layer (internal/messagestore/postgres_store_test.go); this file's
+// tests cover the HTTP-shape concerns only.
+
+import (
+	"net/http"
+	"strconv"
+	"time"
+
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/messagestore"
+	"github.com/gin-gonic/gin"
+	"github.com/google/uuid"
+)
+
+// ChatHistoryResponse is the wire shape for GET /chat-history.
+type ChatHistoryResponse struct {
+	Messages   []messagestore.ChatMessage `json:"messages"`    // page of messages; List swaps a nil slice for an empty one before encoding
+	ReachedEnd bool                       `json:"reached_end"` // the store's second return value, passed through unchanged
+}
+
+// ChatHistoryHandler exposes the typed chat-history endpoint over a
+// MessageStore. The store is injected so OSS operators can swap the
+// backend without forking the handler.
+type ChatHistoryHandler struct {
+	store messagestore.MessageStore // backend that List queries; assigned by NewChatHistoryHandler
+}
+
+// NewChatHistoryHandler returns a handler that serves chat history
+// from the supplied MessageStore. Accepting the interface instead of
+// a concrete type leaves the backend choice to the caller: production
+// wiring typically passes messagestore.NewPostgresMessageStore, while
+// the handler tests (chat_history_test.go) substitute a fake.
+func NewChatHistoryHandler(store messagestore.MessageStore) *ChatHistoryHandler {
+	h := new(ChatHistoryHandler)
+	h.store = store
+	return h
+}
+
+// List handles GET /workspaces/:id/chat-history?limit=N&before_ts=T.
+//
+// Query parameters mirror /activity for caller convenience:
+//
+//   - limit (default 100, max 1000) — page size
+//   - before_ts (RFC3339, optional) — cursor for paginating backward
+//
+// Validates inputs at the trust boundary; the store sees only
+// well-formed ListOptions.
+func (h *ChatHistoryHandler) List(c *gin.Context) {
+	workspaceID := c.Param("id")
+	if _, err := uuid.Parse(workspaceID); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "workspace id must be a UUID"})
+		return
+	}
+
+	limit := 100
+	if v := c.Query("limit"); v != "" {
+		if n, err := strconv.Atoi(v); err == nil && n > 0 {
+			limit = n
+		}
+	}
+	if limit > 1000 {
+		limit = 1000
+	}
+
+	opts := messagestore.ListOptions{Limit: limit}
+	if v := c.Query("before_ts"); v != "" {
+		t, err := time.Parse(time.RFC3339, v)
+		if err != nil {
+			c.JSON(http.StatusBadRequest, gin.H{
+				"error": "before_ts must be an RFC3339 timestamp (e.g. 2026-05-01T00:00:00Z)",
+			})
+			return
+		}
+		opts.BeforeTS = t
+		opts.HasBefore = true
+	}
+
+	messages, reachedEnd, err := h.store.List(c.Request.Context(), workspaceID, opts)
+	if err != nil {
+		// Errors here are infra (DB unreachable, store impl failure).
+		// Surface as 502 so the canvas can retry vs. treating as
+		// "no rows."
+		c.JSON(http.StatusBadGateway, gin.H{"error": "chat history unavailable"})
+		return
+	}
+
+	// Defensive: if the store returns nil messages slice (any impl
+	// might), emit empty array rather than `null` so canvas's JSON
+	// parser doesn't have to handle two empty representations.
+	if messages == nil {
+		messages = []messagestore.ChatMessage{}
+	}
+
+	c.JSON(http.StatusOK, ChatHistoryResponse{
+		Messages:   messages,
+		ReachedEnd: reachedEnd,
+	})
+}
@@ -0,0 +1,276 @@
+package handlers
+
+// chat_history_test.go — handler-level tests against a fake
+// MessageStore. The parser-level parity tests against the canvas TS
+// fixtures live in internal/messagestore/postgres_store_test.go;
+// this file covers the HTTP-shape concerns (param validation,
+// pagination passthrough, error mapping) without touching a DB.
+//
+// Why the split: PR-D extracted storage to messagestore.MessageStore.
+// The handler is now a thin adapter — its tests should exercise the
+// adapter (ParseQuery → store.List → emitJSON), not the parser. A
+// future MessageStore impl (S3, vector store) shares the same
+// handler; testing the handler against the interface keeps the
+// adapter test independent of any specific impl.
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/messagestore"
+	"github.com/gin-gonic/gin"
+)
+
+const testWorkspaceID = "550e8400-e29b-41d4-a716-446655440000"
+
+func init() { // package-level init: runs before any test in this package
+	gin.SetMode(gin.TestMode) // put gin into its test mode for every engine the tests create
+}
+
+// fakeStore is a stub MessageStore for handler-level tests. It
+// records the arguments of the most recent List call and returns
+// whatever the test scripted, so the HTTP adapter can be exercised
+// without a real backend.
+type fakeStore struct {
+	// LastWorkspaceID + LastOpts capture the call shape so the test
+	// can assert the handler passed the right args to the store.
+	LastWorkspaceID string
+	LastOpts        messagestore.ListOptions
+
+	// Returns — set per test; List hands them back verbatim.
+	ReturnMessages   []messagestore.ChatMessage
+	ReturnReachedEnd bool
+	ReturnErr        error
+
+	// PanicWith — if non-empty, List panics with this string instead
+	// of recording the call. NOTE(review): no test in this file sets
+	// it and the handler has no recover of its own — confirm usage.
+	PanicWith string
+}
+
+func (s *fakeStore) List(ctx context.Context, workspaceID string, opts messagestore.ListOptions) ([]messagestore.ChatMessage, bool, error) {
+	// Scripted panic fires first, before any call arguments are stored.
+	if msg := s.PanicWith; msg != "" {
+		panic(msg)
+	}
+	s.LastWorkspaceID, s.LastOpts = workspaceID, opts
+	return s.ReturnMessages, s.ReturnReachedEnd, s.ReturnErr
+}
+
+// Compile-time assertion that fakeStore satisfies the interface.
+// Catches drift if the interface changes and the fake stops being a
+// drop-in for tests.
+var _ messagestore.MessageStore = (*fakeStore)(nil)
+
+func newRouter(store messagestore.MessageStore) *gin.Engine {
+	// Bare engine (gin.New) with only the chat-history route mounted.
+	engine := gin.New()
+	engine.GET("/workspaces/:id/chat-history", NewChatHistoryHandler(store).List)
+	return engine
+}
+
+func doChatHistoryRequest(t *testing.T, r *gin.Engine, path string) *httptest.ResponseRecorder {
+	t.Helper()
+	// Serve one body-less GET in-process and hand back the recorder.
+	rec := httptest.NewRecorder()
+	r.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, path, nil))
+	return rec
+}
+
+// =====================================================================
+// Param validation
+// =====================================================================
+
+func TestChatHistoryHandler_RejectsNonUUIDWorkspaceID(t *testing.T) {
+	// The fake only records LastWorkspaceID when List is invoked, so an
+	// empty value afterwards shows the handler stopped before the store.
+	fake := &fakeStore{}
+	rec := doChatHistoryRequest(t, newRouter(fake), "/workspaces/not-a-uuid/chat-history")
+	if got := rec.Code; got != http.StatusBadRequest {
+		t.Errorf("expected 400 for non-UUID, got %d", got)
+	}
+	if seen := fake.LastWorkspaceID; seen != "" {
+		t.Errorf("non-UUID reached the store: %q", seen)
+	}
+}
+
+func TestChatHistoryHandler_RejectsMalformedBeforeTS(t *testing.T) {
+	// A before_ts that is not RFC3339 must yield 400, and the error
+	// text must name the expected format.
+	target := "/workspaces/" + testWorkspaceID + "/chat-history?before_ts=not-a-timestamp"
+	rec := doChatHistoryRequest(t, newRouter(&fakeStore{}), target)
+	if got := rec.Code; got != http.StatusBadRequest {
+		t.Errorf("expected 400 for malformed before_ts, got %d", got)
+	}
+	if body := rec.Body.String(); !strings.Contains(body, "RFC3339") {
+		t.Errorf("error message should mention RFC3339; got %q", body)
+	}
+}
+
+func TestChatHistoryHandler_DefaultsLimitTo100(t *testing.T) {
+	// No query string at all: the store should see the default page
+	// size and no cursor.
+	fake := &fakeStore{}
+	doChatHistoryRequest(t, newRouter(fake), "/workspaces/"+testWorkspaceID+"/chat-history")
+	if got := fake.LastOpts.Limit; got != 100 {
+		t.Errorf("default limit=%d want 100", got)
+	}
+	if fake.LastOpts.HasBefore {
+		t.Errorf("HasBefore should be false when no cursor passed")
+	}
+}
+
+func TestChatHistoryHandler_ClampsLimitToMax1000(t *testing.T) {
+	// An oversized limit is capped at 1000 before the store sees it.
+	fake := &fakeStore{}
+	target := "/workspaces/" + testWorkspaceID + "/chat-history?limit=99999"
+	doChatHistoryRequest(t, newRouter(fake), target)
+	if got := fake.LastOpts.Limit; got != 1000 {
+		t.Errorf("limit not clamped: got %d, want 1000", got)
+	}
+}
+
+func TestChatHistoryHandler_IgnoresInvalidLimit(t *testing.T) {
+	// Negative, zero and non-numeric limits are all replaced by the
+	// default of 100 instead of being forwarded to the store.
+	fake := &fakeStore{}
+	engine := newRouter(fake)
+	base := "/workspaces/" + testWorkspaceID + "/chat-history?limit="
+	for _, bad := range []string{"-1", "0", "abc"} {
+		fake.LastOpts = messagestore.ListOptions{} // forget the previous iteration
+		doChatHistoryRequest(t, engine, base+bad)
+		if got := fake.LastOpts.Limit; got != 100 {
+			t.Errorf("limit=%q yielded %d, want default 100", bad, got)
+		}
+	}
+}
+
+// =====================================================================
+// Pagination passthrough
+// =====================================================================
+
+func TestChatHistoryHandler_BeforeTSPassedToStore(t *testing.T) {
+	// Cursor and page size from the query string must both arrive in
+	// the ListOptions handed to the store.
+	fake := &fakeStore{}
+	target := "/workspaces/" + testWorkspaceID + "/chat-history?before_ts=2026-04-25T18:00:00Z&limit=25"
+	doChatHistoryRequest(t, newRouter(fake), target)
+	opts := fake.LastOpts
+	if !opts.HasBefore {
+		t.Errorf("HasBefore=false but query passed before_ts")
+	}
+	if got := opts.BeforeTS.UTC().Format("2006-01-02T15:04:05Z"); got != "2026-04-25T18:00:00Z" {
+		t.Errorf("BeforeTS=%q want 2026-04-25T18:00:00Z", got)
+	}
+	if opts.Limit != 25 {
+		t.Errorf("limit=%d want 25", opts.Limit)
+	}
+}
+
+// =====================================================================
+// Response shape
+// =====================================================================
+
+func TestChatHistoryHandler_EmptyResultIsArrayNotNull(t *testing.T) {
+	// The store hands back a nil slice here; the wire form must still
+	// be `[]` so clients see a single empty representation.
+	fake := &fakeStore{ReturnMessages: nil, ReturnReachedEnd: true}
+	rec := doChatHistoryRequest(t, newRouter(fake), "/workspaces/"+testWorkspaceID+"/chat-history")
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d", rec.Code)
+	}
+
+	raw := rec.Body.Bytes()
+	var decoded ChatHistoryResponse
+	if err := json.Unmarshal(raw, &decoded); err != nil {
+		t.Fatalf("body not JSON: %v", err)
+	}
+	// Decoding `null` also produces a nil slice, so the decoded struct
+	// cannot tell the two apart — inspect the raw bytes for the
+	// literal `[]` instead.
+	if !strings.Contains(string(raw), `"messages":[]`) {
+		t.Errorf("body should contain `\"messages\":[]`; got %s", string(raw))
+	}
+	if !decoded.ReachedEnd {
+		t.Errorf("reached_end not propagated")
+	}
+}
+
+func TestChatHistoryHandler_NonEmptyResponsePreservesShape(t *testing.T) {
+	// Two messages go through the handler; the second carries an attachment with a pointer Size.
+	size := int64(4096)
+	store := &fakeStore{
+		ReturnMessages: []messagestore.ChatMessage{
+			{
+				ID:        "msg-1",
+				Role:      "user",
+				Content:   "hi",
+				Timestamp: "2026-04-25T18:00:00Z",
+			},
+			{
+				ID:      "msg-2",
+				Role:    "agent",
+				Content: "hello back",
+				Attachments: []messagestore.ChatAttachment{
+					{Name: "img.png", URI: "workspace:/img.png", MimeType: "image/png", Size: &size},
+				},
+				Timestamp: "2026-04-25T18:00:01Z",
+			},
+		},
+	}
+	w := doChatHistoryRequest(t, newRouter(store), "/workspaces/"+testWorkspaceID+"/chat-history")
+	if w.Code != http.StatusOK {
+		t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
+	}
+	var resp ChatHistoryResponse
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("body not JSON: %v", err)
+	}
+	if len(resp.Messages) != 2 {
+		t.Fatalf("messages=%d want 2", len(resp.Messages))
+	}
+	// Check the slice length before indexing so a dropped attachment fails the test, not panics it.
+	atts := resp.Messages[1].Attachments
+	if len(atts) != 1 || atts[0].Size == nil || *atts[0].Size != 4096 {
+		t.Errorf("attachment or size pointer lost in JSON round-trip: %+v", atts)
+	}
+}
+
+// =====================================================================
+// Error mapping — store errors become 502, not 500/panic
+// =====================================================================
+
+func TestChatHistoryHandler_StoreErrorReturns502(t *testing.T) {
+	// Any error from the store maps to 502 with a generic body.
+	fake := &fakeStore{ReturnErr: errors.New("simulated DB unreachable")}
+	rec := doChatHistoryRequest(t, newRouter(fake), "/workspaces/"+testWorkspaceID+"/chat-history")
+
+	if got := rec.Code; got != http.StatusBadGateway {
+		t.Errorf("expected 502 on store error, got %d", got)
+	}
+	if body := rec.Body.String(); !strings.Contains(body, "unavailable") {
+		t.Errorf("response body should communicate unavailability; got %q", body)
+	}
+}
+
+// =====================================================================
+// Interface conformance — the platform-default Postgres impl is the
+// only impl in tree today, but the assertion catches future drift if
+// the interface evolves and the impl falls behind.
+// =====================================================================
+
+func TestMessageStoreInterface_PostgresImplSatisfies(t *testing.T) {
+	// Assigning the constructor's result to the interface type is the
+	// whole check: this file stops compiling if the Postgres store no
+	// longer satisfies MessageStore. The canonical compile-time
+	// assertion is said to live in messagestore/postgres_store.go;
+	// this copy keeps the handler/store pairing visible to readers of
+	// the handler tests. No store method is called on the value, which
+	// is built from a nil argument.
+	var _ messagestore.MessageStore = messagestore.NewPostgresMessageStore(nil)
+}
@@ -10,6 +10,7 @@ import (

 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
 )
@@ -164,10 +165,10 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
 	go h.executeDelegation(sourceID, body.TargetID, delegationID, a2aBody)

 	// Broadcast event so canvas shows delegation in real-time
-	h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_SENT", sourceID, map[string]interface{}{
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationSent), sourceID, map[string]interface{}{
 		"delegation_id": delegationID,
 		"target_id":     body.TargetID,
-		"task_preview":  truncate(body.Task, 100),
+		"task_preview":  textutil.TruncateBytes(body.Task, 100),
 	})

 	resp := gin.H{
@@ -317,7 +318,7 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s

 	// Update status: pending → dispatched
 	h.updateDelegationStatus(sourceID, delegationID, "dispatched", "")
-	h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_STATUS", sourceID, map[string]interface{}{
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationStatus), sourceID, map[string]interface{}{
 		"delegation_id": delegationID, "target_id": targetID, "status": "dispatched",
 	})

@@ -352,7 +353,7 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
 			log.Printf("Delegation %s: failed to insert error log: %v", delegationID, err)
 		}

-		h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_FAILED", sourceID, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationFailed), sourceID, map[string]interface{}{
 			"delegation_id": delegationID, "target_id": targetID, "error": proxyErr.Error(),
 		})
 		// RFC #2829 PR-2 result-push (see UpdateStatus for rationale).
@@ -388,7 +389,7 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
 		`, sourceID, sourceID, targetID, "Delegation queued — target at capacity", string(queuedJSON)); err != nil {
 			log.Printf("Delegation %s: failed to insert queued log: %v", delegationID, err)
 		}
-		h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_STATUS", sourceID, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationStatus), sourceID, map[string]interface{}{
 			"delegation_id": delegationID, "target_id": targetID, "status": "queued",
 		})
 		return
@@ -407,7 +408,7 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
 	if _, err := db.DB.ExecContext(ctx, `
 		INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, target_id, summary, response_body, status)
 		VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4, $5::jsonb, 'completed')
-	`, sourceID, sourceID, targetID, "Delegation completed ("+truncate(responseText, 80)+")", string(respJSON)); err != nil {
+	`, sourceID, sourceID, targetID, "Delegation completed ("+textutil.TruncateBytes(responseText, 80)+")", string(respJSON)); err != nil {
 		log.Printf("Delegation %s: failed to insert success log: %v", delegationID, err)
 	}

@@ -420,10 +421,10 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
 	// delegation_ledger_integration_test.go.
 	recordLedgerStatus(ctx, delegationID, "completed", "", responseText)
 	h.updateDelegationStatus(sourceID, delegationID, "completed", "")
-	h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_COMPLETE", sourceID, map[string]interface{}{
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{
 		"delegation_id":    delegationID,
 		"target_id":        targetID,
-		"response_preview": truncate(responseText, 200),
+		"response_preview": textutil.TruncateBytes(responseText, 200),
 	})
 	// RFC #2829 PR-2 result-push (see UpdateStatus for rationale).
 	pushDelegationResultToInbox(ctx, sourceID, delegationID, "completed", responseText, "")
@@ -503,10 +504,10 @@ func (h *DelegationHandler) Record(c *gin.Context) {
 	recordLedgerInsert(ctx, sourceID, body.TargetID, body.DelegationID, body.Task, "")
 	recordLedgerStatus(ctx, body.DelegationID, "dispatched", "", "")

-	h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_SENT", sourceID, map[string]interface{}{
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationSent), sourceID, map[string]interface{}{
 		"delegation_id": body.DelegationID,
 		"target_id":     body.TargetID,
-		"task_preview":  truncate(body.Task, 100),
+		"task_preview":  textutil.TruncateBytes(body.Task, 100),
 	})

 	c.JSON(http.StatusAccepted, gin.H{
@@ -555,12 +556,12 @@ func (h *DelegationHandler) UpdateStatus(c *gin.Context) {
 		if _, err := db.DB.ExecContext(ctx, `
 			INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, summary, response_body, status)
 			VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4::jsonb, 'completed')
-		`, sourceID, sourceID, "Delegation completed ("+truncate(body.ResponsePreview, 80)+")", string(respJSON)); err != nil {
+		`, sourceID, sourceID, "Delegation completed ("+textutil.TruncateBytes(body.ResponsePreview, 80)+")", string(respJSON)); err != nil {
 			log.Printf("Delegation UpdateStatus: result insert failed for %s: %v", delegationID, err)
 		}
-		h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_COMPLETE", sourceID, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{
 			"delegation_id":    delegationID,
-			"response_preview": truncate(body.ResponsePreview, 200),
+			"response_preview": textutil.TruncateBytes(body.ResponsePreview, 200),
 		})
 		// RFC #2829 PR-2 result-push: when the gate is on, also write an
 		// a2a_receive row so the caller's inbox poller surfaces this to
@@ -570,7 +571,7 @@ func (h *DelegationHandler) UpdateStatus(c *gin.Context) {
 		// the result instead of holding open an HTTP connection.
 		pushDelegationResultToInbox(ctx, sourceID, delegationID, "completed", body.ResponsePreview, "")
 	} else {
-		h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_FAILED", sourceID, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationFailed), sourceID, map[string]interface{}{
 			"delegation_id": delegationID,
 			"error":         body.Error,
 		})
@@ -626,7 +627,7 @@ func (h *DelegationHandler) ListDelegations(c *gin.Context) {
 			entry["error"] = errorDetail
 		}
 		if responseBody != "" {
-			entry["response_preview"] = truncate(responseBody, 300)
+			entry["response_preview"] = textutil.TruncateBytes(responseBody, 300)
 		}
 		delegations = append(delegations, entry)
 	}
@@ -727,9 +728,3 @@ func extractResponseText(body []byte) string {
 	return string(body)
 }

-func truncate(s string, max int) string {
-	if len(s) <= max {
-		return s
-	}
-	return s[:max] + "..."
-}
@@ -8,6 +8,7 @@ import (
 	"time"

 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 )

 // delegation_ledger.go — durable per-task ledger for A2A delegation
@@ -50,40 +51,15 @@ func NewDelegationLedger(handle *sql.DB) *DelegationLedger {
 	return &DelegationLedger{db: handle}
 }

-// truncatePreview caps stored preview at 4KB. The full prompt/response is
-// already in activity_logs.{request,response}_body — this is the at-a-glance
-// view for the dashboard, not a forensic record.
+// previewCap caps stored preview at 4KB. The full prompt/response is
+// already in activity_logs.{request,response}_body — this is the
+// at-a-glance view for the dashboard, not a forensic record.
 //
-// Rune-safe: previous byte-slice form (s[:previewCap]) split on a byte
-// boundary, which on a multi-byte codepoint at byte 4096 produced
-// invalid UTF-8 — Postgres JSONB rejects → ledger row not inserted →
-// audit gap. Issue #2962. Walks the string by rune, stops at the last
-// rune-boundary index that fits inside the cap. ASCII-only strings hit
-// the cap exactly; CJK/emoji strings stop slightly under the cap,
-// never over.
-//
-// Mirrors the truncatePreviewRunes fix from agent_message_writer.go
-// (#2959). Both call sites should consume a shared helper after both
-// fixes have landed — followup deduplication tracked in #2962's body.
+// Truncation goes through textutil.TruncateBytesNoMarker so it's
+// rune-safe (#2026 / #2959 / #2962 bug class: byte-slice mid-codepoint
+// → Postgres JSONB rejects → silent INSERT failure → audit gap).
 const previewCap = 4096

-func truncatePreview(s string) string {
-	if len(s) <= previewCap {
-		return s
-	}
-	// Range over a string yields rune-boundary byte indices. Walk
-	// until the next index would exceed previewCap; the previous
-	// index is the safe truncation point.
-	end := 0
-	for i := range s {
-		if i > previewCap {
-			break
-		}
-		end = i
-	}
-	return s[:end]
-}
-
 // InsertOpts is the agent's record-of-intent. Caller, callee, task preview,
 // and the chosen delegation_id are required; idempotency_key is optional.
 type InsertOpts struct {
@@ -118,7 +94,7 @@ func (l *DelegationLedger) Insert(ctx context.Context, opts InsertOpts) {
 		) VALUES ($1, $2, $3, $4, 'queued', $5, $6)
 		ON CONFLICT (delegation_id) DO NOTHING
 	`, opts.DelegationID, opts.CallerID, opts.CalleeID,
-		truncatePreview(opts.TaskPreview), deadline, idemArg)
+		textutil.TruncateBytesNoMarker(opts.TaskPreview, previewCap), deadline, idemArg)
 	if err != nil {
 		log.Printf("delegation_ledger Insert(%s): %v", opts.DelegationID, err)
 	}
@@ -197,7 +173,7 @@ func (l *DelegationLedger) SetStatus(ctx context.Context,
 		    result_preview = NULLIF($4, ''),
 		    updated_at = now()
 		WHERE delegation_id = $1
-	`, delegationID, status, errorDetail, truncatePreview(resultPreview))
+	`, delegationID, status, errorDetail, textutil.TruncateBytesNoMarker(resultPreview, previewCap))
 	return err
 }

@@ -2,6 +2,7 @@ package handlers

 import (
 	"context"
+	"database/sql/driver"
 	"errors"
 	"strings"
 	"testing"
@@ -74,15 +75,20 @@ func TestLedgerInsert_TruncatesOversizedPreview(t *testing.T) {
 	mock := setupTestDB(t)
 	l := NewDelegationLedger(nil)

-	huge := strings.Repeat("x", 10_000) // > previewCap
+	// 4096 / 3 = 1365 runes; +10 for margin so we cross the cap.
+	// '世' is 3 bytes in UTF-8 (worst case for byte-cap rune walking).
+	huge := strings.Repeat("世", (previewCap/3)+10)
+	if len(huge) <= previewCap {
+		t.Fatalf("test setup: input too short (%d bytes) — must exceed previewCap=%d", len(huge), previewCap)
+	}

 	mock.ExpectExec(`INSERT INTO delegations`).
 		WithArgs(
 			"deleg-big",
 			"c", "ca",
-			sqlmock.AnyArg(), // truncated preview — verify length below via custom matcher
-			sqlmock.AnyArg(),
-			sqlmock.AnyArg(),
+			capValidUTF8Matcher{cap: previewCap}, // truncated preview must fit cap AND be valid UTF-8
+			sqlmock.AnyArg(),                     // deadline
+			sqlmock.AnyArg(),                     // idempotency_key
 		).
 		WillReturnResult(sqlmock.NewResult(0, 1))

@@ -97,87 +103,28 @@ func TestLedgerInsert_TruncatesOversizedPreview(t *testing.T) {
 	}
 }

-// ---------- truncatePreview unit ----------
+// capValidUTF8Matcher pins #2962 at the integration boundary: the
+// preview that lands in the INSERT MUST be valid UTF-8 (else Postgres
+// JSONB rejects → silent audit gap) AND fit within the byte cap. Pre-
+// migration this would have asserted on the corrupted "世" mid-codepoint
+// byte slice; post-migration it asserts the truncated preview is a
+// clean rune-aligned prefix.
+type capValidUTF8Matcher struct{ cap int }

-func TestTruncatePreview_UnderCap(t *testing.T) {
-	in := "short"
-	if got := truncatePreview(in); got != in {
-		t.Errorf("under-cap should passthrough; got %q", got)
+func (m capValidUTF8Matcher) Match(v driver.Value) bool {
+	s, ok := v.(string)
+	if !ok {
+		return false
 	}
+	return len(s) <= m.cap && utf8.ValidString(s)
 }

-func TestTruncatePreview_OverCapTruncatesAtBoundary(t *testing.T) {
-	in := strings.Repeat("a", previewCap+100)
-	got := truncatePreview(in)
-	if len(got) != previewCap {
-		t.Errorf("expected len=%d got len=%d", previewCap, len(got))
-	}
-}
-
-func TestTruncatePreview_ExactlyAtCap(t *testing.T) {
-	in := strings.Repeat("a", previewCap)
-	got := truncatePreview(in)
-	if got != in {
-		t.Errorf("at-cap should passthrough unchanged")
-	}
-}
-
-// TestTruncatePreview_NeverProducesInvalidUTF8 — pins #2962. The old
-// byte-slice implementation (s[:previewCap]) split on a byte boundary,
-// so a multi-byte codepoint straddling byte 4096 produced invalid
-// UTF-8 → Postgres JSONB rejects → ledger row not inserted → audit
-// gap. Test feeds a CJK / emoji-padded string longer than previewCap
-// and asserts utf8.ValidString on the result.
-func TestTruncatePreview_NeverProducesInvalidUTF8(t *testing.T) {
-	// Build a string of '世' (3 bytes per rune in UTF-8) that's just
-	// past the cap. With the old implementation, the slice at byte
-	// previewCap would land mid-rune and ValidString would fail.
-	// With the rune-aware implementation, the result is always valid
-	// UTF-8 even if the byte length is < previewCap.
-	rune3 := "世" // U+4E16, 3 bytes
-	// Need at least previewCap/3 + 1 runes so we cross the cap with
-	// margin to spare.
-	in := strings.Repeat(rune3, (previewCap/3)+10)
-	if len(in) <= previewCap {
-		t.Fatalf("test setup: input too short (%d bytes) — must exceed previewCap=%d", len(in), previewCap)
-	}
-	got := truncatePreview(in)
-	if !utf8.ValidString(got) {
-		t.Errorf("truncatePreview produced invalid UTF-8 — JSONB will reject this row. len(got)=%d", len(got))
-	}
-	if len(got) > previewCap {
-		t.Errorf("truncatePreview exceeded cap: len(got)=%d > previewCap=%d", len(got), previewCap)
-	}
-	// Defense-in-depth: the result should also be a clean rune
-	// prefix of the input — not some garbled sequence.
-	if !strings.HasPrefix(in, got) {
-		t.Errorf("truncatePreview should return a prefix of the input")
-	}
-}
-
-// TestTruncatePreview_MultiByteAtBoundary — most-targeted regression.
-// Feeds an input where the cap byte falls EXACTLY in the middle of a
-// 3-byte codepoint. Pre-fix, this is the case that produces invalid
-// UTF-8; post-fix, the truncate stops at the previous rune boundary.
-func TestTruncatePreview_MultiByteAtBoundary(t *testing.T) {
-	// Build a string that's `previewCap-1` ASCII bytes followed by
-	// '世' (3 bytes). Total = previewCap + 2. The old impl would
-	// slice at byte previewCap, landing inside the '世' codepoint.
-	prefix := strings.Repeat("a", previewCap-1)
-	in := prefix + "世"
-	if len(in) != previewCap+2 {
-		t.Fatalf("test setup: expected len %d, got %d", previewCap+2, len(in))
-	}
-	got := truncatePreview(in)
-	if !utf8.ValidString(got) {
-		t.Errorf("truncatePreview produced invalid UTF-8 at the multi-byte boundary case")
-	}
-	// Result should be exactly the ASCII prefix — '世' was past
-	// the cap so it must be dropped entirely.
-	if got != prefix {
-		t.Errorf("expected exact ASCII prefix, got %q (len=%d)", got[len(got)-10:], len(got))
-	}
-}
+// Helper-level truncation tests now live in
+// internal/textutil/truncate_test.go. The integration-level path
+// (TestLedgerInsert_TruncatesOversizedPreview above) still exercises
+// the previewCap boundary through the SQL write so a regression in
+// the wiring (wrong cap, wrong helper, missing call) would still go
+// red here.

 // ---------- SetStatus lifecycle ----------

@@ -0,0 +1,437 @@
+package handlers
+
+// eic_tunnel_pool.go — refcounted pool for EIC SSH tunnels keyed on
+// instanceID. Reuses one tunnel across N file ops, amortising the
+// ssh-keygen + SendSSHPublicKey + open-tunnel + waitForPort cost
+// (~3-5s) over multiple cats/finds (~50-200ms each).
+//
+// Origin: core#11 — canvas detail-panel config + filesystem load
+// took ~20s. ConfigTab fans out 4 GETs serially; the slowest is
+// /files/config.yaml which dispatches to readFileViaEIC. Without a
+// pool, every readFileViaEIC + listFilesViaEIC + writeFileViaEIC +
+// deleteFileViaEIC pays the full setup cost even when fired
+// back-to-back on the same workspace EC2.
+//
+// The pool keeps one eicSSHSession alive per instanceID for up to
+// poolTTL. SendSSHPublicKey grants a 60s key validity, so poolTTL
+// must stay strictly below that to avoid serving requests on a
+// just-expired key. We default to 50s with a 10s safety margin.
+//
+// Concurrency model:
+//
+//   - Single mutex guards the entries map.
+//   - Slow path (tunnel setup) runs OUTSIDE the lock, gated by a
+//     pendingSetups placeholder so concurrent acquires for the same
+//     instanceID don't both build a tunnel — later arrivals wait
+//     for the first setup to finish and then reuse its entry.
+//   - Refcount on each entry; eviction blocked while refcount > 0.
+//   - Janitor goroutine sweeps every poolJanitorInterval, drops
+//     entries where refcount == 0 && expiresAt < now.
+//
+// Test injection:
+//
+//   - poolSetupTunnel is a package-level var so tests can swap the
+//     slow path for a counting stub. Production wires it to
+//     realWithEICTunnel-style setup.
+//   - withEICTunnel (the public, single-shot API) is also a var
+//     (already, see template_files_eic.go). It's rebound here to
+//     pooledWithEICTunnel which routes through globalEICTunnelPool.
+//   - Tests that need single-shot behaviour can set poolTTL = 0,
+//     which makes pooledWithEICTunnel fall through to the underlying
+//     setup directly (no pool entry kept).
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"time"
+)
+
+// poolTTL is the maximum age of a pooled tunnel. Must be strictly
+// less than the SendSSHPublicKey grant window (60s) so we never
+// serve a request through a key that's about to expire mid-op.
+//
+// Deliberately a var, not a const, so tests can pin TTL=0 (acquire
+// then bypasses the pool entirely) or TTL=50ms (drive eviction
+// tests). Nothing in this file changes it at runtime.
+var poolTTL = 50 * time.Second
+
+// poolJanitorInterval is how often the janitor goroutine sweeps for
+// expired idle entries. Tighter than poolTTL so eviction is timely;
+// loose enough that the goroutine doesn't burn CPU. The janitor
+// reads it once, when it creates its ticker, so later changes do
+// not affect an already-running janitor.
+var poolJanitorInterval = 10 * time.Second
+
+// poolMaxEntries caps simultaneous instanceIDs the pool tracks.
+// At the cap, inserting a new entry first evicts the
+// least-recently-used IDLE entry; if every entry is in use the pool
+// temporarily exceeds the cap instead. Guards against a
+// pathological caller (e.g. a sweep over hundreds of workspace
+// EC2s) leaking unbounded tunnel processes. 32 is a generous
+// ceiling for the canvas use case (one human navigates ≤ ~5
+// workspaces at a time).
+var poolMaxEntries = 32
+
+// poolSetupTunnel is the slow-path tunnel constructor. Wrapped in a
+// var so tests can inject a counter stub. Returns a session and a
+// cleanup function (closes the open-tunnel subprocess + scrubs the
+// ephemeral keydir). nil session + non-nil err means setup failed
+// and there is nothing to clean up.
+//
+// Production wiring lives in eic_tunnel_pool_setup.go (a thin shim
+// over the existing realWithEICTunnel logic).
+var poolSetupTunnel = func(ctx context.Context, instanceID string) (
+	sess eicSSHSession, cleanup func(), err error) {
+	return setupRealEICTunnel(ctx, instanceID)
+}
+
+// pooledTunnel is one entry in the pool. session is shared by N
+// concurrent fn calls; cleanup runs when the entry is retired:
+// either on the last release of an expired or poisoned entry, or
+// when an idle entry is dropped by acquire, LRU eviction, the
+// janitor sweep, or stop.
+//
+// lastUsed tracks the most recent acquire time for LRU bookkeeping
+// (overflow eviction). expiresAt is set at construction and not
+// extended on use — a tunnel cannot live past poolTTL even if it's
+// hot, because the underlying SendSSHPublicKey grant expires.
+//
+// All mutable fields are read and written under eicTunnelPool.mu.
+type pooledTunnel struct {
+	// session is the value handed to every caller sharing this tunnel.
+	session   eicSSHSession
+	// cleanup tears the tunnel down; the pool invokes it in its own goroutine.
+	cleanup   func()
+	// expiresAt is creation time + poolTTL; never extended.
+	expiresAt time.Time
+	// lastUsed is refreshed on every successful acquire of this entry.
+	lastUsed  time.Time
+	// refcount is the number of callers that acquired and have not yet released.
+	refcount  int
+	poisoned  bool // true if a fn returned a tunnel-fatal error; do not reuse
+}
+
+// eicTunnelPool is the refcounted tunnel pool, keyed on instanceID.
+// Production uses the single instance in globalEICTunnelPool, built
+// lazily by getEICTunnelPool; tests construct their own via
+// newEICTunnelPool.
+type eicTunnelPool struct {
+	// mu guards entries, pendingSetups and every pooledTunnel field.
+	mu      sync.Mutex
+	// entries holds at most one tunnel per instanceID.
+	entries map[string]*pooledTunnel
+	// pendingSetups guards concurrent setup for the same instanceID.
+	// First acquirer takes the slot; later ones wait on the channel,
+	// which is closed when that setup finishes (success or failure).
+	pendingSetups map[string]chan struct{}
+	// stopJanitor is closed by stop() to make janitor() return.
+	stopJanitor   chan struct{}
+}
+
+var (
+	globalEICTunnelPool     *eicTunnelPool
+	globalEICTunnelPoolOnce sync.Once
+)
+
+// getEICTunnelPool hands back the process-wide pool. The first call
+// builds it and launches its janitor goroutine; every later call
+// just returns whatever globalEICTunnelPool currently points at.
+func getEICTunnelPool() *eicTunnelPool {
+	globalEICTunnelPoolOnce.Do(func() {
+		pool := newEICTunnelPool()
+		globalEICTunnelPool = pool
+		go pool.janitor()
+	})
+	return globalEICTunnelPool
+}
+
+// newEICTunnelPool builds an empty pool with initialised maps and
+// stop channel. It does NOT start a janitor goroutine — only
+// getEICTunnelPool does that — which lets tests build isolated
+// pools without sharing the singleton.
+func newEICTunnelPool() *eicTunnelPool {
+	pool := new(eicTunnelPool)
+	pool.entries = make(map[string]*pooledTunnel)
+	pool.pendingSetups = make(map[string]chan struct{})
+	pool.stopJanitor = make(chan struct{})
+	return pool
+}
+
+// acquire returns a usable session for instanceID.
+//
+//   - Fast path: a healthy (unpoisoned, unexpired) entry exists —
+//     bump its refcount and hand it out.
+//   - Wait path: a setup is already in flight for this instanceID —
+//     block on its pendingSetups channel (or ctx), then re-check.
+//   - Slow path: reserve the pendingSetups slot, build a tunnel via
+//     poolSetupTunnel outside the lock, then publish the new entry.
+//
+// done() must be called by the caller when the op finishes. It
+// decrements refcount and triggers cleanup if the entry is past
+// TTL or poisoned and refcount==0.
+//
+// Errors from the slow path propagate wrapped as "eic tunnel
+// setup: ..."; pool state is not modified for failed setups (no
+// poisoned entry created — that's only for fn-returned errors on a
+// previously-good session). ctx.Err() is returned if ctx ends while
+// waiting on another caller's setup.
+//
+// With poolTTL <= 0 the pool is bypassed: every call builds its own
+// tunnel and done() runs that tunnel's cleanup directly.
+func (p *eicTunnelPool) acquire(ctx context.Context, instanceID string) (
+	sess eicSSHSession, done func(poisoned bool), err error) {
+
+	if poolTTL <= 0 {
+		// Pool disabled (TTL=0 mode for tests / opt-out). Fall
+		// through to a direct setup with caller-driven cleanup.
+		s, cleanup, err := poolSetupTunnel(ctx, instanceID)
+		if err != nil {
+			return eicSSHSession{}, nil, err
+		}
+		return s, func(_ bool) { cleanup() }, nil
+	}
+
+	for {
+		p.mu.Lock()
+		if pt, ok := p.entries[instanceID]; ok && !pt.poisoned && pt.expiresAt.After(time.Now()) {
+			pt.refcount++
+			pt.lastUsed = time.Now()
+			p.mu.Unlock()
+			return pt.session, p.releaser(instanceID, pt), nil
+		}
+		// Either no entry, expired entry, or poisoned entry. If a
+		// setup is already in flight, wait and retry.
+		if pending, ok := p.pendingSetups[instanceID]; ok {
+			p.mu.Unlock()
+			select {
+			case <-pending:
+				continue // re-check the entries map
+			case <-ctx.Done():
+				return eicSSHSession{}, nil, ctx.Err()
+			}
+		}
+		// Drop an expired/poisoned entry now if nobody holds it
+		// (cleanup runs outside the lock). An entry that is still
+		// referenced is left alone: it gets overwritten in the map
+		// below and its last releaser runs the cleanup.
+		var oldCleanup func()
+		if pt, ok := p.entries[instanceID]; ok {
+			if pt.refcount == 0 {
+				oldCleanup = pt.cleanup
+				delete(p.entries, instanceID)
+			}
+		}
+		// Reserve the setup slot.
+		signal := make(chan struct{})
+		p.pendingSetups[instanceID] = signal
+		p.mu.Unlock()
+
+		if oldCleanup != nil {
+			go oldCleanup()
+		}
+
+		// Slow path: build a new tunnel. The helper guarantees the
+		// pendingSetups slot is released even if the setup panics,
+		// so the next acquire never blocks on a signal that never
+		// fires. On a normal return the slot is still held and is
+		// released below, under the same lock hold that publishes
+		// the entry — waiters therefore always see the new entry.
+		newSess, cleanup, setupErr := p.setupReleasingSlotOnPanic(ctx, instanceID, signal)
+
+		p.mu.Lock()
+		delete(p.pendingSetups, instanceID)
+		close(signal)
+
+		if setupErr != nil {
+			p.mu.Unlock()
+			return eicSSHSession{}, nil, fmt.Errorf("eic tunnel setup: %w", setupErr)
+		}
+
+		// Enforce LRU bound BEFORE inserting so we don't briefly
+		// exceed the cap even by one entry.
+		p.evictLRUIfFullLocked(instanceID)
+
+		pt := &pooledTunnel{
+			session:   newSess,
+			cleanup:   cleanup,
+			expiresAt: time.Now().Add(poolTTL),
+			lastUsed:  time.Now(),
+			refcount:  1,
+		}
+		p.entries[instanceID] = pt
+		p.mu.Unlock()
+		return pt.session, p.releaser(instanceID, pt), nil
+	}
+}
+
+// setupReleasingSlotOnPanic runs poolSetupTunnel and, only if that
+// call unwinds without returning (panic or runtime.Goexit), removes
+// the pendingSetups slot and closes signal so waiters wake up. The
+// panic itself is not recovered and keeps propagating. On a normal
+// return the slot is left untouched for the caller to release.
+// Must be called WITHOUT p.mu held.
+func (p *eicTunnelPool) setupReleasingSlotOnPanic(ctx context.Context,
+	instanceID string, signal chan struct{}) (
+	sess eicSSHSession, cleanup func(), err error) {
+
+	returned := false
+	defer func() {
+		if returned {
+			return
+		}
+		p.mu.Lock()
+		if p.pendingSetups[instanceID] == signal {
+			delete(p.pendingSetups, instanceID)
+		}
+		p.mu.Unlock()
+		close(signal)
+	}()
+	sess, cleanup, err = poolSetupTunnel(ctx, instanceID)
+	returned = true
+	return sess, cleanup, err
+}
+
+// releaser returns a closure that decrements refcount and triggers
+// cleanup if (a) the entry is past TTL or (b) the caller signalled
+// poison. Idempotent against double-release (decrements once via the
+// captured pt; pool entry may have been replaced by then).
+func (p *eicTunnelPool) releaser(instanceID string, pt *pooledTunnel) func(poisoned bool) {
+	released := false
+	return func(poisoned bool) {
+		p.mu.Lock()
+		defer p.mu.Unlock()
+		if released {
+			return
+		}
+		released = true
+		pt.refcount--
+		if poisoned {
+			pt.poisoned = true
+		}
+		// Evict immediately if poisoned-and-idle OR expired-and-idle.
+		// Hot entries (refcount > 0) defer eviction to the last release.
+		if pt.refcount == 0 && (pt.poisoned || pt.expiresAt.Before(time.Now())) {
+			// If the entry in the map is still us, remove it.
+			if cur, ok := p.entries[instanceID]; ok && cur == pt {
+				delete(p.entries, instanceID)
+			}
+			go pt.cleanup()
+		}
+	}
+}
+
+// evictLRUIfFullLocked makes room for one more entry when the pool
+// already holds poolMaxEntries or more. Among entries that are idle
+// (refcount == 0) and not keyed by skipInstance, it removes the one
+// with the oldest lastUsed and starts its cleanup in a goroutine.
+// If every candidate is in use nothing is evicted and the pool is
+// allowed to exceed the cap. Caller must hold p.mu.
+func (p *eicTunnelPool) evictLRUIfFullLocked(skipInstance string) {
+	if len(p.entries) < poolMaxEntries {
+		return
+	}
+
+	victimKey := ""
+	var victim *pooledTunnel
+	for id, entry := range p.entries {
+		if id == skipInstance || entry.refcount > 0 {
+			continue
+		}
+		if victim == nil || entry.lastUsed.Before(victim.lastUsed) {
+			victimKey, victim = id, entry
+		}
+	}
+
+	if victim != nil {
+		delete(p.entries, victimKey)
+		go victim.cleanup()
+	}
+}
+
+// janitor is the background eviction loop: every
+// poolJanitorInterval (read once, at start) it runs sweep. It
+// returns only when p.stopJanitor is closed, which stop() does.
+func (p *eicTunnelPool) janitor() {
+	ticker := time.NewTicker(poolJanitorInterval)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-p.stopJanitor:
+			return
+		case <-ticker.C:
+			p.sweep()
+		}
+	}
+}
+
+// sweep performs a single janitor pass: every entry that is both
+// idle (refcount == 0) and past expiresAt is removed from the map
+// under the lock, and its cleanup is then started in its own
+// goroutine after the lock has been released.
+func (p *eicTunnelPool) sweep() {
+	expired := func() []func() {
+		p.mu.Lock()
+		defer p.mu.Unlock()
+
+		cutoff := time.Now()
+		var cleanups []func()
+		for id, entry := range p.entries {
+			if entry.refcount != 0 || !entry.expiresAt.Before(cutoff) {
+				continue
+			}
+			cleanups = append(cleanups, entry.cleanup)
+			delete(p.entries, id)
+		}
+		return cleanups
+	}()
+
+	for i := range expired {
+		go expired[i]()
+	}
+}
+
+// stop signals the janitor to exit (by closing stopJanitor) and
+// retires every idle entry, running each cleanup in a goroutine.
+// Entries that are still held (refcount > 0) are left in place —
+// force-closing them would pull the tunnel out from under a running
+// caller. In practice stop is only called by tests that have
+// already drained their callers.
+//
+// Not idempotent: a second call closes an already-closed channel
+// and panics.
+func (p *eicTunnelPool) stop() {
+	close(p.stopJanitor)
+
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	for id, entry := range p.entries {
+		if entry.refcount != 0 {
+			continue
+		}
+		delete(p.entries, id)
+		go entry.cleanup()
+	}
+}
+
+// pooledWithEICTunnel runs fn against a pooled tunnel session for
+// instanceID. It is what initEICTunnelPool binds withEICTunnel to.
+//
+// Return value: the acquire error if no session could be obtained,
+// otherwise exactly what fn returned.
+//
+// Release semantics: the session is always released via defer. If
+// fn returns, the entry is poisoned only when
+// fnErrIndicatesTunnelFault classifies fn's error as tunnel-level,
+// so the next acquire builds a fresh tunnel. If fn panics, the
+// entry is released as poisoned (the flag starts out true and is
+// only downgraded after fn returns) and the panic keeps
+// propagating.
+func pooledWithEICTunnel(ctx context.Context, instanceID string,
+	fn func(s eicSSHSession) error) error {
+	session, release, acquireErr := getEICTunnelPool().acquire(ctx, instanceID)
+	if acquireErr != nil {
+		return acquireErr
+	}
+
+	// Pessimistic default: assume the tunnel is bad until fn has
+	// returned and its error has been classified.
+	tunnelFault := true
+	defer func() { release(tunnelFault) }()
+
+	result := fn(session)
+	tunnelFault = fnErrIndicatesTunnelFault(result)
+	return result
+}
+
+// fnErrIndicatesTunnelFault returns true for fn errors whose nature
+// suggests the underlying tunnel is no longer reusable (auth gone,
+// network gone, ssh process dead). Returning true poisons the pool
+// entry so the next acquire builds fresh. A nil error is never a
+// fault.
+//
+// Classification is a case-insensitive substring match of
+// err.Error() against known ssh failure markers.
+//
+// Conservative: only marks tunnel-faulty for clearly tunnel-level
+// failures (connection refused, broken pipe, ssh authentication
+// rejection). A `cat` returning os.ErrNotExist on a missing file is
+// NOT a tunnel fault — that's the file path being wrong, the tunnel
+// is fine. For the same reason the permission marker is anchored on
+// the opening parenthesis of ssh's "Permission denied (publickey)"
+// form: a bare "Permission denied" also appears in file-level
+// errors such as "cat: /path: Permission denied", which must not
+// throw away a healthy tunnel.
+func fnErrIndicatesTunnelFault(err error) bool {
+	if err == nil {
+		return false
+	}
+	msg := err.Error()
+	// stderr substrings produced by ssh when the tunnel is broken.
+	for _, marker := range []string{
+		"connection refused",
+		"connection closed",
+		"broken pipe",
+		"Connection reset by peer",
+		"kex_exchange_identification",
+		"port forwarding failed",
+		"Permission denied (", // ssh auth rejection, e.g. "(publickey)"
+		"Authentication failed",
+	} {
+		if containsCaseInsensitive(msg, marker) {
+			return true
+		}
+	}
+	return false
+}
+
+// containsCaseInsensitive reports whether substr occurs anywhere in
+// s, ignoring ASCII letter case. Only the bytes 'A'-'Z' are folded;
+// all other bytes (including non-ASCII) must match exactly, which
+// is sufficient for the ASCII ssh error markers it is used with.
+// An empty substr matches every s. Hand-rolled so this file does
+// not need to import strings.
+func containsCaseInsensitive(s, substr string) bool {
+	n := len(substr)
+	if n > len(s) {
+		return false
+	}
+
+	// foldASCII maps an upper-case ASCII letter to lower case and
+	// leaves every other byte untouched.
+	foldASCII := func(c byte) byte {
+		if 'A' <= c && c <= 'Z' {
+			return c + ('a' - 'A')
+		}
+		return c
+	}
+	// windowMatches compares substr against s[start:start+n].
+	windowMatches := func(start int) bool {
+		for k := 0; k < n; k++ {
+			if foldASCII(s[start+k]) != foldASCII(substr[k]) {
+				return false
+			}
+		}
+		return true
+	}
+
+	for start := 0; start <= len(s)-n; start++ {
+		if windowMatches(start) {
+			return true
+		}
+	}
+	return false
+}
+
+// initEICTunnelPool rebinds the package-level withEICTunnel var to
+// the pooled implementation; that assignment is all it does (pool
+// tunables such as poolTTL are not touched here). Called once at
+// package init via the init() in eic_tunnel_pool_setup.go (split
+// file so the rebind itself is testable without dragging in the
+// production setup shim's exec/aws dependencies).
+func initEICTunnelPool() {
+	withEICTunnel = pooledWithEICTunnel
+}
@@ -0,0 +1,136 @@
+package handlers
+
+// eic_tunnel_pool_setup.go — production setup shim.
+//
+// setupRealEICTunnel decomposes the existing realWithEICTunnel into
+// its slow half (build the tunnel) and its caller half (run fn). The
+// pool calls the slow half once and shares the resulting session
+// across N callers, holding cleanup until the last release.
+//
+// Why decompose instead of refactoring realWithEICTunnel: the
+// existing function and its test stub-vars (withEICTunnel,
+// sendSSHPublicKey, openTunnelCmd) are load-bearing for the
+// dispatch tests. Extracting a sibling setup function preserves the
+// existing single-shot path verbatim — the pool calls the sibling
+// (setupRealEICTunnel) instead of realWithEICTunnel itself,
+// leaving the tested surface unchanged.
+//
+// The pool's acquire() invokes poolSetupTunnel, which is a `var`
+// pointing to setupRealEICTunnel for production and a counting stub
+// for tests.
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"time"
+)
+
+// setupRealEICTunnel is the slow path that the pool consumes when
+// no warm entry exists. Mirrors realWithEICTunnel's setup half but
+// returns the session + cleanup instead of running fn inline.
+//
+// Steps, in order: validate instanceID, resolve OS user and region
+// from the environment, generate an ephemeral ed25519 key pair in a
+// fresh temp dir, push the public key via sendSSHPublicKey, pick a
+// local port, start the tunnel subprocess, and wait for the port to
+// accept connections. Every failure path removes the temp key dir
+// (and kills the subprocess once it has been started) before
+// returning a zero session, a nil cleanup and a wrapped error.
+//
+// The cleanup func owns the tunnel subprocess, ephemeral key dir,
+// and a one-time wait. A repeat call is a no-op thanks to the
+// cleanedUp flag; the pool guarantees one call per session.
+// NOTE(review): cleanedUp is a plain bool with no lock or atomic,
+// so the guard covers sequential repeat calls only — two
+// concurrent calls could both pass the check.
+func setupRealEICTunnel(ctx context.Context, instanceID string) (
+	eicSSHSession, func(), error) {
+
+	if instanceID == "" {
+		return eicSSHSession{}, nil,
+			fmt.Errorf("workspace has no instance_id — not a SaaS EC2 workspace")
+	}
+	// Environment overrides with hard-coded fallbacks.
+	osUser := os.Getenv("WORKSPACE_EC2_OS_USER")
+	if osUser == "" {
+		osUser = "ubuntu"
+	}
+	region := os.Getenv("AWS_REGION")
+	if region == "" {
+		region = "us-east-2"
+	}
+
+	// Ephemeral key material lives in its own temp dir so teardown
+	// is a single RemoveAll.
+	keyDir, err := os.MkdirTemp("", "molecule-eic-pool-*")
+	if err != nil {
+		return eicSSHSession{}, nil, fmt.Errorf("keydir mkdir: %w", err)
+	}
+	keyPath := keyDir + "/id"
+	// -N "" = no passphrase, -q = quiet; output is captured only to
+	// enrich the error message on failure.
+	if out, kerr := exec.CommandContext(ctx, "ssh-keygen",
+		"-t", "ed25519", "-f", keyPath, "-N", "", "-q",
+		"-C", "molecule-eic-pool",
+	).CombinedOutput(); kerr != nil {
+		_ = os.RemoveAll(keyDir)
+		return eicSSHSession{}, nil,
+			fmt.Errorf("ssh-keygen: %w (%s)", kerr, strings.TrimSpace(string(out)))
+	}
+	pubKey, err := os.ReadFile(keyPath + ".pub")
+	if err != nil {
+		_ = os.RemoveAll(keyDir)
+		return eicSSHSession{}, nil, fmt.Errorf("read pubkey: %w", err)
+	}
+
+	// Register the public key for osUser on the instance.
+	if err := sendSSHPublicKey(ctx, region, instanceID, osUser,
+		strings.TrimSpace(string(pubKey))); err != nil {
+		_ = os.RemoveAll(keyDir)
+		return eicSSHSession{}, nil, fmt.Errorf("send-ssh-public-key: %w", err)
+	}
+
+	localPort, err := pickFreePort()
+	if err != nil {
+		_ = os.RemoveAll(keyDir)
+		return eicSSHSession{}, nil, fmt.Errorf("pick free port: %w", err)
+	}
+
+	tunnel := openTunnelCmd(eicSSHOptions{
+		InstanceID:     instanceID,
+		OSUser:         osUser,
+		Region:         region,
+		LocalPort:      localPort,
+		PrivateKeyPath: keyPath,
+	})
+	// Give the subprocess the full parent environment.
+	tunnel.Env = os.Environ()
+	if err := tunnel.Start(); err != nil {
+		_ = os.RemoveAll(keyDir)
+		return eicSSHSession{}, nil, fmt.Errorf("open-tunnel start: %w", err)
+	}
+
+	// Wait up to 10s for the tunnel to listen on the local port.
+	if err := waitForPort(ctx, "127.0.0.1", localPort, 10*time.Second); err != nil {
+		if tunnel.Process != nil {
+			_ = tunnel.Process.Kill()
+		}
+		// Reap the killed subprocess before removing its key dir.
+		_ = tunnel.Wait()
+		_ = os.RemoveAll(keyDir)
+		return eicSSHSession{}, nil, fmt.Errorf("tunnel never listened: %w", err)
+	}
+
+	// Teardown mirrors the waitForPort failure path: kill, reap,
+	// remove key dir. Guarded so a second sequential call is a no-op.
+	cleanedUp := false
+	cleanup := func() {
+		if cleanedUp {
+			return
+		}
+		cleanedUp = true
+		if tunnel.Process != nil {
+			_ = tunnel.Process.Kill()
+		}
+		_ = tunnel.Wait()
+		_ = os.RemoveAll(keyDir)
+	}
+
+	return eicSSHSession{
+		keyPath:    keyPath,
+		localPort:  localPort,
+		osUser:     osUser,
+		instanceID: instanceID,
+	}, cleanup, nil
+}
+
+// init wires the pool into the package-level withEICTunnel var (via
+// initEICTunnelPool) so every caller of withEICTunnel uses pooled
+// tunnels by default. Test files that need single-shot behaviour
+// can swap withEICTunnel back via the existing stubWithEICTunnel
+// pattern, OR set poolTTL=0 to disable pooling without rebinding
+// the var.
+func init() {
+	initEICTunnelPool()
+}
@@ -0,0 +1,467 @@
+package handlers
+
+// eic_tunnel_pool_test.go — tests for the refcounted EIC tunnel pool
+// added in core#11. Stubs poolSetupTunnel with a counter so the
+// tests don't fork ssh-keygen / aws subprocesses.
+//
+// Per memory feedback_assert_exact_not_substring: each test pins
+// exact expected counts (not "at least N") so a regression that
+// silently double-sets-up surfaces here.
+
+import (
+	"context"
+	"errors"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// withPoolSetupStub swaps poolSetupTunnel for a counting fake that
+// returns a sentinel session and a cleanup func that records its
+// invocation. It also snapshots poolTTL and poolJanitorInterval so
+// tests may overwrite them freely; everything is restored by
+// restore, which is registered with t.Cleanup (calling it manually
+// as well is harmless).
+//
+// Setups are token-gated: the gate starts with ONE token, each stub
+// setup consumes one, and unblock() adds one. A test that needs a
+// second setup must therefore call unblock() first. The gate has
+// capacity 1, so calling unblock() while a token is still unused
+// blocks the caller.
+//
+// Returns pointers to the setup and cleanup counters; read them
+// with atomic.LoadInt64.
+func withPoolSetupStub(t *testing.T) (
+	setupCount *int64, cleanupCount *int64, restore func(), unblock func()) {
+	t.Helper()
+	prev := poolSetupTunnel
+	prevTTL := poolTTL
+	prevJanitor := poolJanitorInterval
+
+	var sc, cc int64
+	setupCount, cleanupCount = &sc, &cc
+
+	gate := make(chan struct{}, 1)
+	gate <- struct{}{} // allow the first setup through immediately
+	unblock = func() { gate <- struct{}{} }
+
+	poolSetupTunnel = func(ctx context.Context, instanceID string) (
+		eicSSHSession, func(), error) {
+		// Wait for a token, but give up if the caller's ctx ends.
+		select {
+		case <-gate:
+		case <-ctx.Done():
+			return eicSSHSession{}, nil, ctx.Err()
+		}
+		atomic.AddInt64(&sc, 1)
+		sess := eicSSHSession{
+			instanceID: instanceID,
+			osUser:     "ubuntu",
+			localPort:  10000 + int(atomic.LoadInt64(&sc)),
+			keyPath:    "/tmp/molecule-eic-test-" + instanceID,
+		}
+		cleanup := func() { atomic.AddInt64(&cc, 1) }
+		return sess, cleanup, nil
+	}
+
+	restore = func() {
+		poolSetupTunnel = prev
+		poolTTL = prevTTL
+		poolJanitorInterval = prevJanitor
+	}
+	t.Cleanup(restore)
+	return
+}
+
+// freshPool returns an isolated pool (NOT the global) so tests run
+// independently, and registers p.stop as a test cleanup.
+//
+// Note: newEICTunnelPool does not start a janitor goroutine, so a
+// fresh pool has no background sweeps unless the test launches
+// `go p.janitor()` itself; stop() then terminates it.
+func freshPool(t *testing.T) *eicTunnelPool {
+	t.Helper()
+	p := newEICTunnelPool()
+	t.Cleanup(p.stop)
+	return p
+}
+
+// TestEICTunnelPool_FourOpsAmortise pins the core invariant: four
+// sequential acquire/release cycles on the same instanceID share
+// ONE underlying tunnel setup. Mutation: delete the cache hit branch
+// in acquire() → setupCount goes 1 → 4 → test fails.
+//
+// Only the stub's initial gate token is needed: the first acquire
+// consumes it and the remaining three must be cache hits.
+func TestEICTunnelPool_FourOpsAmortise(t *testing.T) {
+	setupCount, cleanupCount, _, _ := withPoolSetupStub(t)
+	poolTTL = 50 * time.Second
+	pool := freshPool(t)
+	ctx := context.Background()
+
+	for i := 0; i < 4; i++ {
+		sess, done, err := pool.acquire(ctx, "i-test-1")
+		if err != nil {
+			t.Fatalf("op %d: acquire: %v", i, err)
+		}
+		if sess.instanceID != "i-test-1" {
+			t.Fatalf("op %d: session has wrong instanceID: %q", i, sess.instanceID)
+		}
+		done(false)
+	}
+
+	if got := atomic.LoadInt64(setupCount); got != 1 {
+		t.Errorf("expected exactly 1 tunnel setup across 4 ops, got %d", got)
+	}
+	if got := atomic.LoadInt64(cleanupCount); got != 0 {
+		t.Errorf("expected 0 cleanups while entry is hot (TTL=50s), got %d", got)
+	}
+}
+
+// TestEICTunnelPool_DifferentInstancesDoNotShare pins that two
+// different instanceIDs each get their own tunnel — the pool is
+// keyed on instanceID, not a single global slot. Expects exactly
+// two stub setups, one per instanceID.
+func TestEICTunnelPool_DifferentInstancesDoNotShare(t *testing.T) {
+	setupCount, _, _, unblock := withPoolSetupStub(t)
+	poolTTL = 50 * time.Second
+	pool := freshPool(t)
+	ctx := context.Background()
+
+	// First instance setup uses the initial gate slot.
+	_, doneA, err := pool.acquire(ctx, "i-a")
+	if err != nil {
+		t.Fatalf("acquire A: %v", err)
+	}
+	doneA(false)
+
+	// Second instance needs a new slot through the gate.
+	unblock()
+	_, doneB, err := pool.acquire(ctx, "i-b")
+	if err != nil {
+		t.Fatalf("acquire B: %v", err)
+	}
+	doneB(false)
+
+	if got := atomic.LoadInt64(setupCount); got != 2 {
+		t.Errorf("expected 2 setups (one per instance), got %d", got)
+	}
+}
+
+// TestEICTunnelPool_TTLEviction: a short TTL forces the second op
+// to build a fresh tunnel after the first expires. The expired idle
+// entry is retired by the second acquire itself (its slow path
+// drops the stale entry), not by a janitor.
+func TestEICTunnelPool_TTLEviction(t *testing.T) {
+	setupCount, cleanupCount, _, unblock := withPoolSetupStub(t)
+	poolTTL = 50 * time.Millisecond
+	poolJanitorInterval = 1 * time.Second // defensive only: freshPool starts no janitor
+	pool := freshPool(t)
+	ctx := context.Background()
+
+	_, done, err := pool.acquire(ctx, "i-ttl")
+	if err != nil {
+		t.Fatalf("acquire 1: %v", err)
+	}
+	done(false)
+
+	time.Sleep(80 * time.Millisecond) // past TTL
+
+	unblock() // allow next setup
+	_, done, err = pool.acquire(ctx, "i-ttl")
+	if err != nil {
+		t.Fatalf("acquire 2: %v", err)
+	}
+	done(false)
+
+	if got := atomic.LoadInt64(setupCount); got != 2 {
+		t.Errorf("expected 2 setups (TTL eviction between), got %d", got)
+	}
+	// First entry should have been cleaned up when the second
+	// acquire evicted it on the slow path. Cleanup runs in a
+	// goroutine; poll briefly for it to land. The bound is "≥1"
+	// rather than exact because the second entry may itself expire
+	// and be cleaned if the test is scheduled slowly.
+	deadline := time.Now().Add(500 * time.Millisecond)
+	for atomic.LoadInt64(cleanupCount) < 1 && time.Now().Before(deadline) {
+		time.Sleep(5 * time.Millisecond)
+	}
+	if got := atomic.LoadInt64(cleanupCount); got < 1 {
+		t.Errorf("expected ≥1 cleanup (first entry evicted), got %d", got)
+	}
+}
+
+// TestEICTunnelPool_FailureInvalidates pins the poison-on-fault
+// behavior — releasing with done(true) marks the entry unusable so
+// the next acquire builds fresh even though the TTL (50s) has not
+// elapsed.
+func TestEICTunnelPool_FailureInvalidates(t *testing.T) {
+	setupCount, _, _, unblock := withPoolSetupStub(t)
+	poolTTL = 50 * time.Second
+	pool := freshPool(t)
+	ctx := context.Background()
+
+	_, done, err := pool.acquire(ctx, "i-fault")
+	if err != nil {
+		t.Fatalf("acquire 1: %v", err)
+	}
+	done(true) // signal poison
+
+	unblock() // let the next setup through
+	_, done, err = pool.acquire(ctx, "i-fault")
+	if err != nil {
+		t.Fatalf("acquire 2: %v", err)
+	}
+	done(false)
+
+	if got := atomic.LoadInt64(setupCount); got != 2 {
+		t.Errorf("expected 2 setups (poison forced rebuild), got %d", got)
+	}
+}
+
+// TestEICTunnelPool_ConcurrentAcquireSingleSetup pins that N
+// concurrent acquires for the same instanceID before any release
+// only trigger ONE tunnel setup — the rest wait via pendingSetups.
+//
+// Without this guard each concurrent acquire would spawn its own
+// tunnel. Mutation: delete the pendingSetups gate → setupCount
+// goes 1 → N → fails.
+func TestEICTunnelPool_ConcurrentAcquireSingleSetup(t *testing.T) {
+	setupCount, _, _, _ := withPoolSetupStub(t)
+	// Pause setup so all goroutines pile into the pending slot.
+	// This replaces the token-gated stub with one that blocks on a
+	// local channel until the test closes it (it ignores ctx).
+	prev := poolSetupTunnel
+	gate := make(chan struct{})
+	poolSetupTunnel = func(ctx context.Context, instanceID string) (
+		eicSSHSession, func(), error) {
+		<-gate
+		atomic.AddInt64(setupCount, 1)
+		return eicSSHSession{instanceID: instanceID}, func() {}, nil
+	}
+	t.Cleanup(func() { poolSetupTunnel = prev })
+
+	poolTTL = 50 * time.Second
+	pool := freshPool(t)
+	ctx := context.Background()
+
+	const N = 8
+	type result struct {
+		done func(bool)
+		err  error
+	}
+	results := make(chan result, N)
+	var startWg sync.WaitGroup
+	startWg.Add(N)
+	for i := 0; i < N; i++ {
+		go func() {
+			startWg.Done()
+			_, done, err := pool.acquire(ctx, "i-concurrent")
+			results <- result{done, err}
+		}()
+	}
+	startWg.Wait()
+	// give all N goroutines time to enter pool.acquire (heuristic:
+	// a goroutine arriving after the setup completes would simply
+	// hit the cached entry, so the count below still holds)
+	time.Sleep(20 * time.Millisecond)
+	close(gate)
+
+	// Holders are released only as results are drained, so the
+	// entry stays referenced while stragglers are still acquiring.
+	for i := 0; i < N; i++ {
+		r := <-results
+		if r.err != nil {
+			t.Fatalf("acquire %d: %v", i, r.err)
+		}
+		r.done(false)
+	}
+
+	if got := atomic.LoadInt64(setupCount); got != 1 {
+		t.Errorf("expected 1 setup across %d concurrent acquires, got %d", N, got)
+	}
+}
+
+// TestEICTunnelPool_TTLZeroDisablesPooling pins the escape hatch:
+// poolTTL=0 means every acquire goes straight through to setup +
+// cleanup, no entry kept. Useful for tests / opt-out.
+//
+// In this mode done() calls the cleanup synchronously, so the
+// cleanup count can be asserted exactly without polling.
+func TestEICTunnelPool_TTLZeroDisablesPooling(t *testing.T) {
+	setupCount, cleanupCount, _, unblock := withPoolSetupStub(t)
+	poolTTL = 0
+	pool := freshPool(t)
+	ctx := context.Background()
+
+	_, done, err := pool.acquire(ctx, "i-ttlzero")
+	if err != nil {
+		t.Fatalf("acquire 1: %v", err)
+	}
+	done(false)
+
+	unblock()
+	_, done, err = pool.acquire(ctx, "i-ttlzero")
+	if err != nil {
+		t.Fatalf("acquire 2: %v", err)
+	}
+	done(false)
+
+	if got := atomic.LoadInt64(setupCount); got != 2 {
+		t.Errorf("expected 2 setups with TTL=0 (pool disabled), got %d", got)
+	}
+	if got := atomic.LoadInt64(cleanupCount); got != 2 {
+		t.Errorf("expected 2 cleanups with TTL=0 (each release closes), got %d", got)
+	}
+}
+
+// TestEICTunnelPool_LRUEvictionAtCap pins the LRU defence: when the
+// pool reaches poolMaxEntries, a new acquire for an unseen
+// instanceID evicts the LRU idle entry instead of growing unbounded.
+// The cap is lowered to 2 for the duration of the test.
+func TestEICTunnelPool_LRUEvictionAtCap(t *testing.T) {
+	setupCount, cleanupCount, _, _ := withPoolSetupStub(t)
+	prev := poolMaxEntries
+	poolMaxEntries = 2
+	t.Cleanup(func() { poolMaxEntries = prev })
+	poolTTL = 50 * time.Second
+
+	// Replace stub with one that doesn't gate so we can fill quickly.
+	// It reuses the counters handed out by withPoolSetupStub, whose
+	// registered restore puts the original poolSetupTunnel back.
+	poolSetupTunnel = func(ctx context.Context, instanceID string) (
+		eicSSHSession, func(), error) {
+		atomic.AddInt64(setupCount, 1)
+		return eicSSHSession{instanceID: instanceID}, func() {
+			atomic.AddInt64(cleanupCount, 1)
+		}, nil
+	}
+
+	pool := freshPool(t)
+	ctx := context.Background()
+
+	for _, id := range []string{"i-1", "i-2"} {
+		_, done, err := pool.acquire(ctx, id)
+		if err != nil {
+			t.Fatalf("acquire %s: %v", id, err)
+		}
+		done(false)
+	}
+	// Both entries idle, pool at cap.
+	_, done, err := pool.acquire(ctx, "i-3")
+	if err != nil {
+		t.Fatalf("acquire i-3: %v", err)
+	}
+	done(false)
+
+	// Wait for the goroutine'd cleanup of the evicted entry.
+	deadline := time.Now().Add(500 * time.Millisecond)
+	for atomic.LoadInt64(cleanupCount) < 1 && time.Now().Before(deadline) {
+		time.Sleep(10 * time.Millisecond)
+	}
+
+	if got := atomic.LoadInt64(setupCount); got != 3 {
+		t.Errorf("expected 3 setups (one per unique instance), got %d", got)
+	}
+	if got := atomic.LoadInt64(cleanupCount); got < 1 {
+		t.Errorf("expected ≥1 cleanup (LRU eviction), got %d", got)
+	}
+}
+
+// TestEICTunnelPool_PoisonedClassification pins the heuristic that
+// distinguishes tunnel-fatal errors (poison the entry) from
+// app-level errors (file not found, validation) that should NOT
+// invalidate the tunnel. Every marker string in
+// fnErrIndicatesTunnelFault has at least one positive case here, so
+// dropping a marker from that list turns this test red.
+func TestEICTunnelPool_PoisonedClassification(t *testing.T) {
+	cases := []struct {
+		name string
+		err  error
+		want bool
+	}{
+		{"nil", nil, false},
+		{"file not found", errors.New("os: file does not exist"), false},
+		{"validation", errors.New("invalid path: must be relative"), false},
+		{"connection refused", errors.New("ssh: connect to host: connection refused"), true},
+		{"connection refused upper", errors.New("Connection Refused"), true},
+		{"connection closed", errors.New("ssh: connection closed by remote host"), true},
+		{"broken pipe", errors.New("write tunnel: broken pipe"), true},
+		{"permission denied", errors.New("Permission denied (publickey)"), true},
+		{"auth failed", errors.New("Authentication failed"), true},
+		{"connection reset", errors.New("Connection reset by peer"), true},
+		{"kex exchange", errors.New("kex_exchange_identification: read: unexpected EOF"), true},
+		{"port forward", errors.New("port forwarding failed"), true},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := fnErrIndicatesTunnelFault(tc.err)
+			if got != tc.want {
+				t.Errorf("fnErrIndicatesTunnelFault(%v) = %v, want %v",
+					tc.err, got, tc.want)
+			}
+		})
+	}
+}
+
+// TestEICTunnelPool_RefcountBlocksEviction pins that an entry past
+// TTL is NOT evicted while a caller still holds it — preventing
+// use-after-free in the holder.
+//
+// freshPool does not start a janitor, so the test launches one
+// explicitly; otherwise the refcount guard in sweep() would never
+// be exercised. pool.stop (registered by freshPool) terminates it.
+func TestEICTunnelPool_RefcountBlocksEviction(t *testing.T) {
+	setupCount, cleanupCount, _, _ := withPoolSetupStub(t)
+	poolTTL = 30 * time.Millisecond
+	poolJanitorInterval = 5 * time.Millisecond
+	pool := freshPool(t)
+	// Start after poolJanitorInterval is set: the janitor reads it
+	// once when it creates its ticker.
+	go pool.janitor()
+	ctx := context.Background()
+
+	_, done, err := pool.acquire(ctx, "i-hold")
+	if err != nil {
+		t.Fatalf("acquire: %v", err)
+	}
+
+	// Sleep past TTL while holding the session. Janitor sweeps
+	// every 5ms but must skip our entry because refcount=1.
+	time.Sleep(80 * time.Millisecond)
+
+	if got := atomic.LoadInt64(cleanupCount); got != 0 {
+		t.Errorf("expected 0 cleanups while holder is active, got %d", got)
+	}
+
+	done(false)
+	// Now refcount=0 and entry is past TTL; releaser triggers cleanup
+	// and removes the entry under the lock, so a later sweep cannot
+	// clean it a second time.
+	deadline := time.Now().Add(200 * time.Millisecond)
+	for atomic.LoadInt64(cleanupCount) < 1 && time.Now().Before(deadline) {
+		time.Sleep(5 * time.Millisecond)
+	}
+	if got := atomic.LoadInt64(cleanupCount); got != 1 {
+		t.Errorf("expected 1 cleanup after release of expired entry, got %d", got)
+	}
+	if got := atomic.LoadInt64(setupCount); got != 1 {
+		t.Errorf("setupCount tracking: got %d, want 1", got)
+	}
+}
+
+// TestPooledWithEICTunnel_PanicPoisonsEntry pins that a panic
+// from fn poisons the pool entry on the way out — refcount goes
+// back to zero (no leak) and the entry is marked unusable so the
+// next acquire builds fresh. Without the defer-release pattern, a
+// panic would leave refcount=1 forever and the entry would never
+// evict.
+//
+// pooledWithEICTunnel always goes through the global pool, so the
+// test swaps in a private pool and restores the previous one on
+// cleanup.
+func TestPooledWithEICTunnel_PanicPoisonsEntry(t *testing.T) {
+	setupCount, _, _, _ := withPoolSetupStub(t)
+	poolTTL = 50 * time.Second
+	// Force the singleton's sync.Once first: if it had not fired
+	// yet, the first getEICTunnelPool call inside
+	// pooledWithEICTunnel would overwrite the pool assigned below.
+	prevPool := getEICTunnelPool()
+	testPool := newEICTunnelPool()
+	globalEICTunnelPool = testPool
+	t.Cleanup(func() {
+		testPool.stop()
+		globalEICTunnelPool = prevPool
+	})
+
+	func() {
+		defer func() {
+			if r := recover(); r == nil {
+				t.Errorf("expected panic to bubble up, got nil")
+			}
+		}()
+		_ = pooledWithEICTunnel(context.Background(), "i-panic",
+			func(s eicSSHSession) error { panic("boom") })
+	}()
+
+	// The gated stub's single token was consumed by the first
+	// setup; switch to an ungated stub (same counter) so the
+	// rebuild below can run.
+	prev := poolSetupTunnel
+	poolSetupTunnel = func(ctx context.Context, instanceID string) (
+		eicSSHSession, func(), error) {
+		atomic.AddInt64(setupCount, 1)
+		return eicSSHSession{instanceID: instanceID}, func() {}, nil
+	}
+	t.Cleanup(func() { poolSetupTunnel = prev })
+
+	// Next acquire must build fresh — entry was poisoned by panic.
+	if err := pooledWithEICTunnel(context.Background(), "i-panic",
+		func(s eicSSHSession) error { return nil }); err != nil {
+		t.Fatalf("post-panic acquire: %v", err)
+	}
+	if got := atomic.LoadInt64(setupCount); got != 2 {
+		t.Errorf("expected 2 setups (panic poisoned, rebuild), got %d", got)
+	}
+}
+
+// TestPooledWithEICTunnel_PreservesFnErr pins that errors from the
+// inner fn pass through to the caller verbatim — pool wrapping
+// should not swallow or transform error semantics for app code.
+func TestPooledWithEICTunnel_PreservesFnErr(t *testing.T) {
+	withPoolSetupStub(t)
+	poolTTL = 50 * time.Second
+
+	// Swap in a private global pool so this test is isolated from
+	// any prior test that may have populated it, and put the
+	// previous pool back afterwards so later tests are not left
+	// with an entry built by this test's stub. The singleton's
+	// sync.Once is forced first; otherwise the first
+	// getEICTunnelPool call would overwrite the pool assigned here.
+	prevPool := getEICTunnelPool()
+	testPool := newEICTunnelPool()
+	globalEICTunnelPool = testPool
+	t.Cleanup(func() {
+		testPool.stop()
+		globalEICTunnelPool = prevPool
+	})
+
+	want := errors.New("file does not exist")
+	got := pooledWithEICTunnel(context.Background(), "i-fn-err",
+		func(s eicSSHSession) error { return want })
+	if !errors.Is(got, want) {
+		t.Errorf("pooledWithEICTunnel returned %v, want %v", got, want)
+	}
+}
@@ -8,6 +8,7 @@ import (
 	"net/http"

 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
 	"github.com/gin-gonic/gin"
 )
@@ -100,7 +101,7 @@ func (h *WorkspaceHandler) RotateExternalCredentials(c *gin.Context) {
 	// see when credentials were rotated. No PII; the token plaintext
 	// is NOT logged.
 	if h.broadcaster != nil {
-		h.broadcaster.RecordAndBroadcast(ctx, "EXTERNAL_CREDENTIALS_ROTATED", id, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventExternalCredentialsRotated), id, map[string]interface{}{
 			"workspace_id": id,
 		})
 	}
@@ -331,43 +331,84 @@ func memoryToView(m contract.Memory) MemoryView {
 }

 // namespacesToViews converts resolver namespaces into UI-friendly
-// views. Stable sort: workspace → team → org → custom, then by name.
+// views. Prefers `DisplayName` from the resolver (workspace.name from
+// the DB) when present; falls back to a UUID-prefix label.
+//
+// Issue #2988: pre-fix, every namespace used a shortID-truncated UUID
+// label. On a root workspace where workspace==team==org IDs collide
+// (resolver derive() degenerate case), all three labels rendered
+// identically. DisplayName disambiguates by surfacing real workspace
+// names — the canvas dropdown now reads "Workspace (mac laptop)" /
+// "Team (mac laptop)" / "Org (mac laptop)" for a root workspace
+// rather than three identical UUID prefixes. The `kind` prefix
+// "Workspace/Team/Org" still carries the semantic distinction.
 func namespacesToViews(in []namespace.Namespace) []NamespaceView {
 	views := make([]NamespaceView, 0, len(in))
 	for _, n := range in {
 		views = append(views, NamespaceView{
 			Name:  n.Name,
 			Kind:  n.Kind,
-			Label: namespaceLabel(n.Name, n.Kind),
+			Label: namespaceLabelWithName(n.Name, n.Kind, n.DisplayName),
 		})
 	}
 	return views
 }

-// namespaceLabel renders a human-friendly label for a namespace. The
-// canvas displays this directly; we keep the formatting server-side
-// so the shape stays consistent across UIs (canvas, future TUI, etc.).
+// namespaceLabel renders a human-friendly label for a namespace using
+// the UUID-prefix fallback only. Kept for back-compat with callers
+// that don't yet plumb a display name. New callers should use
+// namespaceLabelWithName which prefers the workspace's display name
+// when available.
 //
-// Format:
-//   workspace:abc-123 → "Workspace (abc-123)"   (UUID short-prefixed)
+// Format (UUID-prefix fallback):
+//   workspace:abc-123 → "Workspace (abc-123)"
 //   team:t-1          → "Team (t-1)"
 //   org:acme          → "Org (acme)"
-//   custom:foo        → "foo"                   (operator-defined; raw)
+//   custom:foo        → "foo"
 func namespaceLabel(name string, kind contract.NamespaceKind) string {
+	return namespaceLabelWithName(name, kind, "")
+}
+
+// namespaceLabelWithName renders the human-friendly label, preferring
+// `displayName` when non-empty.
+//
+// When displayName is set:
+//   Workspace, "mac laptop"    → "Workspace (mac laptop)"
+//   Team, "Engineering team"   → "Team (Engineering team)"
+//   Org, "Hongming's Org"      → "Org (Hongming's Org)"
+//
+// When displayName is empty (lookup miss, future-migration drop, etc.),
+// falls back to the UUID-prefix shape for back-compat.
+//
+// Custom namespaces ignore displayName because they're operator-defined
+// — the operator chose the raw suffix as the label, surfacing a
+// different "name" would be a UX surprise.
+func namespaceLabelWithName(name string, kind contract.NamespaceKind, displayName string) string {
 	suffix := ""
 	if i := indexOfColon(name); i >= 0 && i+1 < len(name) {
 		suffix = name[i+1:]
 	}
 	switch kind {
 	case contract.NamespaceKindWorkspace:
+		if displayName != "" {
+			return "Workspace (" + displayName + ")"
+		}
 		return "Workspace (" + shortID(suffix) + ")"
 	case contract.NamespaceKindTeam:
+		if displayName != "" {
+			return "Team (" + displayName + ")"
+		}
 		return "Team (" + shortID(suffix) + ")"
 	case contract.NamespaceKindOrg:
+		if displayName != "" {
+			return "Org (" + displayName + ")"
+		}
 		return "Org (" + suffix + ")"
 	case contract.NamespaceKindCustom:
-		// Custom namespaces are operator-defined; surface the raw
-		// suffix so they can label them however they want.
+		// Operator-defined; the suffix IS the label they chose.
+		// displayName is ignored — surfacing a different name would
+		// be a UX surprise for an operator who deliberately named
+		// the namespace.
 		if suffix == "" {
 			return name
 		}
@@ -507,6 +507,92 @@ func TestMemoriesV2_Forget_MissingMemoryID_400(t *testing.T) {
 // View-shaping unit tests — pin individual helpers
 // ─────────────────────────────────────────────────────────────────────────────

+// namespaceLabelWithName tests — the new code path that prefers
+// DisplayName over UUID-prefix fallback (issue #2988).
+func TestNamespaceLabelWithName_PrefersDisplayNameWhenSet(t *testing.T) {
+	cases := []struct {
+		name         string
+		raw          string
+		kind         contract.NamespaceKind
+		display      string
+		want         string
+	}{
+		{"workspace with name", "workspace:abc-1234", contract.NamespaceKindWorkspace, "mac laptop", "Workspace (mac laptop)"},
+		{"team with name", "team:abc-1234", contract.NamespaceKindTeam, "Engineering", "Team (Engineering)"},
+		{"org with name", "org:acme", contract.NamespaceKindOrg, "Hongming's Org", "Org (Hongming's Org)"},
+		// Custom ignores displayName by design — operator chose the suffix.
+		{"custom ignores displayName", "custom:ops-shared", contract.NamespaceKindCustom, "FancyName", "ops-shared"},
+		{"unknown kind falls through", "weird:x", contract.NamespaceKind("future"), "WhoCares", "weird:x"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := namespaceLabelWithName(tc.raw, tc.kind, tc.display)
+			if got != tc.want {
+				t.Errorf("namespaceLabelWithName(%q, %q, %q) = %q, want %q",
+					tc.raw, tc.kind, tc.display, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestNamespaceLabelWithName_FallsBackToUUIDPrefixWhenEmpty(t *testing.T) {
+	// When displayName is empty (NULL in DB, lookup miss, etc.), the
+	// label shape MUST match the legacy UUID-prefix shape exactly so
+	// existing canvas behaviour is unchanged for callers that don't
+	// plumb a name.
+	cases := []struct {
+		raw  string
+		kind contract.NamespaceKind
+		want string
+	}{
+		{"workspace:abcdefghij", contract.NamespaceKindWorkspace, "Workspace (abcdefgh)"},
+		{"team:t-99", contract.NamespaceKindTeam, "Team (t-99)"},
+		{"org:acme", contract.NamespaceKindOrg, "Org (acme)"},
+	}
+	for _, tc := range cases {
+		got := namespaceLabelWithName(tc.raw, tc.kind, "")
+		if got != tc.want {
+			t.Errorf("displayName=\"\" path: got %q, want %q", got, tc.want)
+		}
+	}
+}
+
+func TestNamespacesToViews_PassesDisplayNameThrough(t *testing.T) {
+	in := []namespace.Namespace{
+		{Name: "workspace:root-1", Kind: contract.NamespaceKindWorkspace, DisplayName: "mac laptop"},
+		{Name: "team:root-1", Kind: contract.NamespaceKindTeam, DisplayName: "mac laptop"}, // root → team aliases self
+		{Name: "org:root-1", Kind: contract.NamespaceKindOrg, DisplayName: "mac laptop"},
+	}
+	out := namespacesToViews(in)
+	if len(out) != 3 {
+		t.Fatalf("len = %d, want 3", len(out))
+	}
+	wantLabels := []string{
+		"Workspace (mac laptop)",
+		"Team (mac laptop)",
+		"Org (mac laptop)",
+	}
+	for i, v := range out {
+		if v.Label != wantLabels[i] {
+			t.Errorf("[%d] label = %q, want %q", i, v.Label, wantLabels[i])
+		}
+	}
+}
+
+func TestNamespacesToViews_FallsBackToUUIDLabelWhenDisplayNameEmpty(t *testing.T) {
+	// Exercises the back-compat path — DisplayName="" plumbs through
+	// to namespaceLabelWithName which returns the legacy UUID-prefix
+	// label. This is what callers see when the workspaces table
+	// has a NULL name (defensive — workspaces.name is NOT NULL today).
+	in := []namespace.Namespace{
+		{Name: "workspace:root-1", Kind: contract.NamespaceKindWorkspace}, // no DisplayName
+	}
+	out := namespacesToViews(in)
+	// Guard the index below: an unexpected result length should fail the
+	// test with a readable message rather than panic on out[0].
+	if len(out) != 1 {
+		t.Fatalf("len = %d, want 1", len(out))
+	}
+	if out[0].Label != "Workspace (root-1)" {
+		t.Errorf("fallback label = %q, want %q", out[0].Label, "Workspace (root-1)")
+	}
+}
+
 func TestNamespaceLabel_AllKinds(t *testing.T) {
 	cases := []struct {
 		name string
@@ -20,12 +20,14 @@ import (
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/channels"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/crypto"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/scheduler"
 	"github.com/google/uuid"
 )
+
 // createWorkspaceTree recursively materialises an OrgWorkspace (and its
 // descendants) into the workspaces + canvas_layouts tables and kicks off
 // Docker provisioning. absX/absY are THIS workspace's absolute canvas
@@ -80,61 +82,6 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
 		}
 	}

-	// 5s timeout bounds the lookup independently of any HTTP request
-	// context. createWorkspaceTree runs in goroutines spawned from the
-	// /org/import handler, so plumbing the request context here would
-	// cascade-cancel into provisionWorkspaceAuto and abort in-flight
-	// EC2 provisioning if the client disconnected mid-import — that's
-	// the wrong behaviour. A short bounded timeout protects the
-	// per-row SELECT against a wedged DB without taking the
-	// drop-everything-on-disconnect tradeoff.
-	ctxLookup, cancelLookup := context.WithTimeout(context.Background(), 5*time.Second)
-	defer cancelLookup()
-	// Idempotency: if a workspace with the same (parent_id, name) already
-	// exists, skip the INSERT + canvas_layouts + broadcast + provisioning.
-	// This is what makes /org/import safe to call multiple times — the
-	// historical leak was every call recreating the entire tree (see
-	// tenant-hongming, 72 distinct child workspaces in 4 days, all from
-	// repeated org-template spawns of the same template).
-	//
-	// Recursion still runs on the existing id so partial-match templates
-	// (parent exists, some children missing) backfill the missing children
-	// instead of either no-op'ing the whole subtree or duplicating the
-	// existing children.
-	//
-	// /org/import is ADDITIVE-ONLY, never destructive. Children present
-	// in the existing tree but absent from the new template are
-	// preserved (no DELETE on diff). Skip-path also does NOT propagate
-	// updates to existing nodes — a re-import that adds an
-	// initial_memory or schedule to an existing workspace is silently
-	// dropped (the function bypasses seedInitialMemories, schedule SQL,
-	// channel config for skipped rows). To force-update an existing
-	// tree, delete and re-import or use a future /org/sync route.
-	existingID, existing, lookupErr := h.lookupExistingChild(ctxLookup, ws.Name, parentID)
-	if lookupErr != nil {
-		return fmt.Errorf("idempotency check for %s: %w", ws.Name, lookupErr)
-	}
-	if existing {
-		log.Printf("Org import: %q already exists (id=%s) — skipping create+provision, recursing into children for partial-match", ws.Name, existingID)
-		parentRef := ""
-		if parentID != nil {
-			parentRef = *parentID
-		}
-		provlog.Event("provision.skip_existing", map[string]any{
-			"name":        ws.Name,
-			"existing_id": existingID,
-			"parent_id":   parentRef,
-			"tier":        tier,
-		})
-		*results = append(*results, map[string]interface{}{
-			"id":      existingID,
-			"name":    ws.Name,
-			"tier":    tier,
-			"skipped": true,
-		})
-		return h.recurseChildrenForImport(ws, existingID, absX, absY, defaults, orgBaseDir, results, provisionSem)
-	}
-
 	id := uuid.New().String()
 	awarenessNS := workspaceAwarenessNamespace(id)

@@ -186,10 +133,67 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
 	if maxConcurrent <= 0 {
 		maxConcurrent = models.DefaultMaxConcurrentTasks
 	}
-	_, err := db.DB.ExecContext(ctx, `
+	// TOCTOU-safe insert (#2872 Critical 1).
+	//
+	// `ON CONFLICT DO NOTHING` paired with the partial unique index
+	// from migration 20260506000000_workspaces_unique_parent_name.up.sql
+	// atomically resolves a race window that the prior
+	// lookup-then-insert had: two concurrent /org/import POSTs both
+	// saw "not found" in lookupExistingChild and both INSERT'd the
+	// same (parent_id, name). After this swap the SECOND INSERT
+	// silently no-ops, RETURNING returns 0 rows → sql.ErrNoRows, and
+	// the skip-path runs.
+	//
+	// ON CONFLICT target uses (COALESCE(parent_id,...), name) WHERE
+	// status != 'removed' — must match the partial-index predicate
+	// EXACTLY for Postgres to consider the index applicable.
+	var insertedID string
+	err := db.DB.QueryRowContext(ctx, `
 		INSERT INTO workspaces (id, name, role, tier, runtime, awareness_namespace, status, parent_id, workspace_dir, workspace_access, max_concurrent_tasks)
 		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
-	`, id, ws.Name, role, tier, runtime, awarenessNS, "provisioning", parentID, workspaceDir, workspaceAccess, maxConcurrent)
+		ON CONFLICT (COALESCE(parent_id, '00000000-0000-0000-0000-000000000000'::uuid), name)
+		WHERE status != 'removed'
+		DO NOTHING
+		RETURNING id
+	`, id, ws.Name, role, tier, runtime, awarenessNS, "provisioning", parentID, workspaceDir, workspaceAccess, maxConcurrent).Scan(&insertedID)
+	if errors.Is(err, sql.ErrNoRows) {
+		// Skip path — a non-removed row already exists for
+		// (parent_id, name). Re-select its id; idempotency-friendly
+		// semantics match the original lookupExistingChild path
+		// (parent_id IS NOT DISTINCT FROM matches NULL too,
+		// status='removed' rows are ignored).
+		ctxLookup, cancelLookup := context.WithTimeout(context.Background(), 5*time.Second)
+		defer cancelLookup()
+		existingID, found, selErr := h.lookupExistingChild(ctxLookup, ws.Name, parentID)
+		if selErr != nil {
+			return fmt.Errorf("post-conflict re-select for %s: %w", ws.Name, selErr)
+		}
+		if !found {
+			// Index conflicted but row vanished between INSERT and
+			// re-SELECT (status flipped to 'removed' concurrently).
+			// Surface as an error rather than silently retrying —
+			// the user can re-trigger /org/import safely.
+			return fmt.Errorf("workspace %q conflicted on insert but not visible on re-select (concurrent status flip?)", ws.Name)
+		}
+		log.Printf("Org import: %q already exists (id=%s) — skipping create+provision, recursing into children for partial-match", ws.Name, existingID)
+		parentRef := ""
+		if parentID != nil {
+			parentRef = *parentID
+		}
+		provlog.Event("provision.skip_existing", map[string]any{
+			"name":        ws.Name,
+			"existing_id": existingID,
+			"parent_id":   parentRef,
+			"tier":        tier,
+		})
+		*results = append(*results, map[string]interface{}{
+			"id":      existingID,
+			"name":    ws.Name,
+			"tier":    tier,
+			"skipped": true,
+		})
+		return h.recurseChildrenForImport(ws, existingID, absX, absY, defaults, orgBaseDir, results, provisionSem)
+	}
 	if err != nil {
 		log.Printf("Org import: failed to create %s: %v", ws.Name, err)
 		return fmt.Errorf("failed to create %s: %w", ws.Name, err)
@@ -227,7 +231,7 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
 	if parentID != nil {
 		payload["parent_id"] = *parentID
 	}
-	h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISIONING", id, payload)
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisioning), id, payload)

 	// Seed initial memories from workspace config or defaults (issue #1050).
 	// Per-workspace initial_memories override defaults; if workspace has none,
@@ -243,7 +247,7 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
 		if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, url = $2 WHERE id = $3`, models.StatusOnline, ws.URL, id); err != nil {
 			log.Printf("Org import: external workspace status update failed for %s: %v", ws.Name, err)
 		}
-		h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_ONLINE", id, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), id, map[string]interface{}{
 			"name": ws.Name, "external": true,
 		})
 	} else if h.workspace.HasProvisioner() {
@@ -31,11 +31,25 @@ import (
 // tests pin the helper's three observable behaviors plus an AST gate
 // that catches future re-introductions of the un-checked INSERT.

+// lookupChildSQLRE anchors the sqlmock ExpectQuery on every load-bearing
+// token of lookupExistingChild's SELECT (org_import.go:639-645). A loose
+// substring match (the prior shape, just `SELECT id FROM workspaces`)
+// would silent-pass a regression that drops `IS NOT DISTINCT FROM`
+// (breaks NULL-parent matching), drops `parent_id` entirely (hijacks
+// siblings of the same name across different parents), or drops the
+// `status != 'removed'` filter (blocks re-import after Collapse).
+// RFC #2872 Important-2.
+//
+// The four anchored tokens are exactly the predicates the bug shapes
+// would tamper with. Whitespace is `\s+` so a future formatter pass
+// doesn't churn this string.
+const lookupChildSQLRE = `(?s)SELECT id FROM workspaces\s+WHERE name = \$1\s+AND parent_id IS NOT DISTINCT FROM \$2\s+AND status != 'removed'`
+
 func TestLookupExistingChild_NotFound_ReturnsFalseNoError(t *testing.T) {
 	mock := setupTestDB(t)
 	// 0-row result → driver returns sql.ErrNoRows on Scan.
 	parent := "parent-1"
-	mock.ExpectQuery(`SELECT id FROM workspaces`).
+	mock.ExpectQuery(lookupChildSQLRE).
 		WithArgs("Alpha", &parent).
 		WillReturnRows(sqlmock.NewRows([]string{"id"}))

@@ -56,7 +70,7 @@ func TestLookupExistingChild_NotFound_ReturnsFalseNoError(t *testing.T) {
 func TestLookupExistingChild_Found_ReturnsIDAndTrue(t *testing.T) {
 	mock := setupTestDB(t)
 	parent := "parent-1"
-	mock.ExpectQuery(`SELECT id FROM workspaces`).
+	mock.ExpectQuery(lookupChildSQLRE).
 		WithArgs("Alpha", &parent).
 		WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-existing-uuid"))

@@ -79,7 +93,7 @@ func TestLookupExistingChild_NilParent_MatchesRoot(t *testing.T) {
 	// a plain `=` would never match a NULL row. Pin that roots
 	// (parent_id=NULL) are still found by the lookup.
 	mock := setupTestDB(t)
-	mock.ExpectQuery(`SELECT id FROM workspaces`).
+	mock.ExpectQuery(lookupChildSQLRE).
 		WithArgs("RootAgent", (*string)(nil)).
 		WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-root-uuid"))

@@ -102,7 +116,7 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) {
 	mock := setupTestDB(t)
 	parent := "parent-1"
 	connFail := errors.New("simulated postgres unavailable")
-	mock.ExpectQuery(`SELECT id FROM workspaces`).
+	mock.ExpectQuery(lookupChildSQLRE).
 		WithArgs("Alpha", &parent).
 		WillReturnError(connFail)

@@ -137,7 +151,7 @@ func TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound(t *testing.T) {
 	mock := setupTestDB(t)
 	parent := "parent-1"
 	wrapped := fmt.Errorf("driver-wrapped: %w", sql.ErrNoRows)
-	mock.ExpectQuery(`SELECT id FROM workspaces`).
+	mock.ExpectQuery(lookupChildSQLRE).
 		WithArgs("Alpha", &parent).
 		WillReturnError(wrapped)

@@ -209,19 +223,42 @@ func findLookupAndWorkspacesInsertPos(t *testing.T, fname string, src []byte) (l
 	return
 }

-// Source-level guard — pins that org_import.go calls
-// h.lookupExistingChild BEFORE its INSERT INTO workspaces.
+// onConflictDoNothingRE pins the TOCTOU-safe shape introduced by
+// migration 20260506000000_workspaces_unique_parent_name.up.sql +
+// the org_import.go INSERT swap (#2872 Critical 1). The workspaces
+// INSERT MUST funnel concurrent collisions through the partial unique
+// index — `ON CONFLICT (...) WHERE status != 'removed' DO NOTHING`
+// is the literal pg statement form that achieves it.
+//
+// The pattern intentionally requires both the COALESCE expression
+// (so root-workspace NULL parents collide) AND the partial-index WHERE
+// clause (so 'removed' rows don't block re-imports). A regression that
+// drops either piece would make the index target a different shape
+// than the migration created, and Postgres would emit
+// "no unique or exclusion constraint matching the ON CONFLICT
+// specification" at runtime — but only on the FIRST collision attempt
+// in production, not in CI without a live race. This regex catches
+// the shape in source so the bug never ships.
+var onConflictDoNothingRE = regexp.MustCompile(
+	`(?s)ON\s+CONFLICT\s*\(\s*COALESCE\s*\(\s*parent_id\s*,\s*'00000000-0000-0000-0000-000000000000'::uuid\s*\)\s*,\s*name\s*\).*?WHERE\s+status\s*!=\s*'removed'.*?DO\s+NOTHING`,
+)
+
+// Source-level guard — pins that org_import.go's INSERT INTO workspaces
+// uses the TOCTOU-safe ON CONFLICT DO NOTHING pattern.
 //
 // Per memory feedback_behavior_based_ast_gates.md: pin the behavior
-// (idempotency check before INSERT), not just function names. If a
-// future refactor reintroduces the un-checked INSERT (the original
-// bug shape that leaked 72 workspaces in 4 days), this test fails.
+// (atomic conflict resolution at the DB), not just function names.
+// If a future refactor reintroduces the un-checked INSERT (the original
+// bug shape that leaked 72 workspaces in 4 days at tenant-hongming),
+// this test fails BEFORE the broken code reaches production where the
+// race window opens.
 //
-// AST-walk implementation closes the silent-false-pass mode that the
-// previous bytes.Index gate had — see workspacesInsertRE comment for
-// the failure mode (workspaces_audit / workspace_secrets / etc.
-// shadowing the real target via prefix match).
-func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) {
+// Replaces an earlier "lookup-before-insert" gate that became obsolete
+// when this swap moved idempotency into the database. The earlier
+// gate would silent-false-pass against ON CONFLICT — even though that
+// shape is correct — because lookupExistingChild now runs AFTER the
+// INSERT (only on the skip path, to retrieve the existing id).
+func TestCreateWorkspaceTree_InsertUsesOnConflictDoNothing(t *testing.T) {
 	wd, err := os.Getwd()
 	if err != nil {
 		t.Fatalf("getwd: %v", err)
@@ -230,30 +267,24 @@ func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) {
 	if err != nil {
 		t.Fatalf("read org_import.go: %v", err)
 	}
-	lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "org_import.go", src)
-
-	if lookupPos == token.NoPos {
-		t.Fatalf("AST: no call to lookupExistingChild in org_import.go — idempotency check removed?")
-	}
-	if insertPos == token.NoPos {
+	insertSQL := findWorkspacesInsertSQL(t, "org_import.go", src)
+	if insertSQL == "" {
 		t.Fatalf("AST: no SQL literal matching `^\\s*INSERT INTO workspaces\\s*\\(` in any CallExpr in org_import.go — schema change or rename?")
 	}
-	if lookupPos > insertPos {
-		t.Errorf("lookupExistingChild call at %s must come BEFORE INSERT INTO workspaces at %s — non-idempotent ordering would re-leak under repeat /org/import calls",
-			fset.Position(lookupPos), fset.Position(insertPos))
+	if !onConflictDoNothingRE.MatchString(insertSQL) {
+		t.Errorf("workspaces INSERT SQL does NOT use the TOCTOU-safe ON CONFLICT shape — concurrent /org/import POSTs will silently double-insert. Required pattern:\n  ON CONFLICT (COALESCE(parent_id, '00000000-...'::uuid), name) WHERE status != 'removed' DO NOTHING\n\nActual SQL:\n%s", insertSQL)
 	}
 }

-// TestGate_FailsWhenLookupAfterInsert proves the gate actually catches
-// the bug it's named after — running it against synthetic Go source
-// where the lookup call is positioned AFTER the workspaces INSERT must
-// produce lookupPos > insertPos, which the production gate flags as
-// an ERROR. Without this test the gate could regress to "always pass"
-// and we wouldn't notice until the bug shipped again.
+// TestGate_FailsWhenInsertOmitsOnConflict proves the gate actually
+// catches the bug it's named after — running it against synthetic Go
+// source where the workspaces INSERT lacks the ON CONFLICT clause must
+// fail the regex match. Without this test the gate could regress to
+// "always pass" and the TOCTOU window would silently reopen.
 //
-// Per memory feedback_assert_exact_not_substring.md: verify a
-// tightened test FAILS on old code before merging.
-func TestGate_FailsWhenLookupAfterInsert(t *testing.T) {
+// Per memory feedback_assert_exact_not_substring.md: verify the
+// tightened test FAILS on the bug shape before merging.
+func TestGate_FailsWhenInsertOmitsOnConflict(t *testing.T) {
 	const buggySrc = `package handlers

 import "context"
@@ -264,26 +295,57 @@ func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{})

 type fakeOrgHandler struct{}

-func (h *fakeOrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
-	return "", false, nil
-}
-
 func buggyCreate(h *fakeOrgHandler, db fakeDB, ctx context.Context, name string, parentID *string) {
-	// Bug shape: INSERT runs FIRST, lookup runs AFTER. This is the
-	// non-idempotent ordering the gate exists to forbid.
+	// Bug shape: bare INSERT, no ON CONFLICT. Two concurrent calls
+	// race past the unique-index check before either completes the
+	// transaction; constraint failure surfaces as a 500 to the
+	// caller (not graceful skip). Pre-#2872 this would silently
+	// duplicate-insert.
 	db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", name)
-	h.lookupExistingChild(ctx, name, parentID)
 }
 `
-	lookupPos, insertPos, _ := findLookupAndWorkspacesInsertPos(t, "buggy.go", []byte(buggySrc))
-	if lookupPos == token.NoPos || insertPos == token.NoPos {
-		t.Fatalf("synthetic buggy source missing expected nodes (lookupPos=%v insertPos=%v) — helper logic regression", lookupPos, insertPos)
+	insertSQL := findWorkspacesInsertSQL(t, "buggy.go", []byte(buggySrc))
+	if insertSQL == "" {
+		t.Fatalf("synthetic buggy source missing workspaces INSERT — helper logic regression")
 	}
-	if lookupPos < insertPos {
-		t.Fatalf("synthetic bug shape (lookup AFTER insert) returned lookupPos=%d < insertPos=%d — gate would NOT fire on actual bug, regression!", lookupPos, insertPos)
+	if onConflictDoNothingRE.MatchString(insertSQL) {
+		t.Fatalf("synthetic bug shape (bare INSERT, no ON CONFLICT) was MATCHED by the gate — regression: gate would not flag the actual bug. SQL:\n%s", insertSQL)
 	}
-	// Implicit: lookupPos > insertPos here, which the production gate
-	// flags via t.Errorf. This proves the gate is live, not vestigial.
+}
+
+// findWorkspacesInsertSQL parses `src` as Go source (labelled `fname`)
+// and returns the unquoted SQL of the first string literal, in
+// ast.Inspect visiting order, that matches workspacesInsertRE and
+// appears directly in a CallExpr's argument list. Returns "" if none
+// is found; a parse failure aborts the calling test via t.Fatalf.
+// Helper for the ON CONFLICT gate above.
+func findWorkspacesInsertSQL(t *testing.T, fname string, src []byte) string {
+	t.Helper()
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, fname, src, parser.ParseComments)
+	if err != nil {
+		t.Fatalf("parse %s: %v", fname, err)
+	}
+	// Named insertSQL rather than sql so the database/sql package
+	// identifier used elsewhere in this file is not shadowed here.
+	var insertSQL string
+	ast.Inspect(file, func(n ast.Node) bool {
+		if insertSQL != "" {
+			// First match already captured: prune every remaining
+			// subtree instead of re-running the regex on later literals.
+			return false
+		}
+		call, ok := n.(*ast.CallExpr)
+		if !ok {
+			return true
+		}
+		for _, arg := range call.Args {
+			lit, ok := arg.(*ast.BasicLit)
+			if !ok || lit.Kind != token.STRING {
+				continue
+			}
+			// Fall back to the raw literal text if it cannot be unquoted.
+			text := lit.Value
+			if unq, err := strconv.Unquote(text); err == nil {
+				text = unq
+			}
+			if workspacesInsertRE.MatchString(text) {
+				insertSQL = text
+				return false
+			}
+		}
+		return true
+	})
+	return insertSQL
 }

 // TestGate_IgnoresAuditTableShadow proves the regex tightening
@@ -451,6 +451,201 @@ func TestIntegration_PendingUploads_AckedIndexExists(t *testing.T) {
 	}
 }

+// TestIntegration_PollUpload_AtomicRollback_AcrossBothTables proves the
+// #149 cross-table contract at the database layer: when PutBatchTx and
+// LogActivityTx run in the same caller-owned Tx and an activity INSERT
+// fails after some rows have already been INSERTed, Rollback unwinds
+// BOTH tables, leaving zero rows.
+//
+// Coverage map (#149):
+//   - chat_files_poll_test.go's TestPollUpload_AtomicRollbackOnActivityInsertFailure
+//     uses sqlmock to prove the Go handler issues Begin / N inserts /
+//     Rollback in the right order (no Commit on failure path).
+//   - This integration test proves the helpers + real Postgres compose
+//     correctly: rollback after a mid-Tx activity insert failure
+//     actually reverts BOTH the prior activity row AND the
+//     pending_uploads rows from PutBatchTx.
+//   - The pre-existing TestIntegration_PendingUploads_PutBatch_AtomicRollback
+//     covers the pending_uploads-only case.
+//
+// Failure injection: a NUL byte in `summary` (TEXT column) — lib/pq
+// rejects it at the protocol layer. Same trick the existing PutBatch
+// AtomicRollback test uses for the pending_uploads INSERT.
+func TestIntegration_PollUpload_AtomicRollback_AcrossBothTables(t *testing.T) {
+	conn := integrationDB_PendingUploads(t)
+	ctx := context.Background()
+
+	// activity_logs has a FK to workspaces(id) — seed a real row so
+	// non-failing inserts succeed. Wipe activity_logs + this workspaces
+	// row at end so the next test sees a clean slate (the integrationDB
+	// helper only wipes pending_uploads).
+	wsID := uuid.New()
+	if _, err := conn.ExecContext(ctx,
+		`INSERT INTO workspaces (id, name) VALUES ($1, 'test-149-rollback')`, wsID,
+	); err != nil {
+		t.Fatalf("seed workspace: %v", err)
+	}
+	t.Cleanup(func() {
+		// CASCADE on workspaces FK deletes the activity_logs rows; explicit
+		// DELETE on activity_logs catches any rows that somehow leaked.
+		_, _ = conn.ExecContext(context.Background(), `DELETE FROM activity_logs WHERE workspace_id = $1`, wsID)
+		_, _ = conn.ExecContext(context.Background(), `DELETE FROM workspaces WHERE id = $1`, wsID)
+	})
+
+	store := pendinguploads.NewPostgres(conn)
+
+	// Mirror uploadPollMode's Tx shape: BeginTx → PutBatchTx → N ×
+	// LogActivityTx → Commit (or Rollback on failure).
+	tx, err := conn.BeginTx(ctx, nil)
+	if err != nil {
+		t.Fatalf("BeginTx: %v", err)
+	}
+	// Safety net: a t.Fatalf before the explicit Rollback below would
+	// otherwise leave the Tx open, pinning a pooled connection and holding
+	// row locks that can block the DELETEs in Cleanup. Once the explicit
+	// Rollback has run this call just returns sql.ErrTxDone, which is
+	// deliberately ignored.
+	defer func() { _ = tx.Rollback() }()
+
+	items := []pendinguploads.PutItem{
+		{Content: []byte("first"), Filename: "a.txt", Mimetype: "text/plain"},
+		{Content: []byte("second"), Filename: "b.txt", Mimetype: "text/plain"},
+	}
+	fileIDs, err := store.PutBatchTx(ctx, tx, wsID, items)
+	if err != nil {
+		t.Fatalf("PutBatchTx: %v", err)
+	}
+	if len(fileIDs) != 2 {
+		t.Fatalf("len(fileIDs) = %d, want 2", len(fileIDs))
+	}
+
+	// First activity insert succeeds — would commit if not for the
+	// rollback that the second insert's failure forces.
+	wsIDStr := wsID.String()
+	method := "chat_upload_receive"
+	okSummary := "chat_upload_receive: a.txt"
+	if _, err := LogActivityTx(ctx, tx, nil, ActivityParams{
+		WorkspaceID:  wsIDStr,
+		ActivityType: "a2a_receive",
+		TargetID:     &wsIDStr,
+		Method:       &method,
+		Summary:      &okSummary,
+		Status:       "ok",
+	}); err != nil {
+		t.Fatalf("first LogActivityTx (should succeed): %v", err)
+	}
+
+	// Second activity insert: NUL byte in summary triggers lib/pq
+	// "invalid byte sequence for encoding UTF8: 0x00" — the canonical
+	// "DB-side error after some Tx work has already happened" we want.
+	badSummary := "chat_upload_receive: b\x00.txt"
+	_, err = LogActivityTx(ctx, tx, nil, ActivityParams{
+		WorkspaceID:  wsIDStr,
+		ActivityType: "a2a_receive",
+		TargetID:     &wsIDStr,
+		Method:       &method,
+		Summary:      &badSummary,
+		Status:       "ok",
+	})
+	if err == nil {
+		t.Fatal("expected error from NUL-byte summary, got nil")
+	}
+
+	// Caller (uploadPollMode in production) rolls back on the error.
+	if rbErr := tx.Rollback(); rbErr != nil {
+		t.Fatalf("Rollback: %v", rbErr)
+	}
+
+	// THE assertion this test exists for: BOTH tables must have zero
+	// rows for this workspace. Pre-#149 the activity_logs row from the
+	// first insert would persist (separate fire-and-forget INSERT) and
+	// pending_uploads would also persist (committed by PutBatch's own
+	// Tx). Post-#149 the shared Tx + Rollback unwinds both.
+	var puCount, alCount int
+	if err := conn.QueryRowContext(ctx,
+		`SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID,
+	).Scan(&puCount); err != nil {
+		t.Fatalf("count pending_uploads: %v", err)
+	}
+	if err := conn.QueryRowContext(ctx,
+		`SELECT COUNT(*) FROM activity_logs WHERE workspace_id = $1`, wsID,
+	).Scan(&alCount); err != nil {
+		t.Fatalf("count activity_logs: %v", err)
+	}
+	if puCount != 0 {
+		t.Errorf("pending_uploads leaked %d row(s) after Rollback — #149 regression", puCount)
+	}
+	if alCount != 0 {
+		t.Errorf("activity_logs leaked %d row(s) after Rollback — #149 regression "+
+			"(THIS is the scenario the ticket called out: pre-fix, the first activity row "+
+			"committed in its own implicit Tx, leaving an orphan)", alCount)
+	}
+}
+
+// TestIntegration_PollUpload_HappyPath_AcrossBothTables is the positive
+// counterpart to the rollback test: when nothing fails, both tables
+// commit together and the row counts match.
+func TestIntegration_PollUpload_HappyPath_AcrossBothTables(t *testing.T) {
+	conn := integrationDB_PendingUploads(t)
+	ctx := context.Background()
+
+	wsID := uuid.New()
+	if _, err := conn.ExecContext(ctx,
+		`INSERT INTO workspaces (id, name) VALUES ($1, 'test-149-happy')`, wsID,
+	); err != nil {
+		t.Fatalf("seed workspace: %v", err)
+	}
+	t.Cleanup(func() {
+		// This test commits its rows, so also remove the pending_uploads
+		// rows it created (best-effort, like the other DELETEs here).
+		_, _ = conn.ExecContext(context.Background(), `DELETE FROM pending_uploads WHERE workspace_id = $1`, wsID)
+		_, _ = conn.ExecContext(context.Background(), `DELETE FROM activity_logs WHERE workspace_id = $1`, wsID)
+		_, _ = conn.ExecContext(context.Background(), `DELETE FROM workspaces WHERE id = $1`, wsID)
+	})
+
+	store := pendinguploads.NewPostgres(conn)
+	tx, err := conn.BeginTx(ctx, nil)
+	if err != nil {
+		t.Fatalf("BeginTx: %v", err)
+	}
+	// Safety net: a t.Fatalf before Commit would otherwise leave the Tx
+	// open, pinning a pooled connection and holding row locks that can
+	// block the DELETEs in Cleanup. After a successful Commit this call
+	// just returns sql.ErrTxDone, which is deliberately ignored.
+	defer func() { _ = tx.Rollback() }()
+
+	items := []pendinguploads.PutItem{
+		{Content: []byte("a"), Filename: "a.txt", Mimetype: "text/plain"},
+		{Content: []byte("b"), Filename: "b.txt", Mimetype: "text/plain"},
+		{Content: []byte("c"), Filename: "c.txt", Mimetype: "text/plain"},
+	}
+	if _, err := store.PutBatchTx(ctx, tx, wsID, items); err != nil {
+		t.Fatalf("PutBatchTx: %v", err)
+	}
+	wsIDStr := wsID.String()
+	method := "chat_upload_receive"
+	// One activity row per uploaded file, all inside the same Tx.
+	for _, it := range items {
+		summary := "chat_upload_receive: " + it.Filename
+		if _, err := LogActivityTx(ctx, tx, nil, ActivityParams{
+			WorkspaceID:  wsIDStr,
+			ActivityType: "a2a_receive",
+			TargetID:     &wsIDStr,
+			Method:       &method,
+			Summary:      &summary,
+			Status:       "ok",
+		}); err != nil {
+			t.Fatalf("LogActivityTx %q: %v", it.Filename, err)
+		}
+	}
+	if err := tx.Commit(); err != nil {
+		t.Fatalf("Commit: %v", err)
+	}
+
+	// Both tables must now hold exactly one row per item.
+	var puCount, alCount int
+	if err := conn.QueryRowContext(ctx,
+		`SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID,
+	).Scan(&puCount); err != nil {
+		t.Fatalf("count pending_uploads: %v", err)
+	}
+	if err := conn.QueryRowContext(ctx,
+		`SELECT COUNT(*) FROM activity_logs WHERE workspace_id = $1`, wsID,
+	).Scan(&alCount); err != nil {
+		t.Fatalf("count activity_logs: %v", err)
+	}
+	if puCount != 3 {
+		t.Errorf("pending_uploads count = %d, want 3", puCount)
+	}
+	if alCount != 3 {
+		t.Errorf("activity_logs count = %d, want 3", alCount)
+	}
+}
+
 func TestIntegration_PendingUploads_GetIgnoresExpiredAndAcked(t *testing.T) {
 	conn := integrationDB_PendingUploads(t)
 	store := pendinguploads.NewPostgres(conn)
@@ -2,6 +2,7 @@ package handlers_test

 import (
 	"context"
+	"database/sql"
 	"encoding/json"
 	"errors"
 	"net/http"
@@ -84,6 +85,9 @@ func (f *fakeStorage) Sweep(_ context.Context, _ time.Duration) (pendinguploads.
 func (f *fakeStorage) PutBatch(_ context.Context, _ uuid.UUID, _ []pendinguploads.PutItem) ([]uuid.UUID, error) {
 	return nil, nil
 }
+// PutBatchTx is a no-op stub: it ignores every argument (including the
+// Tx) and reports no file IDs and no error.
+func (f *fakeStorage) PutBatchTx(_ context.Context, _ *sql.Tx, _ uuid.UUID, _ []pendinguploads.PutItem) ([]uuid.UUID, error) {
+	return nil, nil
+}

 func newRouter(handler *handlers.PendingUploadsHandler) *gin.Engine {
 	gin.SetMode(gin.TestMode)
@@ -414,7 +414,7 @@ func (h *RegistryHandler) Register(c *gin.Context) {
 	}

 	// Broadcast WORKSPACE_ONLINE
-	if err := h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_ONLINE", payload.ID, map[string]interface{}{
+	if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), payload.ID, map[string]interface{}{
 		"url":           cachedURL,
 		"agent_card":    payload.AgentCard,
 		"delivery_mode": effectiveMode,
@@ -572,7 +572,7 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) {

 	// Broadcast current task update only when it changed (avoid spamming on every heartbeat)
 	if payload.CurrentTask != prevTask {
-		h.broadcaster.BroadcastOnly(payload.WorkspaceID, "TASK_UPDATED", map[string]interface{}{
+		h.broadcaster.BroadcastOnly(payload.WorkspaceID, string(events.EventTaskUpdated), map[string]interface{}{
 			"current_task": payload.CurrentTask,
 			"active_tasks": payload.ActiveTasks,
 		})
@@ -593,7 +593,7 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) {
 	// so per-heartbeat cost is one in-memory channel send per active
 	// SSE subscriber and one WS hub fan-out. At 30s heartbeat cadence
 	// this is far below any noise floor on either path.
-	h.broadcaster.BroadcastOnly(payload.WorkspaceID, "WORKSPACE_HEARTBEAT", map[string]interface{}{
+	h.broadcaster.BroadcastOnly(payload.WorkspaceID, string(events.EventWorkspaceHeartbeat), map[string]interface{}{
 		"active_tasks":   payload.ActiveTasks,
 		"uptime_seconds": payload.UptimeSeconds,
 	})
@@ -678,7 +678,7 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
 		if err != nil {
 			log.Printf("Heartbeat: failed to mark %s degraded (wedged): %v", payload.WorkspaceID, err)
 		}
-		h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_DEGRADED", payload.WorkspaceID, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceDegraded), payload.WorkspaceID, map[string]interface{}{
 			"runtime_state": "wedged",
 			"sample_error":  payload.SampleError,
 		})
@@ -699,7 +699,7 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
 		if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`, models.StatusDegraded, payload.WorkspaceID); err != nil {
 			log.Printf("Heartbeat: failed to mark %s degraded: %v", payload.WorkspaceID, err)
 		}
-		h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_DEGRADED", payload.WorkspaceID, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceDegraded), payload.WorkspaceID, map[string]interface{}{
 			"error_rate":   payload.ErrorRate,
 			"sample_error": payload.SampleError,
 		})
@@ -718,7 +718,7 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
 		if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`, models.StatusOnline, payload.WorkspaceID); err != nil {
 			log.Printf("Heartbeat: failed to recover %s to online: %v", payload.WorkspaceID, err)
 		}
-		h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_ONLINE", payload.WorkspaceID, map[string]interface{}{})
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), payload.WorkspaceID, map[string]interface{}{})
 	}

 	// Recovery: if workspace was offline but is now sending heartbeats, bring it back online.
@@ -728,7 +728,7 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
 		if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2 AND status = 'offline'`, models.StatusOnline, payload.WorkspaceID); err != nil {
 			log.Printf("Heartbeat: failed to recover %s from offline: %v", payload.WorkspaceID, err)
 		}
-		h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_ONLINE", payload.WorkspaceID, map[string]interface{}{})
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), payload.WorkspaceID, map[string]interface{}{})
 	}

 	// Auto-recovery: if a workspace is marked "provisioning" but is actively sending
@@ -743,7 +743,7 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
 		} else {
 			log.Printf("Heartbeat: transitioned %s from provisioning to online (heartbeat received)", payload.WorkspaceID)
 		}
-		h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_ONLINE", payload.WorkspaceID, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), payload.WorkspaceID, map[string]interface{}{
 			"recovered_from": currentStatus,
 		})
 	}
@@ -771,7 +771,7 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
 		} else {
 			log.Printf("Heartbeat: transitioned %s from awaiting_agent to online (heartbeat received)", payload.WorkspaceID)
 		}
-		h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_ONLINE", payload.WorkspaceID, map[string]interface{}{
+		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), payload.WorkspaceID, map[string]interface{}{
 			"recovered_from": currentStatus,
 		})
 	}
@@ -820,7 +820,7 @@ func (h *RegistryHandler) UpdateCard(c *gin.Context) {
 		return
 	}

-	h.broadcaster.RecordAndBroadcast(c.Request.Context(), "AGENT_CARD_UPDATED", payload.WorkspaceID, map[string]interface{}{
+	h.broadcaster.RecordAndBroadcast(c.Request.Context(), string(events.EventAgentCardUpdated), payload.WorkspaceID, map[string]interface{}{
 		"agent_card": payload.AgentCard,
 	})

@@ -1,23 +1,20 @@
 package handlers

-// template_files_eic.go — SSH-backed file write for SaaS workspaces
-// (EC2-per-workspace). Pairs with the existing Docker-path in templates.go
-// (WriteFile) and template_import.go (ReplaceFiles).
+// template_files_eic.go — SSH-backed file operations for SaaS workspaces
+// (EC2-per-workspace). Pairs with the local-Docker path in templates.go
+// (List/Read/Write/Delete) and template_import.go (ReplaceFiles).
 //
-// Flow for a single file write:
-//  1. Generate ephemeral ed25519 keypair (on-disk for ≤ write duration).
-//  2. Push the public key via `aws ec2-instance-connect send-ssh-public-key`
-//     so the target sshd accepts it for the next 60s.
-//  3. Open a TLS-tunnelled TCP port via `aws ec2-instance-connect open-tunnel`
-//     from a local free port → workspace's sshd on 22.
-//  4. Pipe content to `ssh ... "install -D -m 0644 /dev/stdin <abs path>"`.
-//     `install -D` creates any missing parent dirs atomically. File is owned
-//     by whichever $OSUser we authenticated as (ubuntu by default).
-//  5. Close tunnel + wipe keydir.
+// Architecture note: every operation goes through `withEICTunnel`, which
+// owns the ephemeral-keypair → key-push → tunnel → port-wait dance. Per-
+// op helpers (list/read/write/delete) only carry the remote command +
+// stdin/stdout shape. This keeps the EIC connection logic in one place
+// so a fix to the dance — e.g. PR #2822's `LogLevel=ERROR` shim — only
+// touches one helper.
 //
-// All the AWS calls + ssh tunnel exec go through the same package-level
-// func vars defined in terminal.go (openTunnelCmd, sendSSHPublicKey) so
-// tests can stub them the same way the terminal tests do.
+// Path translation rules: see resolveWorkspaceFilePath. `/configs`
+// is the per-runtime managed-config indirection (claude-code → /configs,
+// hermes → /home/ubuntu/.hermes); other allow-listed roots (`/home`,
+// `/workspace`, `/plugins`) pass through literally.

 import (
 	"bytes"
@@ -32,8 +29,7 @@ import (
 )

 // workspaceFilePathPrefix maps a runtime name to the absolute base path on
-// the workspace EC2 where the Files API's relative paths land. New runtimes
-// can be added here without touching handler code.
+// the workspace EC2 where the runtime's managed-config dir lives.
 //
 // Keep these stable — changing the base path for an existing runtime
 // without a migration shim will make previously-saved files disappear from
@@ -60,41 +56,104 @@ var workspaceFilePathPrefix = map[string]string{
 	// those runtimes actually have on disk.
 }

-func resolveWorkspaceFilePath(runtime, relPath string) (string, error) {
+// resolveWorkspaceFilePath translates (runtime, root, relPath) into an
+// absolute path on the workspace EC2.
+//
+// `root="/configs"` (or empty / unrecognized) is treated as the
+// runtime's MANAGED-config dir via workspaceFilePathPrefix —
+// /home/ubuntu/.hermes for hermes, /configs for claude-code, etc.
+// This preserves the v1 ReadFile/WriteFile behavior where the canvas's
+// Config tab GETs/PUTs "config.yaml" without specifying a root and
+// lands in the runtime's own config dir, even though that dir's
+// absolute path differs per runtime.
+//
+// Any other allow-listed root (`/home`, `/workspace`, `/plugins`) is
+// treated as a LITERAL absolute path on the EC2 host. Those roots are
+// universal Linux paths that don't need per-runtime indirection.
+//
+// Restricting the literal pass-through to allowedRoots is the
+// security boundary — the handler also gates this same set, so the
+// resolver is defence-in-depth: even if a future caller forgets the
+// handler-side check, the resolver won't translate `?root=/etc` into
+// a real absolute path.
+//
+// relPath is sanitised by validateRelPath (no absolute, no `..`).
+func resolveWorkspaceFilePath(runtime, root, relPath string) (string, error) {
 	if err := validateRelPath(relPath); err != nil {
 		return "", err
 	}
-	base, ok := workspaceFilePathPrefix[strings.ToLower(strings.TrimSpace(runtime))]
-	if !ok {
-		base = "/configs"
-	}
+	base := resolveWorkspaceRootPath(runtime, root)
 	return filepath.Join(base, filepath.Clean(relPath)), nil
 }

-// eicFileWriteTimeout bounds the whole dance. Key push is <500ms, tunnel
-// is 1-2s, ssh + write is <2s. 30s gives headroom for slow pulls without
-// hanging the Files API forever under EIC misconfiguration.
-const eicFileWriteTimeout = 30 * time.Second
-
-// writeFileViaEIC writes a single file to the workspace EC2 at the
-// absolute path that resolveWorkspaceFilePath computed. On success,
-// optionally invokes the runtime's reload hook (not implemented yet —
-// tracked as follow-up; for today the canvas issues a separate Restart
-// after Save).
+// resolveWorkspaceRootPath maps a (runtime, root) pair to the absolute
+// base directory on the workspace EC2. It never looks at a relative
+// file path: listFilesViaEIC uses the result as the directory to walk,
+// and resolveWorkspaceFilePath joins it with relPath.
+//
+// Keeping the runtime-vs-literal decision in this one function is what
+// makes list/read/write/delete agree on the meaning of
+// `?root=/configs` for hermes vs claude-code vs an unknown runtime —
+// otherwise list could show one directory while read/write target
+// another.
+//
+// Resolution order:
+//  1. A trimmed root that is allow-listed and is not "/configs" is
+//     returned literally (`/home`, `/workspace`, `/plugins`).
+//  2. Otherwise (empty, "/configs", or not allow-listed) the runtime's
+//     entry in workspaceFilePathPrefix is used — that map is the SSOT
+//     for the managed-config location.
+//  3. A runtime with no entry falls back to "/configs".
+func resolveWorkspaceRootPath(runtime, root string) string {
+	literal := strings.TrimSpace(root)
+	if literal != "" && literal != "/configs" && allowedRoots[literal] {
+		return literal
+	}
+	runtimeKey := strings.ToLower(strings.TrimSpace(runtime))
+	if managed, known := workspaceFilePathPrefix[runtimeKey]; known {
+		return managed
+	}
+	return "/configs"
+}
+
+// eicFileOpTimeout bounds the whole tunnel + ssh dance. Key push is
+// <500ms, tunnel is 1-2s, ssh + remote command is <2s for read/write.
+// 30s gives headroom for slow EIC pulls + the larger `find` walk that
+// listFilesViaEIC issues, without hanging the Files API forever under
+// EIC misconfiguration.
+const eicFileOpTimeout = 30 * time.Second
+
+// eicFileOpTimeout was historically named eicFileWriteTimeout when the
+// only EIC op was writeFile. Keep an alias so any external test that
+// pinned the old name still compiles; rename can land as a follow-up
+// once we've gone a release without the alias being touched.
+//
+//nolint:revive // intentional alias for back-compat with prior tests.
+const eicFileWriteTimeout = eicFileOpTimeout
+
+// eicSSHSession describes an open EIC tunnel ready for an ssh subprocess.
+// Only valid inside the closure passed to withEICTunnel — the underlying
+// keypair + tunnel are torn down when the closure returns.
+type eicSSHSession struct {
+	keyPath    string // private key file passed to `ssh -i`
+	localPort  int    // local end of the tunnel, passed to `ssh -p`
+	osUser     string // login user in `<osUser>@127.0.0.1`
+	instanceID string // EC2 instance id the session was opened for
+}
+
+// withEICTunnel sets up an EIC SSH session (ephemeral keypair → push
+// → AWS open-tunnel → wait-for-port), invokes fn with a session handle,
+// and tears everything down on return. The caller is responsible for
+// applying the per-op context.WithTimeout before calling — this helper
+// only owns the EIC dance, not the operation budget, so a caller that
+// needs a different timeout (e.g. a large bulk import) doesn't have to
+// fight a hard-coded one.
+//
+// All AWS calls go through the package-level func vars in terminal.go
+// (sendSSHPublicKey, openTunnelCmd) so tests can stub them the same way
+// terminal_test.go does. The whole helper is also assigned to a
+// `var` (`withEICTunnel`) so handler-dispatch tests can stub the entire
+// dance instead of having to wire up a fake tunnel + fake ssh server.
+var withEICTunnel = realWithEICTunnel
+
+func realWithEICTunnel(ctx context.Context, instanceID string, fn func(s eicSSHSession) error) error {
 	if instanceID == "" {
 		return fmt.Errorf("workspace has no instance_id — not a SaaS EC2 workspace")
 	}
-	absPath, err := resolveWorkspaceFilePath(runtime, relPath)
-	if err != nil {
-		return fmt.Errorf("invalid path: %w", err)
-	}
-
 	osUser := os.Getenv("WORKSPACE_EC2_OS_USER")
 	if osUser == "" {
 		osUser = "ubuntu"
@@ -104,11 +163,7 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c
 		region = "us-east-2"
 	}

-	ctx, cancel := context.WithTimeout(ctx, eicFileWriteTimeout)
-	defer cancel()
-
-	// Ephemeral keypair.
-	keyDir, err := os.MkdirTemp("", "molecule-filewrite-*")
+	keyDir, err := os.MkdirTemp("", "molecule-eic-*")
 	if err != nil {
 		return fmt.Errorf("keydir mkdir: %w", err)
 	}
@@ -116,7 +171,7 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c
 	keyPath := keyDir + "/id"
 	if out, kerr := exec.CommandContext(ctx, "ssh-keygen",
 		"-t", "ed25519", "-f", keyPath, "-N", "", "-q",
-		"-C", "molecule-filewrite",
+		"-C", "molecule-eic",
 	).CombinedOutput(); kerr != nil {
 		return fmt.Errorf("ssh-keygen: %w (%s)", kerr, strings.TrimSpace(string(out)))
 	}
@@ -125,24 +180,21 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c
 		return fmt.Errorf("read pubkey: %w", err)
 	}

-	// 1. Push key.
 	if err := sendSSHPublicKey(ctx, region, instanceID, osUser, strings.TrimSpace(string(pubKey))); err != nil {
 		return fmt.Errorf("send-ssh-public-key: %w", err)
 	}

-	// 2. Open tunnel on an OS-picked free port.
 	localPort, err := pickFreePort()
 	if err != nil {
 		return fmt.Errorf("pick free port: %w", err)
 	}
-	opts := eicSSHOptions{
+	tunnel := openTunnelCmd(eicSSHOptions{
 		InstanceID:     instanceID,
 		OSUser:         osUser,
 		Region:         region,
 		LocalPort:      localPort,
 		PrivateKeyPath: keyPath,
-	}
-	tunnel := openTunnelCmd(opts)
+	})
 	tunnel.Env = os.Environ()
 	if err := tunnel.Start(); err != nil {
 		return fmt.Errorf("open-tunnel start: %w", err)
@@ -157,183 +209,330 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c
 		return fmt.Errorf("tunnel never listened: %w", err)
 	}

-	// 3. SSH + install -D. `install` creates any missing parent dirs and
-	// writes the file atomically via temp-file-rename. Permissions 0644
-	// match the existing tar-unpack defaults on the Docker path.
-	//
-	// `sudo -n` (non-interactive) prefix: the canonical containerized
-	// workspace layout puts /configs at the root, owned by root because
-	// cloud-init runs as root (see
-	// molecule-controlplane/internal/provisioner/userdata_containerized.go).
-	// SSH-as-ubuntu can't write into /configs without escalation.
-	// Ubuntu has passwordless sudo on EC2 by default; sudo -n fails fast
-	// (no prompt) if that ever changes, surfacing a clean error instead
-	// of a hang. The hermes path /home/ubuntu/.hermes is ubuntu-owned
-	// and doesn't strictly need sudo, but using it uniformly avoids
-	// per-runtime branching here.
-	//
-	// The remote command is fully deterministic — no user-controlled
-	// input reaches a shell eval (absPath is built from a map + Clean()).
-	sshArgs := []string{
-		"-i", keyPath,
+	return fn(eicSSHSession{
+		keyPath:    keyPath,
+		localPort:  localPort,
+		osUser:     osUser,
+		instanceID: instanceID,
+	})
+}
+
+// sshArgs returns the standard ssh CLI args for an EIC session pointed
+// at the local tunnel port + a single remote command string.
+//
+// `LogLevel=ERROR` silences the benign "Warning: Permanently added
+// '[127.0.0.1]:NNNNN' to known hosts" notice that ssh emits on every
+// fresh tunnel connection. Without this, the notice lands on stderr
+// and fools the read/list "empty stdout + empty stderr → not found"
+// classifiers into thinking the warning is a real ssh-layer error → 500
+// instead of 404 (Hermes config.yaml load, hongming tenant, 2026-05-05
+// 02:38; PR #2822). Real auth/tunnel errors stay visible because they're
+// emitted at ERROR level.
+//
+// Originally each helper assembled its own ssh args inline, so PR #2822's
+// LogLevel=ERROR fix had to be applied to every copy. Centralising here
+// means future ssh-option tweaks only land in one place.
+func (s eicSSHSession) sshArgs(remoteCommand string) []string {
+	return []string{
+		"-i", s.keyPath,
 		"-o", "StrictHostKeyChecking=no",
 		"-o", "UserKnownHostsFile=/dev/null",
-		// LogLevel=ERROR silences the benign "Warning: Permanently
-		// added '[127.0.0.1]:NNNNN' to known hosts" notice that ssh
-		// emits on every fresh tunnel connection. Without this, the
-		// notice lands on stderr and fools readFileViaEIC's "empty
-		// stdout + empty stderr → file not found" classifier into
-		// thinking the warning is a real ssh-layer error → 500
-		// instead of 404 (Hermes config.yaml load, hongming tenant,
-		// 2026-05-05 02:38). Real auth/tunnel errors stay visible
-		// because they're emitted at ERROR level.
 		"-o", "LogLevel=ERROR",
 		"-o", "ServerAliveInterval=15",
-		"-p", fmt.Sprintf("%d", localPort),
-		fmt.Sprintf("%s@127.0.0.1", osUser),
-		fmt.Sprintf("sudo -n install -D -m 0644 /dev/stdin %s", shellQuote(absPath)),
+		"-p", fmt.Sprintf("%d", s.localPort),
+		fmt.Sprintf("%s@127.0.0.1", s.osUser),
+		remoteCommand,
 	}
-	sshCmd := exec.CommandContext(ctx, "ssh", sshArgs...)
-	sshCmd.Env = os.Environ()
-	sshCmd.Stdin = bytes.NewReader(content)
-	var stderr bytes.Buffer
-	sshCmd.Stderr = &stderr
-	if err := sshCmd.Run(); err != nil {
-		return fmt.Errorf("ssh install: %w (%s)", err, strings.TrimSpace(stderr.String()))
+}
+
+// buildInstallShell builds the remote command that writes `/dev/stdin`
+// to absPath with mode 0644 through `sudo -n install -D`. `install -D`
+// creates any missing parent dirs and writes via temp-file-rename
+// (atomic). Kept as a pure function so it can be tested directly; the
+// single variable input (absPath) is passed through shellQuote so a
+// shell metachar in a future caller's path cannot break out.
+func buildInstallShell(absPath string) string {
+	quotedPath := shellQuote(absPath)
+	return fmt.Sprintf("sudo -n install -D -m 0644 /dev/stdin %s", quotedPath)
+}
+
+// buildCatShell builds the remote command that reads absPath via
+// `sudo -n cat`, discarding stderr so that a missing file shows up as
+// empty stdout + non-zero exit (unambiguous → os.ErrNotExist at the
+// caller).
+func buildCatShell(absPath string) string {
+	quotedPath := shellQuote(absPath)
+	return fmt.Sprintf("sudo -n cat %s 2>/dev/null", quotedPath)
+}
+
+// buildRmShell builds the remote `sudo -n rm -f` command for absPath.
+// The flag is deliberately `-f` and not `-rf`: removing directories
+// needs its own explicit endpoint if/when the canvas grows that
+// affordance, and `-rf` would let a misclassified directory entry
+// trigger a recursive delete.
+func buildRmShell(absPath string) string {
+	quotedPath := shellQuote(absPath)
+	return fmt.Sprintf("sudo -n rm -f %s", quotedPath)
+}
+
+// buildFindShell returns the remote command for enumerating entries
+// under listPath up to maxDepth, emitting one `TYPE|SIZE|REL_PATH` line
+// per entry (matches the local-Docker container path's parser exactly).
+// TYPE is `d` or `f`; directories always report size 0.
+//
+// REL_PATH is the path printed by find with the leading `listPath/`
+// stripped. The listing root itself and empty names are skipped.
+//
+// `2>/dev/null` swallows find's "No such file" error so a missing
+// listing root surfaces as empty stdout (handler returns []) rather
+// than 500.
+//
+// `stat -c %s` is GNU coreutils; `stat -f %z` is BSD. Try GNU first,
+// fall back to BSD, then 0 — same shape the local-Docker `sh -c`
+// version uses so a future cross-runtime fleet (Alpine vs Ubuntu)
+// doesn't regress. In the format string these appear as `%%s` / `%%z`
+// so Sprintf emits a literal `%`.
+//
+// Hidden / cache filtering matches the container path: anything inside
+// .git, __pycache__ or node_modules, plus entries named .DS_Store.
+// Without these the tree drowns in transient artefacts on a /workspace
+// listing. The `-path` patterns end in `/*`, so the .git / __pycache__
+// / node_modules directory entries themselves are still emitted; only
+// their contents are dropped.
+//
+// NOTE(review): `sudo -n` covers only `find`. The `[ -d ]` test and
+// `stat` inside the loop run as the ssh login user — confirm that is
+// sufficient for entries below directories that user cannot traverse.
+func buildFindShell(listPath string, maxDepth int) string {
+	return fmt.Sprintf(
+		`sudo -n find %s -maxdepth %d -not -path '*/.git/*' -not -path '*/__pycache__/*' -not -path '*/node_modules/*' -not -name .DS_Store 2>/dev/null | while IFS= read -r f; do `+
+			`rel="${f#%s/}"; [ "$rel" = %s ] && continue; [ -z "$rel" ] && continue; `+
+			`if [ -d "$f" ]; then echo "d|0|$rel"; else `+
+			`s=$(stat -c %%s "$f" 2>/dev/null || stat -f %%z "$f" 2>/dev/null || echo 0); echo "f|$s|$rel"; `+
+			`fi; done`,
+		shellQuote(listPath), maxDepth, shellQuote(listPath), shellQuote(listPath),
+	)
+}
+
+// parseFindOutput parses TYPE|SIZE|REL_PATH lines emitted by
+// buildFindShell into eicFileEntry rows. Whitespace-only lines and
+// malformed rows are silently skipped — the same behaviour as the
+// local-Docker container parser for symmetric output.
+//
+// A row is kept only when it has all three fields and a non-empty
+// path. The line is split on the first two `|` only, so a path may
+// itself contain `|`. Any TYPE other than "d" is reported as a
+// non-directory. The result is never nil: empty input yields an empty
+// slice.
+func parseFindOutput(raw []byte) []eicFileEntry {
+	files := make([]eicFileEntry, 0)
+	for _, line := range strings.Split(string(raw), "\n") {
+		parts := strings.SplitN(line, "|", 3)
+		if len(parts) != 3 || parts[2] == "" {
+			continue
+		}
+		var size int64
+		// Scan error is intentionally ignored: an unparseable SIZE keeps
+		// the row and leaves size at 0 rather than dropping the entry.
+		fmt.Sscanf(parts[1], "%d", &size)
+		files = append(files, eicFileEntry{
+			Path: parts[2],
+			Size: size,
+			Dir:  parts[0] == "d",
+		})
 	}
-	log.Printf("writeFileViaEIC: ws instance=%s runtime=%s wrote %d bytes → %s",
-		instanceID, runtime, len(content), absPath)
-	return nil
+	return files
 }

 // shellQuote wraps a value in single quotes + escapes embedded single
-// quotes for POSIX sh. Used for the sole piece of variable data in the
-// remote ssh command. (absPath is already built from a map + Clean() so
-// traversal is blocked regardless; this is defence-in-depth against
-// future refactor that might accept user paths here.)
+// quotes for POSIX sh. Used for the variable parts of remote ssh
+// commands (absolute paths). The paths are already built from a
+// validated allowlist + Clean(), so traversal is blocked regardless;
+// this is defence-in-depth against a future refactor that might accept
+// user paths directly here.
 func shellQuote(s string) string {
 	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
 }

+// writeFileViaEIC writes one file onto the workspace EC2, at the
+// absolute path resolveWorkspaceFilePath derives from (runtime, root,
+// relPath). The runtime's reload hook is not invoked here (not
+// implemented yet — tracked as follow-up; for today the canvas issues a
+// separate Restart after Save).
+//
+// The remote side runs `install -D`, which creates any missing parent
+// dirs and writes atomically via temp-file-rename. Permissions 0644
+// match the existing tar-unpack defaults on the Docker path.
+//
+// Why `sudo -n` (non-interactive): the canonical containerized
+// workspace layout puts /configs at the root, owned by root because
+// cloud-init runs as root (see
+// molecule-controlplane/internal/provisioner/userdata_containerized.go),
+// so SSH-as-ubuntu can't write into /configs without escalation.
+// Ubuntu has passwordless sudo on EC2 by default; sudo -n fails fast
+// (no prompt) if that ever changes, surfacing a clean error instead of
+// a hang. The hermes path /home/ubuntu/.hermes is ubuntu-owned and
+// doesn't strictly need sudo, but using it uniformly avoids per-runtime
+// branching here.
+//
+// The whole operation (tunnel setup + ssh) is bounded by
+// eicFileOpTimeout. Errors from path resolution, the tunnel helper, or
+// the ssh subprocess (with its trimmed stderr) are returned wrapped.
+func writeFileViaEIC(ctx context.Context, instanceID, runtime, root, relPath string, content []byte) error {
+	target, resolveErr := resolveWorkspaceFilePath(runtime, root, relPath)
+	if resolveErr != nil {
+		return fmt.Errorf("invalid path: %w", resolveErr)
+	}
+	opCtx, stop := context.WithTimeout(ctx, eicFileOpTimeout)
+	defer stop()
+
+	// install streams content over ssh stdin into the remote install command.
+	install := func(sess eicSSHSession) error {
+		var errOut bytes.Buffer
+		cmd := exec.CommandContext(opCtx, "ssh", sess.sshArgs(buildInstallShell(target))...)
+		cmd.Env = os.Environ()
+		cmd.Stdin = bytes.NewReader(content)
+		cmd.Stderr = &errOut
+		if runErr := cmd.Run(); runErr != nil {
+			return fmt.Errorf("ssh install: %w (%s)", runErr, strings.TrimSpace(errOut.String()))
+		}
+		log.Printf("writeFileViaEIC: ws instance=%s runtime=%s root=%s wrote %d bytes → %s",
+			instanceID, runtime, root, len(content), target)
+		return nil
+	}
+	return withEICTunnel(opCtx, instanceID, install)
+}
+
 // readFileViaEIC reads a single file from the workspace EC2 at the
 // absolute path that resolveWorkspaceFilePath computes. Mirrors
-// writeFileViaEIC end-to-end (ephemeral keypair, EIC tunnel, ssh) so
-// canvas's Config tab can GET back what it just PUT. Pre-fix the GET
-// path (templates.go ReadFile) only handled local Docker containers
-// + a host-side template fallback; SaaS workspaces (EC2-per-workspace)
-// always 404'd because neither handles their on-EC2 layout.
+// writeFileViaEIC (ephemeral keypair, EIC tunnel, ssh) so the canvas's
+// Config tab can GET back what it just PUT.
 //
 // Returns ("", os.ErrNotExist) when the remote path doesn't exist so
 // the handler can map it to HTTP 404 cleanly. Other errors propagate.
-func readFileViaEIC(ctx context.Context, instanceID, runtime, relPath string) ([]byte, error) {
-	if instanceID == "" {
-		return nil, fmt.Errorf("workspace has no instance_id — not a SaaS EC2 workspace")
-	}
-	absPath, err := resolveWorkspaceFilePath(runtime, relPath)
+//
+// `sudo -n cat`: /configs is root-owned (same reason writeFileViaEIC
+// needs sudo). The path is built from a validated map + Clean(), so no
+// user-controlled string reaches the shell here. `2>/dev/null` swallows
+// `cat: ...: No such file` so the missing-file case returns empty
+// stdout + non-zero exit, which we translate to os.ErrNotExist.
+func readFileViaEIC(ctx context.Context, instanceID, runtime, root, relPath string) ([]byte, error) {
+	absPath, err := resolveWorkspaceFilePath(runtime, root, relPath)
 	if err != nil {
 		return nil, fmt.Errorf("invalid path: %w", err)
 	}
-
-	osUser := os.Getenv("WORKSPACE_EC2_OS_USER")
-	if osUser == "" {
-		osUser = "ubuntu"
-	}
-	region := os.Getenv("AWS_REGION")
-	if region == "" {
-		region = "us-east-2"
-	}
-
-	ctx, cancel := context.WithTimeout(ctx, eicFileWriteTimeout)
+	ctx, cancel := context.WithTimeout(ctx, eicFileOpTimeout)
 	defer cancel()

-	keyDir, err := os.MkdirTemp("", "molecule-fileread-*")
-	if err != nil {
-		return nil, fmt.Errorf("keydir mkdir: %w", err)
-	}
-	defer func() { _ = os.RemoveAll(keyDir) }()
-	keyPath := keyDir + "/id"
-	if out, kerr := exec.CommandContext(ctx, "ssh-keygen",
-		"-t", "ed25519", "-f", keyPath, "-N", "", "-q",
-		"-C", "molecule-fileread",
-	).CombinedOutput(); kerr != nil {
-		return nil, fmt.Errorf("ssh-keygen: %w (%s)", kerr, strings.TrimSpace(string(out)))
-	}
-	pubKey, err := os.ReadFile(keyPath + ".pub")
-	if err != nil {
-		return nil, fmt.Errorf("read pubkey: %w", err)
-	}
-
-	if err := sendSSHPublicKey(ctx, region, instanceID, osUser, strings.TrimSpace(string(pubKey))); err != nil {
-		return nil, fmt.Errorf("send-ssh-public-key: %w", err)
-	}
-
-	localPort, err := pickFreePort()
-	if err != nil {
-		return nil, fmt.Errorf("pick free port: %w", err)
-	}
-	tunnel := openTunnelCmd(eicSSHOptions{
-		InstanceID:     instanceID,
-		OSUser:         osUser,
-		Region:         region,
-		LocalPort:      localPort,
-		PrivateKeyPath: keyPath,
+	var out []byte
+	runErr := withEICTunnel(ctx, instanceID, func(s eicSSHSession) error {
+		sshCmd := exec.CommandContext(ctx, "ssh", s.sshArgs(buildCatShell(absPath))...)
+		sshCmd.Env = os.Environ()
+		var stdout, stderr bytes.Buffer
+		sshCmd.Stdout = &stdout
+		sshCmd.Stderr = &stderr
+		err := sshCmd.Run()
+		out = stdout.Bytes()
+		if err != nil {
+			// `cat` returns 1 on missing file; with 2>/dev/null we have no
+			// stderr distinguisher. Treat empty-stdout + empty-stderr +
+			// non-zero exit as not-found rather than a tunnel/auth error
+			// (those usually produce stderr from ssh itself, not from the
+			// remote command).
+			if len(out) == 0 && stderr.Len() == 0 {
+				return os.ErrNotExist
+			}
+			return fmt.Errorf("ssh cat: %w (%s)", err, strings.TrimSpace(stderr.String()))
+		}
+		log.Printf("readFileViaEIC: ws instance=%s runtime=%s root=%s read %d bytes ← %s",
+			instanceID, runtime, root, len(out), absPath)
+		return nil
 	})
-	tunnel.Env = os.Environ()
-	if err := tunnel.Start(); err != nil {
-		return nil, fmt.Errorf("open-tunnel start: %w", err)
-	}
-	defer func() {
-		if tunnel.Process != nil {
-			_ = tunnel.Process.Kill()
-		}
-		_ = tunnel.Wait()
-	}()
-	if err := waitForPort(ctx, "127.0.0.1", localPort, 10*time.Second); err != nil {
-		return nil, fmt.Errorf("tunnel never listened: %w", err)
-	}
-
-	// `sudo -n cat`: /configs is root-owned by cloud-init (same reason
-	// writeFileViaEIC needs sudo to install). The path is built from a
-	// validated map + Clean(), so no user-controlled string reaches the
-	// shell here. `2>/dev/null` swallows `cat: ...: No such file` so
-	// the missing-file case returns empty stdout + non-zero exit, which
-	// we translate to os.ErrNotExist below.
-	sshCmd := exec.CommandContext(ctx, "ssh",
-		"-i", keyPath,
-		"-o", "StrictHostKeyChecking=no",
-		"-o", "UserKnownHostsFile=/dev/null",
-		// LogLevel=ERROR silences the benign "Warning: Permanently
-		// added '[127.0.0.1]:NNNNN' to known hosts" notice that ssh
-		// emits on every fresh tunnel connection. Without this, the
-		// notice lands on stderr and fools readFileViaEIC's "empty
-		// stdout + empty stderr → file not found" classifier into
-		// thinking the warning is a real ssh-layer error → 500
-		// instead of 404 (Hermes config.yaml load, hongming tenant,
-		// 2026-05-05 02:38). Real auth/tunnel errors stay visible
-		// because they're emitted at ERROR level.
-		"-o", "LogLevel=ERROR",
-		"-o", "ServerAliveInterval=15",
-		"-p", fmt.Sprintf("%d", localPort),
-		fmt.Sprintf("%s@127.0.0.1", osUser),
-		fmt.Sprintf("sudo -n cat %s 2>/dev/null", shellQuote(absPath)),
-	)
-	sshCmd.Env = os.Environ()
-	var stdout, stderr bytes.Buffer
-	sshCmd.Stdout = &stdout
-	sshCmd.Stderr = &stderr
-	runErr := sshCmd.Run()
-	out := stdout.Bytes()
 	if runErr != nil {
-		// `cat` returns 1 on missing file; with 2>/dev/null we have no
-		// stderr distinguisher. Treat empty-stdout + non-zero exit as
-		// not-found rather than a tunnel/auth error (those usually
-		// produce stderr from ssh itself, not from the remote command).
-		if len(out) == 0 && stderr.Len() == 0 {
-			return nil, os.ErrNotExist
-		}
-		return nil, fmt.Errorf("ssh cat: %w (%s)", runErr, strings.TrimSpace(stderr.String()))
+		return nil, runErr
 	}
-	log.Printf("readFileViaEIC: ws instance=%s runtime=%s read %d bytes ← %s",
-		instanceID, runtime, len(out), absPath)
 	return out, nil
 }
+
+// eicFileEntry is the wire shape returned by listFilesViaEIC. It
+// matches the inline `fileEntry` in templates.go::ListFiles so the
+// handler can emit either path's output without a translation layer.
+type eicFileEntry struct {
+	Path string `json:"path"` // REL_PATH field of the find output, relative to the listing root
+	Size int64  `json:"size"` // SIZE field of the find output; the find shell emits 0 for directories
+	Dir  bool   `json:"dir"`  // true when the TYPE field of the find output is "d"
+}
+
+// listFilesViaEIC enumerates files under <root>/<sub> on the workspace
+// EC2 host, up to the given depth, returning entries with paths
+// relative to the listing root (matching the local-Docker path's
+// output). Closes the symmetry gap that left ListFiles silently
+// returning [] for SaaS workspaces — see issue #2999.
+//
+// Parameters:
+//   - sub: optional path below root; checked with validateRelPath when
+//     non-empty and joined onto the resolved root.
+//   - depth: forwarded to `find -maxdepth`; clamped to the range [1, 5].
+//
+// Returns an empty (non-nil) slice when the remote command fails
+// without producing any stdout or stderr, which is how a missing
+// listing root presents. Returns an error for an invalid sub, for
+// ssh/tunnel failures, and when the operation's context has expired or
+// been cancelled (bounded by eicFileOpTimeout).
+//
+// Output line format: TYPE|SIZE|REL_PATH (matches the container's find
+// shell so the parser is identical). `find -maxdepth N` traverses up
+// to N levels; the canvas requests depth=1 by default and re-fetches
+// when the user expands a directory.
+//
+// Pruning: same hidden / cache dirs as the container path (.git,
+// __pycache__, node_modules, .DS_Store) so the canvas's tree doesn't
+// drown in transient artefacts.
+//
+// `sudo -n` matches the read/write paths — even though the universal
+// roots (/home, /workspace, /plugins) are typically ubuntu-owned and
+// don't need it, /configs and runtime-prefix dirs do (root-owned by
+// cloud-init), and using sudo uniformly avoids per-root branching.
+func listFilesViaEIC(ctx context.Context, instanceID, runtime, root, sub string, depth int) ([]eicFileEntry, error) {
+	if sub != "" {
+		if err := validateRelPath(sub); err != nil {
+			return nil, fmt.Errorf("invalid sub: %w", err)
+		}
+	}
+	if depth < 1 {
+		depth = 1
+	}
+	if depth > 5 {
+		depth = 5
+	}
+	listPath := resolveWorkspaceRootPath(runtime, root)
+	if sub != "" {
+		listPath = filepath.Join(listPath, filepath.Clean(sub))
+	}
+
+	ctx, cancel := context.WithTimeout(ctx, eicFileOpTimeout)
+	defer cancel()
+
+	var rawOutput []byte
+	runErr := withEICTunnel(ctx, instanceID, func(s eicSSHSession) error {
+		sshCmd := exec.CommandContext(ctx, "ssh", s.sshArgs(buildFindShell(listPath, depth))...)
+		sshCmd.Env = os.Environ()
+		var stdout, stderr bytes.Buffer
+		sshCmd.Stdout = &stdout
+		sshCmd.Stderr = &stderr
+		if err := sshCmd.Run(); err != nil {
+			// A deadline or cancellation kills ssh before it prints
+			// anything, which is indistinguishable from the
+			// "missing root" signature checked below. Surface it as
+			// an error so a timeout is never reported as "0 files".
+			if ctxErr := ctx.Err(); ctxErr != nil {
+				return fmt.Errorf("ssh find: %w (%v)", ctxErr, err)
+			}
+			// Empty stdout + empty stderr after we swallowed find's
+			// own error stream means the listing root genuinely
+			// doesn't exist on this workspace — return an empty
+			// slice rather than a 500. Real ssh/tunnel errors emit
+			// to stderr at LogLevel=ERROR.
+			if stdout.Len() == 0 && stderr.Len() == 0 {
+				return nil // rawOutput stays nil → parsed as an empty listing
+			}
+			return fmt.Errorf("ssh find: %w (%s)", err, strings.TrimSpace(stderr.String()))
+		}
+		rawOutput = stdout.Bytes()
+		return nil
+	})
+	if runErr != nil {
+		return nil, runErr
+	}
+
+	files := parseFindOutput(rawOutput)
+	log.Printf("listFilesViaEIC: ws instance=%s runtime=%s root=%s sub=%s depth=%d → %d entries from %s",
+		instanceID, runtime, root, sub, depth, len(files), listPath)
+	return files, nil
+}
+
+// deleteFileViaEIC removes one file from the workspace EC2 host, at
+// the absolute path resolveWorkspaceFilePath derives from (runtime,
+// root, relPath). A resolver failure is returned wrapped as "invalid
+// path"; an ssh failure is returned with ssh's trimmed stderr.
+//
+// Both "deleted" and "didn't exist" yield nil — `rm -f` doesn't
+// distinguish, and the canvas's delete-then-refresh flow doesn't need
+// it to.
+//
+// Symmetry note: pre-fix DeleteFile (templates.go:514) had no EIC
+// branch, so right-click delete on a SaaS workspace would fall through
+// to the local-Docker path, find no container (dockerCli is nil on
+// SaaS), and try the ephemeral-volume path which itself only handles
+// local Docker volumes. Net effect: silent no-op. Closing this gap is
+// part of issue #2999.
+func deleteFileViaEIC(ctx context.Context, instanceID, runtime, root, relPath string) error {
+	target, resolveErr := resolveWorkspaceFilePath(runtime, root, relPath)
+	if resolveErr != nil {
+		return fmt.Errorf("invalid path: %w", resolveErr)
+	}
+
+	opCtx, stop := context.WithTimeout(ctx, eicFileOpTimeout)
+	defer stop()
+
+	// remove runs inside the tunnel and executes the rm shell remotely.
+	remove := func(sess eicSSHSession) error {
+		var errBuf bytes.Buffer
+		cmd := exec.CommandContext(opCtx, "ssh", sess.sshArgs(buildRmShell(target))...)
+		cmd.Env = os.Environ()
+		cmd.Stderr = &errBuf
+		runErr := cmd.Run()
+		if runErr == nil {
+			log.Printf("deleteFileViaEIC: ws instance=%s runtime=%s root=%s removed %s",
+				instanceID, runtime, root, target)
+			return nil
+		}
+		return fmt.Errorf("ssh rm: %w (%s)", runErr, strings.TrimSpace(errBuf.String()))
+	}
+	return withEICTunnel(opCtx, instanceID, remove)
+}
@@ -0,0 +1,303 @@
+package handlers
+
+// template_files_eic_dispatch_test.go — handler-level tests for the
+// EIC dispatch added in PR-A of issue #2999. Pre-PR-A, ListFiles and
+// DeleteFile silently fell through to the local-Docker path on SaaS
+// workspaces (where dockerCli is nil) and returned [] / silent no-op.
+// These tests pin the new behavior:
+//
+//   1. instance_id != "" → handler invokes the EIC helper
+//   2. EIC success → 200 with the helper's payload
+//   3. EIC error → 500 (does NOT fall through to local-Docker /
+//      template-dir, which would mask the real failure)
+//   4. instance_id == "" → existing local-Docker / template-dir
+//      fallback (back-compat with self-hosted operators)
+//
+// Stubs `withEICTunnel` so the entire EIC dance (keypair, AWS calls,
+// tunnel, ssh) is replaced with a fake closure that yields a captured
+// session — lets the test capture what the inner closure would have
+// done without spinning up a real sshd. The test for the actual
+// remote shell shapes lives in template_files_eic_shells_test.go
+// (pure-function tests on buildFindShell / buildInstallShell etc).
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/DATA-DOG/go-sqlmock"
+	"github.com/gin-gonic/gin"
+)
+
+// stubWithEICTunnel replaces the package-level withEICTunnel with a
+// stub that records each instanceID it is called with and returns
+// fnErr WITHOUT invoking fn, so no ssh command is ever executed.
+// Restores the original on test cleanup.
+func stubWithEICTunnel(t *testing.T, fnErr error) (calls *[]string) {
+	t.Helper()
+	captured := []string{}
+	calls = &captured
+	prev := withEICTunnel
+	withEICTunnel = func(ctx context.Context, instanceID string, fn func(s eicSSHSession) error) error {
+		captured = append(captured, instanceID)
+		// fn is deliberately not invoked: the real closures shell
+		// out to ssh via exec.Command, which cannot succeed in a
+		// unit test. Returning fnErr directly simulates the whole
+		// tunnel + remote command outcome (nil = success) so the
+		// handler's dispatch and status mapping can be asserted.
+		return fnErr
+	}
+	t.Cleanup(func() { withEICTunnel = prev })
+	return calls
+}
+
+// stubWithEICTunnelReturning swaps the package-level withEICTunnel
+// for a stub that ignores the caller's inner fn and runs replacement
+// instead, handing it a fixed fake session. Lets a test shape the
+// tunnel outcome (return value / side effects) without the real ssh
+// closure ever executing. Every instanceID the stub sees is appended
+// to the returned slice; the original withEICTunnel is restored on
+// test cleanup.
+func stubWithEICTunnelReturning(t *testing.T, replacement func(s eicSSHSession) error) (calls *[]string) {
+	t.Helper()
+
+	original := withEICTunnel
+	t.Cleanup(func() { withEICTunnel = original })
+
+	seen := make([]string, 0)
+	calls = &seen
+	withEICTunnel = func(_ context.Context, id string, _ func(s eicSSHSession) error) error {
+		seen = append(seen, id)
+		fake := eicSSHSession{instanceID: id, osUser: "ubuntu", localPort: 12345, keyPath: "/tmp/k"}
+		return replacement(fake)
+	}
+	return calls
+}
+
+// ---- ListFiles EIC dispatch ----
+
+// TestListFiles_EICDispatch_Success: a workspace with instance_id set
+// must route to listFilesViaEIC, NOT to local-Docker / template-dir.
+// Verifies that the handler (a) actually calls withEICTunnel for the
+// workspace's instance and (b) hands the EIC helper's output back as
+// a JSON array with status 200.
+//
+// Until PR-A this test would fail no matter what mocks were in place —
+// the dispatch branch did not exist.
+func TestListFiles_EICDispatch_Success(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
+		WithArgs("ws-eic").
+		WillReturnRows(sqlmock.NewRows([]string{"name", "instance_id", "runtime"}).
+			AddRow("My Agent", "i-test", "claude-code"))
+
+	// The replacement closure stands in for the helper's real ssh
+	// closure, so listFilesViaEIC's rawOutput is never populated and
+	// parseFindOutput yields an empty slice. That is sufficient for
+	// "200 + []" dispatch verification; the recorded calls prove the
+	// EIC path (rather than a fallback) produced the response.
+	calls := stubWithEICTunnelReturning(t, func(s eicSSHSession) error {
+		return nil // skip the real ssh; outer rawOutput stays nil → []
+	})
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: "ws-eic"}}
+	c.Request = httptest.NewRequest("GET", "/workspaces/ws-eic/files?root=/configs", nil)
+
+	(&TemplatesHandler{}).ListFiles(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	var got []map[string]interface{}
+	if err := json.Unmarshal(w.Body.Bytes(), &got); err != nil {
+		t.Fatalf("response not JSON array: %v (body=%s)", err, w.Body.String())
+	}
+	// A JSON `null` body unmarshals to a nil slice; `[]` does not.
+	if got == nil {
+		t.Errorf("expected JSON array (even if empty); got null")
+	}
+	// Direct dispatch proof: the stub must have been reached, and only
+	// for the instance_id stored on the workspace row.
+	if len(*calls) == 0 {
+		t.Errorf("withEICTunnel was never invoked; handler did not dispatch to the EIC path")
+	}
+	for _, id := range *calls {
+		if id != "i-test" {
+			t.Errorf("withEICTunnel invoked with instanceID %q, want %q", id, "i-test")
+		}
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestListFiles_EICDispatch_Error pins the failure mapping: when the
+// EIC helper fails (network blip, AWS API throttle, sshd down) the
+// handler must answer 500 with a ListFiles error body. It must NOT
+// fall through to the local-Docker path, which would disguise the
+// failure as "0 files" — the exact UX symptom the PR-A bug report
+// cites.
+func TestListFiles_EICDispatch_Error(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	workspaceRow := sqlmock.NewRows([]string{"name", "instance_id", "runtime"}).
+		AddRow("My Agent", "i-test", "claude-code")
+	mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
+		WithArgs("ws-eic-err").
+		WillReturnRows(workspaceRow)
+
+	stubWithEICTunnel(t, errors.New("eic open-tunnel: timeout"))
+
+	rec := httptest.NewRecorder()
+	ginCtx, _ := gin.CreateTestContext(rec)
+	ginCtx.Params = gin.Params{{Key: "id", Value: "ws-eic-err"}}
+	ginCtx.Request = httptest.NewRequest("GET", "/workspaces/ws-eic-err/files?root=/home", nil)
+
+	handler := &TemplatesHandler{}
+	handler.ListFiles(ginCtx)
+
+	body := rec.Body.String()
+	if rec.Code != http.StatusInternalServerError {
+		t.Fatalf("expected 500, got %d: %s", rec.Code, body)
+	}
+	if !strings.Contains(body, "failed to list files") {
+		t.Errorf("error body should describe ListFiles failure; got %s", body)
+	}
+}
+
+// TestListFiles_EICBranch_NotTakenForSelfHosted guards back-compat: a
+// workspace row with an empty instance_id (self-hosted, local-Docker
+// path) MUST NOT reach the EIC branch. withEICTunnel is replaced with
+// a tripwire that flags the test as failed the moment it is called —
+// the stub being invoked is itself the assertion failure.
+func TestListFiles_EICBranch_NotTakenForSelfHosted(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	selfHostedRow := sqlmock.NewRows([]string{"name", "instance_id", "runtime"}).
+		AddRow("Local Agent", "", "")
+	mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
+		WithArgs("ws-local").
+		WillReturnRows(selfHostedRow)
+
+	original := withEICTunnel
+	defer func() { withEICTunnel = original }()
+	withEICTunnel = func(_ context.Context, _ string, _ func(s eicSSHSession) error) error {
+		t.Errorf("withEICTunnel called for self-hosted workspace (instance_id=''); EIC branch must be gated on non-empty instance_id")
+		return errors.New("should not be called")
+	}
+
+	rec := httptest.NewRecorder()
+	ginCtx, _ := gin.CreateTestContext(rec)
+	ginCtx.Params = gin.Params{{Key: "id", Value: "ws-local"}}
+	ginCtx.Request = httptest.NewRequest("GET", "/workspaces/ws-local/files", nil)
+
+	handler := &TemplatesHandler{configsDir: t.TempDir()}
+	handler.ListFiles(ginCtx)
+
+	// The response code is intentionally left unpinned — the local
+	// path's behavior is covered by
+	// TestListFiles_FallbackToHost_NoTemplate. The only thing checked
+	// here is that the tripwire above stayed silent.
+}
+
+// ---- DeleteFile EIC dispatch ----
+
+// TestDeleteFile_EICDispatch_Success: same shape as ListFiles —
+// instance_id != "" routes to deleteFileViaEIC and returns 200 on
+// success. Pre-PR-A right-click delete on a SaaS workspace silently
+// no-op'd because findContainer returned "" and the ephemeral-volume
+// fallback only handles local Docker volumes.
+//
+// Besides the 200 + "deleted" body, the test asserts that the
+// withEICTunnel stub was actually reached for the workspace's
+// instance, so a fallback path that also answers 200 cannot satisfy
+// it.
+func TestDeleteFile_EICDispatch_Success(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
+		WithArgs("ws-eic-del").
+		WillReturnRows(sqlmock.NewRows([]string{"name", "instance_id", "runtime"}).
+			AddRow("My Agent", "i-test", "claude-code"))
+
+	// nil → the stub reports a successful tunnel + remote command.
+	calls := stubWithEICTunnel(t, nil)
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{
+		{Key: "id", Value: "ws-eic-del"},
+		{Key: "path", Value: "old.txt"},
+	}
+	c.Request = httptest.NewRequest("DELETE", "/workspaces/ws-eic-del/files/old.txt", nil)
+
+	(&TemplatesHandler{}).DeleteFile(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	if !strings.Contains(w.Body.String(), `"deleted"`) {
+		t.Errorf("expected status:deleted; got %s", w.Body.String())
+	}
+	// Direct dispatch proof: the stub must have been reached, and only
+	// for the instance_id stored on the workspace row.
+	if len(*calls) == 0 {
+		t.Errorf("withEICTunnel was never invoked; handler did not dispatch to the EIC path")
+	}
+	for _, id := range *calls {
+		if id != "i-test" {
+			t.Errorf("withEICTunnel invoked with instanceID %q, want %q", id, "i-test")
+		}
+	}
+}
+
+// TestDeleteFile_EICDispatch_Error: when the EIC helper reports a
+// failure for a workspace that has an instance_id, DeleteFile must
+// answer 500.
+func TestDeleteFile_EICDispatch_Error(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	workspaceRow := sqlmock.NewRows([]string{"name", "instance_id", "runtime"}).
+		AddRow("My Agent", "i-test", "hermes")
+	mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
+		WithArgs("ws-eic-del-err").
+		WillReturnRows(workspaceRow)
+
+	stubWithEICTunnel(t, errors.New("ssh rm: connection refused"))
+
+	rec := httptest.NewRecorder()
+	ginCtx, _ := gin.CreateTestContext(rec)
+	ginCtx.Params = gin.Params{
+		{Key: "id", Value: "ws-eic-del-err"},
+		{Key: "path", Value: "old.txt"},
+	}
+	ginCtx.Request = httptest.NewRequest("DELETE", "/workspaces/ws-eic-del-err/files/old.txt", nil)
+
+	handler := &TemplatesHandler{}
+	handler.DeleteFile(ginCtx)
+
+	if got := rec.Code; got != http.StatusInternalServerError {
+		t.Fatalf("expected 500, got %d: %s", got, rec.Body.String())
+	}
+}
+
+// TestListFiles_RootValidation: the handler must reject roots outside
+// the allowlist up front, otherwise a bad root would burn a tunnel +
+// EIC call to discover what a 400 already knows. Critical security
+// guard — without it `?root=/etc` would translate via the resolver's
+// literal-pass-through. Drives an out-of-allowlist root and asserts
+// 400; no sqlmock expectations are registered for this request.
+func TestListFiles_RootValidation(t *testing.T) {
+	setupTestDB(t)
+	setupTestRedis(t)
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: "ws-x"}}
+	c.Request = httptest.NewRequest("GET", "/workspaces/ws-x/files?root=/etc", nil)
+
+	(&TemplatesHandler{}).ListFiles(c)
+
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("expected 400 for /etc root, got %d: %s", w.Code, w.Body.String())
+	}
+}
+
+// TestDeleteFile_RootValidation is the DeleteFile counterpart of the
+// ListFiles root guard. PR-A added ?root= handling to DeleteFile so
+// the canvas's right-click delete works on any root (not just
+// /configs); the allowlist check therefore has to exist here as well,
+// otherwise an unsafe root flows straight into the resolver. Sends
+// ?root=/etc and expects 400.
+func TestDeleteFile_RootValidation(t *testing.T) {
+	setupTestDB(t)
+	setupTestRedis(t)
+
+	rec := httptest.NewRecorder()
+	ginCtx, _ := gin.CreateTestContext(rec)
+	ginCtx.Request = httptest.NewRequest("DELETE", "/workspaces/ws-x/files/f.txt?root=/etc", nil)
+	ginCtx.Params = gin.Params{
+		{Key: "id", Value: "ws-x"},
+		{Key: "path", Value: "f.txt"},
+	}
+
+	handler := &TemplatesHandler{}
+	handler.DeleteFile(ginCtx)
+
+	if got := rec.Code; got != http.StatusBadRequest {
+		t.Errorf("expected 400 for /etc root, got %d: %s", got, rec.Body.String())
+	}
+}
@@ -0,0 +1,200 @@
+package handlers
+
+// template_files_eic_shells_test.go — pure-function tests for the
+// remote shell builders + parser. Factored out of the EIC helpers so
+// the wire shape can be pinned without standing up a real EIC tunnel
+// or sshd. If a future edit changes the find/install/cat/rm shell in
+// a way that drifts from the local-Docker container path, these tests
+// catch it before staging.
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestBuildInstallShell pins the write-side remote command by
+// checking that every required fragment appears in the generated
+// shell. `install` (not `cp`/`tee`) is load-bearing — its -D flag
+// creates parent dirs. Permissions 0644 match the local-Docker
+// tar-unpack defaults so a save → restart → save → restart cycle
+// doesn't flip-flop file modes per backend.
+func TestBuildInstallShell(t *testing.T) {
+	shell := buildInstallShell("/configs/config.yaml")
+
+	required := []struct {
+		fragment string
+		why      string
+	}{
+		{"sudo -n", "privilege escalation for root-owned /configs"},
+		{"install -D", "creates parent dirs"},
+		{"-m 0644", "permission contract"},
+		{"/dev/stdin", "pipe-from-ssh source"},
+		{"'/configs/config.yaml'", "shell-quoted destination"},
+	}
+	for _, r := range required {
+		if strings.Contains(shell, r.fragment) {
+			continue
+		}
+		t.Errorf("buildInstallShell missing %q in: %s", r.fragment, shell)
+	}
+}
+
+// TestBuildCatShell pins the read-side remote command. The
+// `2>/dev/null` redirect is load-bearing: without it a missing file
+// makes cat print "cat: ...: No such file" on stderr, and the helper's
+// "empty stdout + empty stderr → os.ErrNotExist" classifier takes the
+// wrong branch (500 instead of 404). The tunnel-warning silencer
+// (LogLevel=ERROR in sshArgs) handles the ssh side; this redirect
+// handles the remote-cmd side.
+func TestBuildCatShell(t *testing.T) {
+	shell := buildCatShell("/home/ubuntu/.hermes/config.yaml")
+
+	required := [...]string{
+		"sudo -n",
+		"cat",
+		"'/home/ubuntu/.hermes/config.yaml'",
+		"2>/dev/null", // missing-file → empty stdout + non-zero exit
+	}
+	for i := range required {
+		if fragment := required[i]; !strings.Contains(shell, fragment) {
+			t.Errorf("buildCatShell missing %q in: %s", fragment, shell)
+		}
+	}
+}
+
+// TestBuildRmShell pins `rm -f`, NOT a recursive delete. A
+// misclassified directory entry passing through the validator must
+// NOT trigger a recursive delete. Directory removal needs its own
+// explicit endpoint when/if the canvas grows that affordance.
+//
+// The negative check covers every common spelling of the recursive
+// flag (-r, -R, -rf, -fr, -Rf, -fR, --recursive), not just the
+// literal "rm -rf", so reordering the flags cannot slip past it.
+func TestBuildRmShell(t *testing.T) {
+	got := buildRmShell("/configs/dead.yaml")
+	wants := []string{"sudo -n", "rm -f", "'/configs/dead.yaml'"}
+	for _, w := range wants {
+		if !strings.Contains(got, w) {
+			t.Errorf("buildRmShell missing %q in: %s", w, got)
+		}
+	}
+	// Negative assertion: NEVER emit -rf.
+	if strings.Contains(got, "rm -rf") {
+		t.Errorf("buildRmShell uses -rf, must use -f only: %s", got)
+	}
+	// Token-level check for the other recursive spellings. Matching
+	// whole whitespace-separated tokens keeps the quoted path (which
+	// never starts with "-") out of the comparison.
+	recursiveFlags := map[string]bool{
+		"-r": true, "-R": true,
+		"-rf": true, "-fr": true,
+		"-Rf": true, "-fR": true,
+		"--recursive": true,
+	}
+	for _, tok := range strings.Fields(got) {
+		if recursiveFlags[tok] {
+			t.Errorf("buildRmShell emits recursive flag %q, must use -f only: %s", tok, got)
+		}
+	}
+}
+
+// TestBuildFindShell pins the listing-side remote command. It has to
+// agree with the local-Docker path on two things: the parser shape
+// (one TYPE|SIZE|REL_PATH record per line) and the set of pruned
+// hidden / cache directories. Drift on pruning means a /workspace
+// listing on EC2 drowns in node_modules noise; drift on the record
+// shape means files are dropped entirely.
+func TestBuildFindShell(t *testing.T) {
+	shell := buildFindShell("/workspace", 2)
+
+	// Invocation: escalated find over the quoted root, depth-limited.
+	invocation := []string{
+		"sudo -n find",
+		"'/workspace'",
+		"-maxdepth 2",
+	}
+	// Pruning: matches the local-Docker container path; without these
+	// the EC2 listing fills with VCS/build artefacts.
+	pruning := []string{
+		"-not -path '*/.git/*'",
+		"-not -path '*/__pycache__/*'",
+		"-not -path '*/node_modules/*'",
+		"-not -name .DS_Store",
+		"2>/dev/null", // missing-root → empty stdout + non-zero exit
+	}
+	// Wire shape: emit "TYPE|SIZE|REL_PATH" so parseFindOutput (and
+	// the canvas tree builder) can decode each line.
+	wireShape := []string{
+		"d|0|",
+		"f|",
+	}
+	// Portable stat: GNU first, BSD fallback, then 0.
+	portableStat := []string{
+		"stat -c %s",
+		"stat -f %z",
+	}
+
+	for _, group := range [][]string{invocation, pruning, wireShape, portableStat} {
+		for _, fragment := range group {
+			if strings.Contains(shell, fragment) {
+				continue
+			}
+			t.Errorf("buildFindShell missing %q in: %s", fragment, shell)
+		}
+	}
+}
+
+// TestBuildFindShell_DepthForwarding guards against the builder
+// hard-coding a depth instead of honouring the caller's value. The
+// canvas's `?depth=` controls how many levels expand on load — losing
+// it means the file tree is either empty (depth=0) or the network
+// blows up on a top-level /home with everyone's $HOME (uncapped).
+func TestBuildFindShell_DepthForwarding(t *testing.T) {
+	depths := [...]int{1, 3, 5}
+	for i := 0; i < len(depths); i++ {
+		d := depths[i]
+		shell := buildFindShell("/configs", d)
+		needle := "-maxdepth " + intToStr(d)
+		if strings.Contains(shell, needle) {
+			continue
+		}
+		t.Errorf("buildFindShell depth=%d output missing %q: %s", d, needle, shell)
+	}
+}
+
+// intToStr renders n in base 10 without pulling strconv into a
+// one-liner; the result matches the shell builder's fmt.Sprintf %d
+// output exactly.
+//
+// The magnitude is computed in unsigned arithmetic so the minimum int
+// value — whose negation does not fit in an int — is still rendered
+// correctly rather than collapsing to a bare "-".
+func intToStr(n int) string {
+	// uint64(n) sign-extends a negative n; negating the unsigned value
+	// wraps around to |n|, which is well-defined even for the minimum.
+	mag := uint64(n)
+	if n < 0 {
+		mag = -mag
+	}
+	// 20 bytes hold the longest case: '-' plus 19 digits.
+	var buf [20]byte
+	i := len(buf)
+	for {
+		i--
+		buf[i] = byte('0' + mag%10)
+		mag /= 10
+		if mag == 0 {
+			break
+		}
+	}
+	if n < 0 {
+		i--
+		buf[i] = '-'
+	}
+	return string(buf[i:])
+}
+
+// TestParseFindOutput pins the parser contract: each record is
+// TYPE|SIZE|REL, and blank or short lines are silently skipped.
+// Pre-PR-A this logic was inlined in the handler with the same shape;
+// extracting + testing it separately removes the "regex passes against
+// the inline parser but a future refactor of the handler subtly
+// changes the parse" failure mode.
+func TestParseFindOutput(t *testing.T) {
+	// Same bytes as a find listing with a trailing newline: three
+	// good rows, a blank line, a pipe-less line, a row with an empty
+	// SIZE, and a row with an empty REL_PATH.
+	input := strings.Join([]string{
+		"d|0|nested",
+		"f|123|nested/a.yaml",
+		"f|45|README.md",
+		"",
+		"invalid-line",
+		"f||no-size",
+		"d|0|",
+		"",
+	}, "\n")
+	got := parseFindOutput([]byte(input))
+
+	// Want 4 entries: nested(d), nested/a.yaml(f,123), README.md(f,45),
+	// no-size(f,0). Blank lines, "invalid-line" (no pipes), and
+	// `d|0|` (empty rel) are skipped.
+	wantPaths := []string{"nested", "nested/a.yaml", "README.md", "no-size"}
+	if len(got) != len(wantPaths) {
+		t.Fatalf("got %d entries, want %d: %+v", len(got), len(wantPaths), got)
+	}
+	for i := range wantPaths {
+		if actual, expected := got[i].Path, wantPaths[i]; actual != expected {
+			t.Errorf("entry[%d].Path = %q, want %q", i, actual, expected)
+		}
+	}
+
+	first, second, last := got[0], got[1], got[3]
+	if !first.Dir {
+		t.Errorf("entry[0] should be Dir")
+	}
+	if second.Size != 123 {
+		t.Errorf("entry[1].Size = %d, want 123", second.Size)
+	}
+	if last.Size != 0 {
+		t.Errorf("entry[3].Size on missing-size line = %d, want 0", last.Size)
+	}
+}
+
+// TestParseFindOutput_EmptyInput — a missing listing root yields
+// empty stdout (find's "No such file" is swallowed via 2>/dev/null),
+// and that must round-trip to a JSON `[]`, not null. This pins the
+// helper-level guarantee on its own: empty input produces a non-nil
+// slice of length zero.
+func TestParseFindOutput_EmptyInput(t *testing.T) {
+	entries := parseFindOutput([]byte(""))
+	if entries == nil {
+		t.Errorf("parseFindOutput(\"\") returned nil; want empty slice for JSON []")
+	}
+	if n := len(entries); n != 0 {
+		t.Errorf("parseFindOutput(\"\") = %+v; want []", entries)
+	}
+}
--- a/Show More
+++ b/Show More