Merge remote-tracking branch 'origin/staging' into docs/auto-promote-staging-prereq-comment

# Conflicts: # .github/workflows/auto-promote-staging.yml
2026-04-28 20:46:42 -07:00 · 2026-04-28 20:46:42 -07:00 · 07a17c2e59
commit 07a17c2e59
parent e373fa1a96 54ea64bb01
51 changed files with 2052 additions and 1096 deletions
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@ -0,0 +1,80 @@
+# Dependabot — auto-bump pinned dependencies.
+#
+# Why this exists:
+#
+# All `uses:` references in .github/workflows/*.yml are pinned to commit
+# SHAs (with `# v<N>` comments for human readability) instead of mutable
+# tags like `@v4`. Tag pinning is a known supply-chain risk: a maintainer
+# (or compromised maintainer account) can repoint `@v4` to malicious code
+# and our pipelines silently pull it. SHA pinning closes that risk.
+#
+# But SHA pinning has a maintenance cost: each upstream legitimate fix
+# requires manually finding + bumping the SHA. Dependabot for Actions
+# closes that gap by opening PRs to bump pinned SHAs whenever upstream
+# tags a new version. Reviewer evaluates the bump like any other
+# dependency PR.
+#
+# Combined: SHA pinning gives us security, Dependabot keeps us current.
+
+version: 2
+updates:
+  # GitHub Actions — every workflow file under .github/workflows/.
+  # Weekly cadence is enough for a CI surface this size; the supply-
+  # chain attack window is "minutes between repoint and pull," and
+  # weekly auto-bumps don't help with zero-days regardless. The point
+  # is to pull in non-zero-day fixes without operator effort, not to
+  # be real-time.
+  - package-ecosystem: github-actions
+    directory: "/"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 5
+    labels:
+      - dependencies
+      - github-actions
+    commit-message:
+      prefix: chore  # `include: scope` appends "(deps)" → "chore(deps): …"
+      include: scope
+
+  # Go module — workspace-server. Bumps go.mod deps via PR weekly.
+  - package-ecosystem: gomod
+    directory: "/workspace-server"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 5
+    labels:
+      - dependencies
+      - go
+    commit-message:
+      prefix: chore  # scope is appended by `include` → "chore(deps): …"
+      include: scope
+
+  # npm — canvas (Next.js bundle). Largest dep tree in this repo;
+  # weekly cadence keeps the security surface fresh without flooding
+  # the queue. open-pull-requests-limit: 10 because npm churns more
+  # than the others.
+  - package-ecosystem: npm
+    directory: "/canvas"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 10
+    labels:
+      - dependencies
+      - npm
+    commit-message:
+      prefix: chore  # → "chore(deps): …" / "chore(deps-dev): …"
+      include: scope
+
+  # Python — workspace runtime requirements. Pip/requirements.txt-
+  # backed rather than pyproject.toml; Dependabot supports both.
+  - package-ecosystem: pip
+    directory: "/workspace"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 5
+    labels:
+      - dependencies
+      - python
+    commit-message:
+      prefix: chore  # scope is appended by `include` → "chore(deps): …"
+      include: scope
--- a/.github/workflows/auto-promote-on-e2e.yml
+++ b/.github/workflows/auto-promote-on-e2e.yml
@ -1,31 +1,68 @@
-name: Auto-promote :latest on E2E green
+name: Auto-promote :latest after main image build

 # Retags `ghcr.io/molecule-ai/{platform,platform-tenant}:staging-<sha>`
-# → `:latest` whenever E2E Staging SaaS passes for a `main` push.
+# → `:latest` after either the image build or E2E completes on a `main`
+# push, gated on E2E Staging SaaS not being red for that SHA.
 #
-# This is the doc-aligned alternative to the (deferred) Phase 2 canary
-# fleet — staging E2E catches ~90% of what canary would catch at 0%
-# ongoing infra cost. See `molecule-controlplane/docs/canary-tenants.md`
-# section "Do we actually need canary right now?" — recommended
-# sequencing for the current scale (≤20 paying tenants).
+# Why two triggers:
 #
-# Why a separate workflow rather than folding into e2e-staging-saas.yml:
-#   - Keeps test concerns separate from release concerns.
-#   - Disabling promote (e.g. during an incident) is one toggle, not an
-#     edit to the long E2E workflow file.
-#   - When Phase 2 canary work eventually lands, the canary path can
-#     replace this file's trigger without touching the E2E workflow.
+#   `publish-workspace-server-image` and `e2e-staging-saas` are both
+#   paths-filtered, but with DIFFERENT path sets:
 #
-# Why trigger on `main` only:
-#   - `:latest` is what prod tenants pull. We only want SHAs that have
-#     reached `main` (via auto-promote-staging) to advance `:latest`.
-#   - Triggering on staging would let a staging-only revert advance
-#     `:latest` to a SHA that never reaches `main`, breaking the
-#     "production runs what's on `main`" invariant.
+#     publish-workspace-server-image:
+#       workspace-server/**, canvas/**, manifest.json
+#
+#     e2e-staging-saas (full lifecycle):
+#       workspace-server/internal/handlers/{registry,workspace_provision,
+#       a2a_proxy}.go, workspace-server/internal/middleware/**,
+#       workspace-server/internal/provisioner/**, tests/e2e/test_staging_full_saas.sh
+#
+#   Apart from the test script, the E2E set is a SUBSET of the publish set. So:
+#     - canvas/** changes → publish fires, E2E does not
+#     - workspace-server/cmd/** changes → publish fires, E2E does not
+#     - workspace-server/internal/sweep/** → publish fires, E2E does not
+#
+#   The previous version triggered ONLY on E2E completion, which meant
+#   non-E2E-path changes (canvas, cmd, sweep, etc.) rebuilt the image
+#   but never advanced `:latest`. Result: as of 2026-04-28 this workflow
+#   had run zero times since merge despite eight main pushes — `:latest`
+#   was ~7 hours / 9 PRs behind main with no human realising. See
+#   `molecule-core` Slack discussion 2026-04-28.
+#
+#   Adding `publish-workspace-server-image` as a second trigger closes
+#   the gap: any image rebuild on main is now eligible to advance `:latest`.
+#
+# Why E2E remains a kill-switch (not the trigger):
+#
+#   When E2E DID run for this SHA and ended red, we abort — `:latest`
+#   stays on the prior known-good digest. When E2E didn't run (paths
+#   filtered out), we proceed: pre-merge gates already validated this
+#   SHA on staging via auto-promote-staging requiring CI + E2E Canvas +
+#   E2E API + CodeQL all green. Image content for non-E2E-paths
+#   (canvas, cmd, sweep) is exercised by those staging gates.
+#
+# Why `main` only:
+#
+#   `:latest` is what prod tenants pull. We only want SHAs that have
+#   reached main (via auto-promote-staging) to advance `:latest`.
+#   Triggering on staging would let a staging-only revert advance
+#   `:latest` to a SHA that never reaches main, breaking the "production
+#   runs what's on main" invariant.
+#
+# Idempotency:
+#
+#   When a SHA touches paths that match BOTH publish and E2E, both
+#   workflows fire and complete. Both trigger this workflow on
+#   completion → two runs race. Both retag `:staging-<sha>` →
+#   `:latest`. crane tag is idempotent (re-tagging the same digest is a
+#   no-op), so the second run is harmless. concurrency group serializes
+#   them anyway.

 on:
  workflow_run:
-    workflows: ['E2E Staging SaaS (full lifecycle)']
+    workflows:
+      - 'E2E Staging SaaS (full lifecycle)'
+      - 'publish-workspace-server-image'
    types: [completed]
    branches: [main]
  workflow_dispatch:
@ -39,15 +76,22 @@ permissions:
  contents: read
  packages: write

+concurrency:
+  # Serialize promotes per-SHA so the publish+E2E both-fired race lands
+  # cleanly. Different SHAs can promote in parallel.
+  group: auto-promote-latest-${{ github.event.workflow_run.head_sha || github.event.inputs.sha || github.sha }}
+  cancel-in-progress: false
+
 env:
  IMAGE_NAME: ghcr.io/molecule-ai/platform
  TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant

 jobs:
  promote:
-    # Skip if E2E failed — `:latest` stays on the prior known-good
-    # digest. Manual dispatch always proceeds (the operator already
-    # decided to promote).
+    # Proceed if upstream succeeded OR manual dispatch. Upstream-failure
+    # paths are filtered here; the E2E-was-red kill-switch lives in the
+    # gate-check step below (covers the case where upstream is publish
+    # success but E2E for the same SHA failed).
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
@ -65,9 +109,112 @@ jobs:
          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
          echo "full=${FULL}" >> "$GITHUB_OUTPUT"

-      - uses: imjasonh/setup-crane@v0.4
+      - name: Gate — E2E Staging SaaS state for this SHA
+        # When upstream IS E2E success, we know it's green (filtered by
+        # the job-level `if` already). When upstream is publish, look up
+        # E2E state for the same SHA. Five buckets:
+        #
+        #   - completed/success: E2E confirmed safe → proceed
+        #   - completed/failure|cancelled|timed_out: E2E found a
+        #     regression → ABORT (exit 1), `:latest` stays put
+        #   - in_progress|queued|requested|waiting|pending: E2E is
+        #     RACING with publish, which typically finishes first. If
+        #     we promoted on the publish signal, a later E2E failure
+        #     couldn't roll `:latest` back. So we DEFER: proceed=false
+        #     skips the remaining steps and E2E's own completion event
+        #     re-fires this workflow via the upstream-is-E2E path.
+        #     exit 0 so the run shows as success, not a fake failure.
+        #   - none/none: no E2E run exists for this SHA — it was
+        #     paths-filtered out (canvas/cmd/sweep/etc.). The jq filter
+        #     maps an empty run list to this bucket; pre-merge gates on
+        #     staging already validated this SHA → proceed.
+        #   - anything else, including a failed `gh` lookup (reported
+        #     as error/lookup-failed): fail closed → ABORT (exit 1).
+        #
+        # Manual dispatch skips this check — operator override.
+        id: gate
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          SHA: ${{ steps.sha.outputs.full }}
+          UPSTREAM_NAME: ${{ github.event.workflow_run.name }}
+          EVENT_NAME: ${{ github.event_name }}
+        run: |
+          set -euo pipefail
+
+          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
+            echo "proceed=true" >> "$GITHUB_OUTPUT"
+            echo "::notice::Manual dispatch — skipping E2E gate (operator override)"
+            exit 0
+          fi
+
+          if [ "$UPSTREAM_NAME" = "E2E Staging SaaS (full lifecycle)" ]; then
+            echo "proceed=true" >> "$GITHUB_OUTPUT"
+            echo "::notice::Upstream is E2E itself (success per job-level if) — gate trivially satisfied"
+            exit 0
+          fi
+
+          # Upstream is publish-workspace-server-image. Check E2E state.
+          RESULT=$(gh run list \
+            --repo "$REPO" \
+            --workflow e2e-staging-saas.yml \
+            --branch main \
+            --commit "$SHA" \
+            --limit 1 \
+            --json status,conclusion \
+            --jq '(.[0] // {status: "none"}) | "\(.status)/\(.conclusion // "none")"' \
+            || echo "error/lookup-failed")
+
+          echo "E2E Staging SaaS for ${SHA:0:7}: $RESULT"
+
+          case "$RESULT" in
+            completed/success)
+              echo "proceed=true" >> "$GITHUB_OUTPUT"
+              echo "::notice::E2E green for this SHA — proceeding with promote"
+              ;;
+            completed/failure|completed/cancelled|completed/timed_out)
+              echo "proceed=false" >> "$GITHUB_OUTPUT"
+              {
+                echo "## ❌ Auto-promote aborted — E2E Staging SaaS failed"
+                echo
+                echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\`"
+                echo "\`:latest\` stays on the prior known-good digest."
+                echo
+                echo "If the failure was a flake, manually dispatch this workflow with the same sha to override."
+              } >> "$GITHUB_STEP_SUMMARY"
+              exit 1
+              ;;
+            in_progress/*|queued/*|requested/*|waiting/*|pending/*)
+              echo "proceed=false" >> "$GITHUB_OUTPUT"
+              {
+                echo "## ⏳ Auto-promote deferred — E2E Staging SaaS still running"
+                echo
+                echo "Publish completed before E2E for \`${SHA:0:7}\` (state: \`$RESULT\`)."
+                echo "Skipping retag here — E2E's own completion event will re-fire this workflow."
+                echo "If E2E ends green, that run promotes \`:latest\`. If red, it aborts."
+              } >> "$GITHUB_STEP_SUMMARY"
+              ;;
+            none/none)
+              echo "proceed=true" >> "$GITHUB_OUTPUT"
+              echo "::notice::E2E paths-filtered out for this SHA — pre-merge staging gates carry"
+              ;;
+            *)
+              echo "proceed=false" >> "$GITHUB_OUTPUT"
+              {
+                echo "## ❓ Auto-promote aborted — unexpected E2E state"
+                echo
+                echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\` (unhandled)"
+                echo "Manual investigation needed; re-dispatch with the same sha once resolved."
+              } >> "$GITHUB_STEP_SUMMARY"
+              exit 1
+              ;;
+          esac
+
+      - if: steps.gate.outputs.proceed == 'true'
+        uses: imjasonh/setup-crane@31b88efe9de28ae0ffa220711af4b60be9435f6e # v0.4

      - name: GHCR login
+        if: steps.gate.outputs.proceed == 'true'
        run: |
          echo "${{ secrets.GITHUB_TOKEN }}" | \
            crane auth login ghcr.io -u "${{ github.actor }}" --password-stdin
@ -76,35 +223,39 @@ jobs:
        # Better to fail fast with a clear message than to half-tag
        # (platform retagged but platform-tenant missing → tenants pull
        # a stale image).
+        if: steps.gate.outputs.proceed == 'true'
        run: |
          set -euo pipefail
          for img in "${IMAGE_NAME}" "${TENANT_IMAGE_NAME}"; do
            tag="${img}:staging-${{ steps.sha.outputs.short }}"
            if ! crane manifest "$tag" >/dev/null 2>&1; then
              echo "::error::Missing tag: $tag"
-              echo "::error::publish-workspace-server-image must complete on this SHA before auto-promote-on-e2e can retag :latest."
+              echo "::error::publish-workspace-server-image must complete on this SHA before auto-promote can retag :latest."
              exit 1
            fi
            echo "  ok: $tag exists"
          done

      - name: Retag platform :staging-<sha> → :latest
+        if: steps.gate.outputs.proceed == 'true'
        run: |
          crane tag "${IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest

      - name: Retag tenant :staging-<sha> → :latest
+        if: steps.gate.outputs.proceed == 'true'
        run: |
          crane tag "${TENANT_IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest

      - name: Summary
+        if: steps.gate.outputs.proceed == 'true'
        run: |
          {
-            echo "## E2E green → :latest promoted"
+            echo "## :latest promoted to ${{ steps.sha.outputs.short }}"
            echo
            if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
              echo "- Trigger: manual dispatch"
            else
-              echo "- Upstream E2E run: ${{ github.event.workflow_run.html_url }}"
+              echo "- Upstream: \`${{ github.event.workflow_run.name }}\` ([run](${{ github.event.workflow_run.html_url }}))"
            fi
            echo "- platform:staging-${{ steps.sha.outputs.short }} → :latest"
            echo "- platform-tenant:staging-${{ steps.sha.outputs.short }} → :latest"
--- a/.github/workflows/auto-promote-staging.yml
+++ b/.github/workflows/auto-promote-staging.yml
@ -1,27 +1,46 @@
 name: Auto-promote staging → main

 # Fires after any of the staging-branch quality gates complete. When ALL
-# required gates are green on the same staging SHA, fast-forwards `main`
-# to that SHA automatically — closing the gap that historically let
-# features sit on staging for weeks waiting for a bulk promotion PR
-# (see molecule-core#1496 for the 1172-commit example).
+# required gates are green on the same staging SHA, opens (or re-uses)
+# a PR `staging → main` and enables auto-merge so the merge queue lands
+# it. Closes the gap that historically let features sit on staging for
+# weeks waiting for a bulk promotion PR (see molecule-core#1496 for the
+# 1172-commit example).
+#
+# 2026-04-28 rewrite (PR #142): the previous version did a direct
+# `git merge --ff-only origin staging && git push origin main`. That
+# breaks against main's branch-protection ruleset, which requires
+# status checks "set by the expected GitHub apps" — direct pushes
+# can't satisfy that condition (only PR merges through the queue can).
+# The workflow was failing every tick with:
+#   remote: error: GH006: Protected branch update failed for refs/heads/main.
+#   remote: - Required status checks ... were not set by the expected GitHub apps.
+# Fix: mirror the PR-based pattern from auto-sync-main-to-staging.yml
+# (the reverse-direction sync, fixed in #2234 for the same reason).
+# Both directions now use the same merge-queue path that humans use,
+# no special-case bypass.
 #
 # Safety model:
 # - Runs ONLY on workflow_run events for the staging branch.
 # - Requires EVERY named gate workflow to have the same head_sha and
 #   all be `conclusion == success`. If any of them is red, skipped,
 #   cancelled, or pending, we abort (stay on the current main).
-# - Uses --ff-only: refuses to advance main if main has diverged from
-#   the staging history (e.g. a hotfix landed directly on main). In
-#   that case a human resolves the fork.
-# - Writes a commit summary so the promote shows up in git log as a
-#   deliberate act, not a stealth move.
+# - The PR base=main head=staging path lets GitHub itself enforce
+#   branch protection. If main has diverged from staging or required
+#   checks aren't satisfied, the merge queue declines the PR — no
+#   need for a manual ff-only ancestry check here.
+# - Loop safety: the auto-sync-main-to-staging workflow fires when
+#   main lands the auto-promote PR, but its merge into staging is by
+#   GITHUB_TOKEN which doesn't trigger downstream workflow_run events
+#   (GitHub Actions safety). So this workflow doesn't re-fire from
+#   its own promote landing.
 #
-# **Initial rollout:** ship this file but leave the `enabled` input set
-# such that nothing auto-promotes until staging CI has been reliably
-# green for a few days. Toggle via repo variable `AUTO_PROMOTE_ENABLED`.
+# Toggle via repo variable AUTO_PROMOTE_ENABLED (true/unset). When
+# unset, the workflow logs what it would have done but doesn't open
+# the PR — useful for dry-running the gate logic without surfacing
+# a noisy PR while staging CI is still flaky.
 #
-# **One-time repo setting (load-bearing):** this workflow opens a
+# **One-time repo setting (load-bearing):** this workflow opens the
 # staging→main PR via `gh pr create` using the default GITHUB_TOKEN.
 # Since GitHub's 2022 default change, that token cannot create or
 # approve PRs unless the repo opts in. The toggle is at:
@ -56,6 +75,7 @@ on:

 permissions:
  contents: write
+  pull-requests: write

 jobs:
  check-all-gates-green:
@ -152,14 +172,14 @@ jobs:
          set -eu
          # Repo variable AUTO_PROMOTE_ENABLED=true flips this on. While
          # it's unset, the workflow dry-runs (logs what it would have
-          # done) but doesn't actually push to main. Set the variable in
+          # done) but doesn't open the promote PR. Set the variable in
          # Settings → Secrets and variables → Actions → Variables.
          if [ "${AUTO_PROMOTE_ENABLED:-}" != "true" ] && [ "${FORCE_INPUT:-false}" != "true" ]; then
            {
              echo "## ⏸ Auto-promote disabled"
              echo
              echo "Repo variable \`AUTO_PROMOTE_ENABLED\` is not set to \`true\`."
-              echo "All gates are green on staging; would have promoted to \`main\`."
+              echo "All gates are green on staging; would have opened a promote PR to \`main\`."
              echo
              echo "To enable: Settings → Secrets and variables → Actions → Variables → \`AUTO_PROMOTE_ENABLED=true\`."
              echo "To test once manually: workflow_dispatch with \`force=true\`."
@ -168,50 +188,55 @@ jobs:
            exit 0
          fi

-      - name: Checkout main
-        if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
-        uses: actions/checkout@v4
-        with:
-          ref: main
-          fetch-depth: 0
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Fast-forward main → staging HEAD
+      - name: Open (or reuse) staging → main promote PR + enable auto-merge
        if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
          TARGET_SHA: ${{ needs.check-all-gates-green.outputs.head_sha }}
        run: |
-          set -eu
-          git config user.name "github-actions[bot]"
-          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          set -euo pipefail

-          git fetch origin staging
-          git fetch origin main
+          # Look for an existing open promote PR (idempotent on re-run
+          # of the workflow). The PR's head IS the staging branch — the
+          # whole point is "advance main to staging's tip", so we don't
+          # need a per-SHA branch like auto-sync-main-to-staging uses.
+          PR_NUM=$(gh pr list --repo "$REPO" \
+            --base main --head staging --state open \
+            --json number --jq '.[0].number // ""')

-          # Refuse to advance main if it's diverged from staging history.
-          # Someone landed a commit directly on main that's not on
-          # staging → human needs to decide how to reconcile.
-          if ! git merge-base --is-ancestor "$(git rev-parse origin/main)" "$TARGET_SHA"; then
-            {
-              echo "## ❌ Auto-promote refused — main has diverged"
-              echo
-              echo "\`main\` (\`$(git rev-parse --short origin/main)\`) is not an ancestor of staging (\`${TARGET_SHA:0:7}\`)."
-              echo "Someone committed directly to main or the histories forked."
-              echo
-              echo "Resolve manually: merge main into staging, get CI green on the merged commit,"
-              echo "then the auto-promote will succeed on the next run."
-            } >> "$GITHUB_STEP_SUMMARY"
-            exit 1
+          if [ -z "$PR_NUM" ]; then
+            TITLE="staging → main: auto-promote ${TARGET_SHA:0:7}"
+            BODY_FILE=$(mktemp)
+            cat > "$BODY_FILE" <<EOFBODY
+          Automated promotion of \`staging\` (\`${TARGET_SHA:0:8}\`) to \`main\`. All required staging gates green at this SHA: CI, E2E Staging Canvas, E2E API Smoke, CodeQL.
+
+          This PR is auto-generated by \`.github/workflows/auto-promote-staging.yml\` whenever every required gate completes green on the same staging SHA. It exists because main's branch protection requires status checks "set by the expected GitHub apps" — direct \`git push\` from a workflow can't satisfy that, only PR merges through the queue can.
+
+          Merge queue lands this; no human action needed unless gates fail. Reverse-direction sync (the merge commit on main → staging) is handled by \`auto-sync-main-to-staging.yml\`.
+          EOFBODY
+            PR_URL=$(gh pr create --repo "$REPO" \
+              --base main --head staging \
+              --title "$TITLE" \
+              --body-file "$BODY_FILE")
+            PR_NUM=$(echo "$PR_URL" | grep -oE '[0-9]+$' | tail -1)
+            rm -f "$BODY_FILE"
+            echo "::notice::Opened PR #${PR_NUM}"
+          else
+            echo "::notice::Re-using existing promote PR #${PR_NUM}"
          fi

-          # Fast-forward main to the target SHA.
-          git checkout main
-          git merge --ff-only "$TARGET_SHA"
-          git push origin main
+          # Enable auto-merge — the merge queue picks it up once
+          # required gates are green on the merge_group ref.
+          if ! gh pr merge "$PR_NUM" --repo "$REPO" --auto --merge 2>&1; then
+            echo "::warning::Failed to enable auto-merge on PR #${PR_NUM} — operator may need to merge manually."
+          fi

          {
-            echo "## ✅ Auto-promoted main → ${TARGET_SHA:0:7}"
+            echo "## ✅ Auto-promote PR opened"
            echo
-            echo "All gate workflows green on staging at this SHA."
-            echo "\`main\` fast-forwarded to match."
+            echo "- Source: staging at \`${TARGET_SHA:0:8}\`"
+            echo "- PR: #${PR_NUM}"
+            echo
+            echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail."
          } >> "$GITHUB_STEP_SUMMARY"
--- a/.github/workflows/auto-sync-main-to-staging.yml
+++ b/.github/workflows/auto-sync-main-to-staging.yml
@ -17,35 +17,45 @@ name: Auto-sync main → staging
 # bridges). Each time the bridge needed update-branch + a re-CI
 # round before merging. Operationally annoying and avoidable.
 #
-# This workflow closes the gap automatically:
+# Architecture:
 #
-#   1. Push to main fires (regardless of source: auto-promote, UI
-#      merge, API merge, direct push).
-#   2. Check whether main is already in staging's ancestry — if
-#      yes, no-op (auto-promote-staging already kept them in sync
-#      via fast-forward).
-#   3. If not, try fast-forward staging to main first (works when
-#      staging hasn't diverged with its own commits).
-#   4. If ff fails (staging has commits main doesn't — feature work
-#      in flight), do a real merge with a "chore: sync" commit so
-#      staging absorbs main's tip while keeping its own history.
-#   5. Push staging.
+# This repo's `staging` branch is protected by a `merge_queue`
+# ruleset (id 15500102) that blocks ALL direct pushes — no bypass
+# even for org admins or the GitHub Actions integration. Direct
+# `git push origin staging` returns GH013. So instead of pushing
+# directly, this workflow:
+#
+#   1. Checks if main is already in staging's ancestry → no-op.
+#   2. Creates an `auto-sync/main-<sha>` branch from staging.
+#   3. Tries `git merge --ff-only origin/main` → if staging hasn't
+#      diverged this is a clean ff.
+#   4. Otherwise `git merge --no-ff origin/main` to absorb main's
+#      tip while keeping staging's history.
+#   5. Pushes the auto-sync branch.
+#   6. Opens a PR (base=staging, head=auto-sync/main-<sha>) and
+#      enables auto-merge so the merge queue lands it.
+#
+# This mirrors the path human PRs take through staging — same
+# rules, same gates, no special-case bypass.
 #
 # Loop safety:
 #
-# `GITHUB_TOKEN`-authored pushes do NOT trigger downstream workflow
-# runs by default (GitHub Actions safety). So when this workflow
-# pushes the synced staging, `auto-promote-staging.yml` is NOT
-# triggered by that push. The next developer push to staging triggers
-# auto-promote normally. No loop is even theoretically possible.
+# `GITHUB_TOKEN`-authored merges (including the merge queue's land
+# of the auto-sync PR) do NOT trigger downstream workflow runs
+# (GitHub Actions safety). So when the auto-sync PR lands on
+# staging, `auto-promote-staging.yml` is NOT triggered by that
+# push. The next developer push to staging triggers auto-promote
+# normally. No loop possible.
 #
 # Concurrency:
 #
 # Two pushes to main in quick succession (e.g., manual UI merge
-# immediately followed by auto-promote-staging's ff-merge) would
-# otherwise race two auto-sync runs against the same staging branch
-# — second push fails non-fast-forward. The concurrency group
-# serializes them so the second run sees the first's result.
+# immediately followed by auto-promote-staging's promote PR landing) could
+# otherwise open two overlapping auto-sync PRs. The concurrency
+# group serializes runs; the second waits for the first to exit.
+# (The first run exits after opening + auto-merge-queueing the PR,
+# not after the merge actually completes — so multiple PRs can be
+# open simultaneously, but the merge queue handles them serially.)

 on:
  push:
@ -53,6 +63,7 @@ on:

 permissions:
  contents: write
+  pull-requests: write

 concurrency:
  group: auto-sync-main-to-staging
@ -60,10 +71,11 @@ concurrency:

 jobs:
  sync-staging:
-    runs-on: ubuntu-latest
+    # Self-hosted Mac mini matches the rest of this repo's workflows.
+    runs-on: [self-hosted, macos, arm64]
    steps:
      - name: Checkout staging
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          fetch-depth: 0
          ref: staging
@ -85,65 +97,117 @@ jobs:
              echo "## ✅ No-op"
              echo
              echo "staging already contains \`origin/main\` ($(git rev-parse --short=8 origin/main))."
-              echo "auto-promote-staging or a previous auto-sync run already kept them aligned."
            } >> "$GITHUB_STEP_SUMMARY"
          else
            echo "needs_sync=true" >> "$GITHUB_OUTPUT"
-            echo "::notice::staging is missing main's tip — sync needed"
+            MAIN_SHORT=$(git rev-parse --short=8 origin/main)
+            echo "main_short=${MAIN_SHORT}" >> "$GITHUB_OUTPUT"
+            echo "branch=auto-sync/main-${MAIN_SHORT}" >> "$GITHUB_OUTPUT"
+            echo "::notice::staging is missing main's tip (${MAIN_SHORT}) — opening sync PR"
          fi

-      - name: Fast-forward staging to main
+      - name: Create auto-sync branch + merge main
        if: steps.check.outputs.needs_sync == 'true'
-        id: ff
+        id: prep
        run: |
          set -euo pipefail
+          BRANCH="${{ steps.check.outputs.branch }}"
+
+          # `checkout -B` (re)creates the branch at the current HEAD (the
+          # staging checkout) even when a previous run already made one for
+          # this main sha. Nothing is reused: rebuilding from staging keeps
+          # reruns idempotent and absorbs staging commits that landed since.
+          git checkout -B "$BRANCH"
+
          if git merge --ff-only origin/main; then
            echo "did_ff=true" >> "$GITHUB_OUTPUT"
-            echo "::notice::Fast-forwarded staging to origin/main"
+            echo "::notice::Fast-forwarded ${BRANCH} to origin/main"
          else
            echo "did_ff=false" >> "$GITHUB_OUTPUT"
-            echo "::notice::ff failed — staging has its own commits; will create merge"
-          fi
-
-      - name: Merge main into staging (when ff fails)
-        if: steps.check.outputs.needs_sync == 'true' && steps.ff.outputs.did_ff != 'true'
-        run: |
-          set -euo pipefail
-          # ff failed because staging has commits main doesn't — typical
-          # in-flight feature work. Create a merge commit so staging
-          # absorbs main's tip while keeping its own history.
            if ! git merge --no-ff origin/main -m "chore: sync main → staging (auto)"; then
-            # Hygiene: leave the work tree clean before failing. Doesn't
-            # affect future runs (each gets a fresh checkout) but a
-            # half-merged tree is an unpleasant artifact to debug if
-            # anyone ever shells into the runner.
+              # Hygiene: leave the work tree clean before failing.
              git merge --abort || true
              {
                echo "## ❌ Conflict"
                echo
                echo "Auto-merge \`main → staging\` failed with conflicts."
-              echo "A human needs to resolve manually:"
-              echo
-              echo "    git checkout staging"
-              echo "    git merge origin/main"
-              echo "    # resolve, commit, push"
+                echo "A human needs to resolve manually."
              } >> "$GITHUB_STEP_SUMMARY"
              exit 1
            fi
+          fi

-      - name: Push staging
+      - name: Push auto-sync branch
        if: steps.check.outputs.needs_sync == 'true'
        run: |
          set -euo pipefail
-          git push origin staging
-          {
-            if [ "${{ steps.ff.outputs.did_ff }}" = "true" ]; then
-              echo "## ✅ staging fast-forwarded"
-              echo
-              echo "staging is now at \`$(git rev-parse --short=8 HEAD)\` (== origin/main)."
+          # Force-with-lease so a concurrent auto-sync run can't
+          # silently clobber an in-flight branch we just updated. If a
+          # different writer touched the branch, we abort and the next
+          # run picks up the latest state.
+          git push --force-with-lease origin "${{ steps.check.outputs.branch }}"
+
+      - name: Open auto-sync PR + enable auto-merge
+        if: steps.check.outputs.needs_sync == 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          BRANCH: ${{ steps.check.outputs.branch }}
+          MAIN_SHORT: ${{ steps.check.outputs.main_short }}
+          DID_FF: ${{ steps.prep.outputs.did_ff }}
+        run: |
+          set -euo pipefail
+
+          # Find existing PR for this branch (idempotent on workflow
+          # restart) before creating a new one.
+          PR_NUM=$(gh pr list --head "$BRANCH" --base staging --state open --json number --jq '.[0].number // ""')
+
+          if [ -z "$PR_NUM" ]; then
+            # Body goes via heredoc into a temp file for --body-file. Heredoc
+            # lines sit at the block scalar's base indent: un-indented lines
+            # break YAML parsing, and EOFBODY must reach the shell at column 0.
+            BODY_FILE=$(mktemp)
+            if [ "$DID_FF" = "true" ]; then
+              TITLE="chore: sync main → staging (auto, ff to ${MAIN_SHORT})"
+              cat > "$BODY_FILE" <<EOFBODY
+          Automated fast-forward of \`staging\` to \`origin/main\` (\`${MAIN_SHORT}\`). Staging has no in-flight commits that diverge from main. Merge queue lands this; no human action needed.
+
+          This PR is auto-generated by \`.github/workflows/auto-sync-main-to-staging.yml\` on every push to \`main\`. It exists because this repo's \`staging\` branch has a \`merge_queue\` ruleset that blocks direct pushes — even from the GitHub Actions integration.
+          EOFBODY
            else
-              echo "## ✅ staging absorbed main"
-              echo
-              echo "staging is now at \`$(git rev-parse --short=8 HEAD)\` with a merge commit absorbing main's tip."
+              TITLE="chore: sync main → staging (auto, merge ${MAIN_SHORT})"
+              cat > "$BODY_FILE" <<EOFBODY
+          Automated merge of \`origin/main\` (\`${MAIN_SHORT}\`) into \`staging\`. Staging has commits main doesn't, so this is a non-ff merge that absorbs main's tip. Merge queue lands this.
+
+          This PR is auto-generated by \`.github/workflows/auto-sync-main-to-staging.yml\` on every push to \`main\`.
+          EOFBODY
            fi
+
+            # gh pr create prints the URL on stdout; extract the PR number.
+            PR_URL=$(gh pr create \
+              --base staging \
+              --head "$BRANCH" \
+              --title "$TITLE" \
+              --body-file "$BODY_FILE")
+            PR_NUM=$(echo "$PR_URL" | grep -oE '[0-9]+$' | tail -1)
+            rm -f "$BODY_FILE"
+            echo "::notice::Opened PR #${PR_NUM}"
+          else
+            echo "::notice::Re-using existing PR #${PR_NUM} for ${BRANCH}"
+          fi
+
+          # Enable auto-merge — the merge queue picks it up once
+          # required gates are green. Use --merge for merge commits
+          # (matches the rest of this repo's PR convention).
+          if ! gh pr merge "$PR_NUM" --auto --merge 2>&1; then
+            echo "::warning::Failed to enable auto-merge on PR #${PR_NUM} — operator may need to merge manually."
+          fi
+
+          {
+            echo "## ✅ Auto-sync PR opened"
+            echo
+            echo "- Branch: \`$BRANCH\`"
+            echo "- PR: #$PR_NUM"
+            echo "- Strategy: $([ "$DID_FF" = "true" ] && echo "ff" || echo "merge commit")"
+            echo
+            echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail."
          } >> "$GITHUB_STEP_SUMMARY"
--- a/.github/workflows/auto-tag-runtime.yml
+++ b/.github/workflows/auto-tag-runtime.yml
@ -38,7 +38,7 @@ jobs:
  tag:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          fetch-depth: 0    # need full tag history for `git describe` / sort

--- a/.github/workflows/block-internal-paths.yml
+++ b/.github/workflows/block-internal-paths.yml
@ -26,7 +26,7 @@ jobs:
    name: Block forbidden paths
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          fetch-depth: 2  # need previous commit to diff against on push events

--- a/.github/workflows/canary-staging.yml
+++ b/.github/workflows/canary-staging.yml
@ -66,7 +66,7 @@ jobs:
      E2E_RUN_ID: "canary-${{ github.run_id }}"

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Verify admin token present
        run: |
@ -98,7 +98,7 @@ jobs:
      # next deploy window.
      - name: Open issue on failure
        if: failure()
-        uses: actions/github-script@v7
+        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
        env:
          # Inject the workflow path explicitly — context.workflow is
          # the *name*, not the file path the actions API needs.
@ -165,7 +165,7 @@ jobs:

      - name: Auto-close canary issue on success
        if: success()
-        uses: actions/github-script@v7
+        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
        with:
          script: |
            const title = '🔴 Canary failing: staging SaaS smoke';
--- a/.github/workflows/canary-verify.yml
+++ b/.github/workflows/canary-verify.yml
@ -40,7 +40,7 @@ jobs:
      smoke_ran: ${{ steps.smoke.outputs.ran }}
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Compute sha
        id: compute
@ -143,7 +143,7 @@ jobs:
    if: ${{ needs.canary-smoke.result == 'success' && needs.canary-smoke.outputs.smoke_ran == 'true' }}
    runs-on: ubuntu-latest
    steps:
-      - uses: imjasonh/setup-crane@v0.4
+      - uses: imjasonh/setup-crane@31b88efe9de28ae0ffa220711af4b60be9435f6e # v0.4

      - name: GHCR login
        run: |
--- a/.github/workflows/check-merge-group-trigger.yml
+++ b/.github/workflows/check-merge-group-trigger.yml
@ -36,7 +36,7 @@ jobs:
    permissions:
      contents: read
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      - name: Verify merge_group trigger on required-check workflows
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -32,7 +32,7 @@ jobs:
      python: ${{ steps.check.outputs.python }}
      scripts: ${{ steps.check.outputs.scripts }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          fetch-depth: 0
      - id: check
@ -72,8 +72,8 @@ jobs:
      run:
        working-directory: workspace-server
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
        with:
          go-version: 'stable'
      - run: go mod download
@ -187,8 +187,8 @@ jobs:
      run:
        working-directory: canvas
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-node@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
        with:
          node-version: '22'
      - run: rm -f package-lock.json && npm install
@ -210,7 +210,7 @@ jobs:
    if: needs.changes.outputs.scripts == 'true'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      - name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh
        # shellcheck is pre-installed on ubuntu-latest runners (via apt).
        # infra/scripts/ is included because setup.sh + nuke.sh gate the
@ -276,8 +276,8 @@ jobs:
      run:
        working-directory: workspace
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: '3.11'
          cache: pip
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@ -53,14 +53,14 @@ jobs:

    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Checkout sibling plugin repo
        # Same reasoning as publish-workspace-server-image.yml — the Go
        # module's replace directive needs the plugin source so
        # CodeQL's "go build" phase can resolve.
        if: matrix.language == 'go'
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          repository: Molecule-AI/molecule-ai-plugin-github-app-auth
          path: molecule-ai-plugin-github-app-auth
@ -69,7 +69,7 @@ jobs:
      # jq is pre-installed on ubuntu-latest — no setup step needed.

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@v3
+        uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
        with:
          languages: ${{ matrix.language }}
          # security-extended widens past the default to include the
@ -77,11 +77,11 @@ jobs:
          queries: security-extended

      - name: Autobuild
-        uses: github/codeql-action/autobuild@v3
+        uses: github/codeql-action/autobuild@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2

      - name: Perform CodeQL Analysis
        id: analyze
-        uses: github/codeql-action/analyze@v3
+        uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
        with:
          category: "/language:${{ matrix.language }}"
          # upload: never — GHAS isn't enabled on this repo, so the
@ -121,7 +121,7 @@ jobs:
        # 14-day retention — longer than default 3, short enough not
        # to bloat quota.
        if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: codeql-sarif-${{ matrix.language }}
          path: sarif-results/${{ matrix.language }}/
--- a/.github/workflows/e2e-api.yml
+++ b/.github/workflows/e2e-api.yml
@ -27,7 +27,17 @@ on:
  workflow_dispatch:

 concurrency:
-  group: e2e-api-${{ github.ref }}
+  # Per-SHA grouping (changed 2026-04-28 from per-ref). Per-ref had the
+  # same auto-promote-staging brittleness as e2e-staging-canvas — back-
+  # to-back staging pushes share refs/heads/staging, so the older push's
+  # queued run gets cancelled when a newer push lands. Auto-promote-
+  # staging then sees `completed/cancelled` for the older SHA and stays
+  # put; the newer SHA's gates may eventually save the day, but if the
+  # newer push gets cancelled too, we deadlock.
+  #
+  # See e2e-staging-canvas.yml's identical concurrency block for the full
+  # rationale and the 2026-04-28 incident reference.
+  group: e2e-api-${{ github.event.pull_request.head.sha || github.sha }}
  cancel-in-progress: false

 jobs:
@ -36,8 +46,8 @@ jobs:
    outputs:
      api: ${{ steps.decide.outputs.api }}
    steps:
-      - uses: actions/checkout@v4
-      - uses: dorny/paths-filter@v3
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
        id: filter
        with:
          filters: |
@ -56,9 +66,18 @@ jobs:
            echo "api=${{ steps.filter.outputs.api }}" >> "$GITHUB_OUTPUT"
          fi

+  # Same `name:` as the real job below so the check-run produced by the
+  # no-op path is indistinguishable from the real one for branch
+  # protection purposes. Without this, the real job was always skipped on
+  # paths-filtered commits → branch protection on `main` saw "E2E API
+  # Smoke Test" as a missing required check → auto-promote-staging's
+  # `git push origin main` got rejected with GH006. Observed 2026-04-28
+  # 00:22 UTC blocking the staging→main promote despite all gates
+  # actually passing at the workflow level.
  no-op:
    needs: detect-changes
    if: needs.detect-changes.outputs.api != 'true'
+    name: E2E API Smoke Test
    runs-on: ubuntu-latest
    steps:
      - run: |
@ -78,8 +97,8 @@ jobs:
      PG_CONTAINER: molecule-ci-postgres
      REDIS_CONTAINER: molecule-ci-redis
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
        with:
          go-version: 'stable'
          cache: true
--- a/.github/workflows/e2e-staging-canvas.yml
+++ b/.github/workflows/e2e-staging-canvas.yml
@ -37,7 +37,25 @@ on:
    - cron: '0 8 * * 0'

 concurrency:
-  group: e2e-staging-canvas
+  # Per-SHA grouping (changed 2026-04-28 from a single global group). The
+  # global group made auto-promote-staging brittle: when a staging push
+  # queued behind an in-flight run and a third entrant (a PR run, a
+  # follow-on push) entered the group, the staging push got cancelled —
+  # leaving auto-promote-staging looking at `completed/cancelled` for a
+  # required gate and refusing to advance main. Observed 2026-04-28
+  # 23:51-23:53 on staging tip 3f99fede.
+  #
+  # The original intent of the global group was to throttle parallel
+  # E2E provisions (each spins a fresh EC2). At our scale that throttle
+  # isn't worth the correctness cost — fresh-org-per-run isolates the
+  # state, and the cost of two parallel runs (~$0.001/min × 10min × 2)
+  # is rounding error vs. the cost of a stuck pipeline.
+  #
+  # Per-SHA still dedupes accidental double-triggers for the SAME SHA.
+  # It does NOT cancel obsolete-PR-version runs on force-push; that
+  # wasted CI is acceptable given the alternative is losing staging-tip
+  # data that auto-promote-staging needs.
+  group: e2e-staging-canvas-${{ github.event.pull_request.head.sha || github.sha }}
  cancel-in-progress: false

 jobs:
@ -46,8 +64,8 @@ jobs:
    outputs:
      canvas: ${{ steps.decide.outputs.canvas }}
    steps:
-      - uses: actions/checkout@v4
-      - uses: dorny/paths-filter@v3
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
        id: filter
        with:
          filters: |
@ -64,9 +82,14 @@ jobs:
            echo "canvas=${{ steps.filter.outputs.canvas }}" >> "$GITHUB_OUTPUT"
          fi

+  # Same `name:` as the playwright job below so the check-run is
+  # indistinguishable from the real one for branch protection. Mirrors
+  # the e2e-api.yml fix in the same PR — see that file for the
+  # 2026-04-28 incident reference.
  no-op:
    needs: detect-changes
    if: needs.detect-changes.outputs.canvas != 'true'
+    name: Canvas tabs E2E
    runs-on: ubuntu-latest
    steps:
      - run: |
@ -90,7 +113,7 @@ jobs:
        working-directory: canvas

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Verify admin token present
        run: |
@ -100,7 +123,7 @@ jobs:
          fi

      - name: Set up Node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
        with:
          node-version: '20'
          cache: 'npm'
@ -117,7 +140,7 @@ jobs:

      - name: Upload Playwright report on failure
        if: failure()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: playwright-report-staging
          path: canvas/playwright-report-staging/
@ -125,7 +148,7 @@ jobs:

      - name: Upload screenshots on failure
        if: failure()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: playwright-screenshots
          path: canvas/test-results/
--- a/.github/workflows/e2e-staging-saas.yml
+++ b/.github/workflows/e2e-staging-saas.yml
@ -92,7 +92,7 @@ jobs:
      E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Verify admin token present
        run: |
--- a/.github/workflows/e2e-staging-sanity.yml
+++ b/.github/workflows/e2e-staging-sanity.yml
@ -50,7 +50,7 @@ jobs:
      E2E_INTENTIONAL_FAILURE: "1"

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Verify admin token present
        run: |
@ -89,7 +89,7 @@ jobs:

      - name: Open issue if safety net is broken
        if: failure()
-        uses: actions/github-script@v7
+        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
        with:
          script: |
            const title = "🚨 E2E teardown safety net broken";
--- a/.github/workflows/promote-latest.yml
+++ b/.github/workflows/promote-latest.yml
@ -34,7 +34,7 @@ jobs:
  promote:
    runs-on: ubuntu-latest
    steps:
-      - uses: imjasonh/setup-crane@v0.4
+      - uses: imjasonh/setup-crane@31b88efe9de28ae0ffa220711af4b60be9435f6e # v0.4

      - name: GHCR login
        run: |
--- a/.github/workflows/publish-canvas-image.yml
+++ b/.github/workflows/publish-canvas-image.yml
@ -42,17 +42,17 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Log in to GHCR
-        uses: docker/login-action@v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3

      - name: Compute tags
        id: tags
@ -85,7 +85,7 @@ jobs:
          echo "ws_url=${WS_URL}" >> "$GITHUB_OUTPUT"

      - name: Build & push canvas image to GHCR
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
        with:
          context: ./canvas
          file: ./canvas/Dockerfile
--- a/.github/workflows/publish-runtime.yml
+++ b/.github/workflows/publish-runtime.yml
@ -81,9 +81,9 @@ jobs:
      version: ${{ steps.version.outputs.version }}
      wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.11"
          cache: pip
@ -419,10 +419,33 @@ jobs:
          RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
        run: |
          set +e   # don't abort on a single repo failure — collect them all
+          # Push-vs-dispatch behaviour split (hardened 2026-04-28
+          # after the sweep-cf-orphans soft-skip incident — same class
+          # of bug):
+          #
+          # The earlier "skipping cascade. templates will pick up the
+          # new version on their own next rebuild" message was wrong —
+          # templates only build on this dispatch trigger; without it
+          # they stay pinned to whatever runtime version they last saw.
+          # A silent skip here means "PyPI is current, templates are
+          # not" and the gap is invisible until someone notices a
+          # template still on the old version weeks later.
+          #
+          #   - push                → exit 1 (red CI surfaces the gap)
+          #   - workflow_dispatch   → exit 0 with a warning (operator
+          #                           ran this ad-hoc; let them rerun
+          #                           after fixing the secret)
          if [ -z "$DISPATCH_TOKEN" ]; then
-            echo "::warning::TEMPLATE_DISPATCH_TOKEN secret not set — skipping cascade. PyPI was published; templates will pick up the new version on their own next rebuild."
+            if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+              echo "::warning::TEMPLATE_DISPATCH_TOKEN secret not set — skipping cascade."
+              echo "::warning::set it at Settings → Secrets and Variables → Actions, then rerun. Templates will stay on the prior runtime version until either this token is set or each template is rebuilt manually."
              exit 0
            fi
+            echo "::error::TEMPLATE_DISPATCH_TOKEN secret missing — cascade cannot fan out."
+            echo "::error::PyPI was published, but the 8 template repos will NOT pick up the new version until this token is restored and a republish dispatches the cascade."
+            echo "::error::set it at Settings → Secrets and Variables → Actions; then re-trigger publish-runtime via workflow_dispatch."
+            exit 1
+          fi
          VERSION="$RUNTIME_VERSION"
          if [ -z "$VERSION" ]; then
            echo "::error::publish job did not expose a version output — cascade cannot fan out"
--- a/.github/workflows/publish-workspace-server-image.yml
+++ b/.github/workflows/publish-workspace-server-image.yml
@ -27,7 +27,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Checkout sibling plugin repo
        # workspace-server/Dockerfile expects
@ -42,21 +42,21 @@ jobs:
        # The PAT needs Contents:Read on Molecule-AI/molecule-ai-plugin-
        # github-app-auth. Falls back to the default token for the (rare)
        # case where an operator made the plugin repo public.
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          repository: Molecule-AI/molecule-ai-plugin-github-app-auth
          path: molecule-ai-plugin-github-app-auth
          token: ${{ secrets.PLUGIN_REPO_PAT || secrets.GITHUB_TOKEN }}

      - name: Log in to GHCR
-        uses: docker/login-action@v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3

      - name: Compute tags
        id: tags
@ -87,7 +87,7 @@ jobs:
      # applyRuntimeModelEnv and caused every E2E to route hermes+openai
      # through openrouter → 401). See issue filed with this PR.
      - name: Build & push platform image to GHCR (staging-<sha> + staging-latest)
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
        with:
          context: .
          file: ./workspace-server/Dockerfile
@ -104,7 +104,7 @@ jobs:
            org.opencontainers.image.description=Molecule AI platform (Go API server) — pending canary verify

      - name: Build & push tenant image to GHCR (staging-<sha> + staging-latest)
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
        with:
          context: .
          file: ./workspace-server/Dockerfile.tenant
--- a/.github/workflows/runtime-pin-compat.yml
+++ b/.github/workflows/runtime-pin-compat.yml
@ -60,8 +60,8 @@ jobs:
    name: PyPI-latest install + import smoke
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: '3.11'
          cache: pip
--- a/.github/workflows/runtime-prbuild-compat.yml
+++ b/.github/workflows/runtime-prbuild-compat.yml
@ -61,8 +61,8 @@ jobs:
    name: PR-built wheel + import smoke
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: '3.11'
          cache: pip
--- a/.github/workflows/secret-pattern-drift.yml
+++ b/.github/workflows/secret-pattern-drift.yml
@ -49,7 +49,7 @@ jobs:
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.11"

--- a/.github/workflows/secret-scan.yml
+++ b/.github/workflows/secret-scan.yml
@ -40,7 +40,7 @@ jobs:
    name: Scan diff for credential-shaped strings
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          fetch-depth: 2  # need previous commit to diff against on push events

--- a/.github/workflows/sweep-cf-orphans.yml
+++ b/.github/workflows/sweep-cf-orphans.yml
@ -78,15 +78,30 @@ jobs:
      MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '50' }}

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Verify required secrets present
        id: verify
-        # Soft skip when secrets aren't configured. The 6 secrets have
-        # to be set on the repo manually before this workflow can do
-        # real work; until they are, the schedule is a no-op rather
-        # than a recurring red CI run. workflow_dispatch surfaces a
-        # warning so an operator running it ad-hoc sees the gap.
+        # Schedule-vs-dispatch behaviour split (hardened 2026-04-28
+        # after the silent-no-op incident below):
+        #
+        # The earlier soft-skip-on-schedule policy hid a real leak. All
+        # six secrets were unset on this repo for an unknown duration;
+        # every hourly run printed a yellow ::warning:: and exited 0,
+        # so the workflow registered as "passing" while doing nothing.
+        # CF orphans accumulated to 152/200 (~76% of the zone quota
+        # gone) before a manual `dig`-driven audit caught it. Anything
+        # that runs as a janitor and reports green while idle is
+        # indistinguishable from "the janitor is healthy" — so we now
+        # treat schedule (and any future workflow_run/push triggers)
+        # as a hard-fail when secrets are missing.
+        #
+        #   - schedule / workflow_run / push → exit 1 (red CI run
+        #     surfaces the misconfiguration the next tick)
+        #   - workflow_dispatch              → exit 0 with a warning
+        #     (an operator ran this ad-hoc; they already accepted the
+        #     state of the repo and want the workflow to short-circuit
+        #     so they can rerun after fixing the secret)
        run: |
          missing=()
          for var in CF_API_TOKEN CF_ZONE_ID CP_PROD_ADMIN_TOKEN CP_STAGING_ADMIN_TOKEN AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
@ -95,10 +110,17 @@ jobs:
            fi
          done
          if [ ${#missing[@]} -gt 0 ]; then
-            echo "::warning::skipping sweep — secrets not yet configured: ${missing[*]}"
+            if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+              echo "::warning::skipping sweep — secrets not configured: ${missing[*]}"
+              echo "::warning::set them at Settings → Secrets and Variables → Actions, then rerun."
              echo "skip=true" >> "$GITHUB_OUTPUT"
              exit 0
            fi
+            echo "::error::sweep cannot run — required secrets missing: ${missing[*]}"
+            echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow."
+            echo "::error::a silent skip masked an active CF DNS leak (152/200 zone records) caught only by a manual audit on 2026-04-28; this gate exists to make the gap visible."
+            exit 1
+          fi
          echo "All required secrets present ✓"
          echo "skip=false" >> "$GITHUB_OUTPUT"

--- a/.github/workflows/test-ops-scripts.yml
+++ b/.github/workflows/test-ops-scripts.yml
@ -27,8 +27,8 @@ jobs:
    name: Ops scripts (unittest)
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: '3.11'
      - name: Run unittest
--- a/canvas/package-lock.json
+++ b/canvas/package-lock.json
--- a/canvas/package.json
+++ b/canvas/package.json
@ -32,13 +32,13 @@
    "@playwright/test": "^1.59.1",
    "@testing-library/jest-dom": "^6.6.0",
    "@testing-library/react": "^16.1.0",
-    "@types/node": "^22.0.0",
+    "@types/node": "^25.6.0",
    "@types/react": "^19.0.0",
    "@types/react-dom": "^19.0.0",
    "@vitejs/plugin-react": "^6.0.1",
    "@vitest/coverage-v8": "^4.1.5",
    "autoprefixer": "^10.4.0",
-    "jsdom": "^25.0.0",
+    "jsdom": "^29.1.0",
    "postcss": "^8.5.12",
    "tailwindcss": "^3.4.0",
    "typescript": "^5.7.0",
--- a/scripts/build_runtime_package.py
+++ b/scripts/build_runtime_package.py
@ -83,6 +83,7 @@ SUBPACKAGES = {
    "adapters",
    "builtin_tools",
    "lib",
+    "platform_tools",
    "plugins_registry",
    "policies",
    "skill_loader",
--- a/scripts/measure-coordinator-task-bounds.sh
+++ b/scripts/measure-coordinator-task-bounds.sh
@ -0,0 +1,199 @@
+#!/usr/bin/env bash
+#
+# Measure platform-side bounds (or absence thereof) on a coordinator's
+# task execution. Reproduction harness for Issue 4 of the 2026-04-28
+# CP review, surfaced in the RFC at molecule-core#2251.
+#
+# What Issue 4 hypothesized
+# -------------------------
+# A coordinator workspace receives an A2A kickoff, delegates to children,
+# then enters a synthesis phase whose duration the platform does not
+# bound. `DELEGATION_TIMEOUT` (300s, in workspace/builtin_tools/
+# delegation.py) governs the parent→child HTTP request, NOT the
+# coordinator's own task-execution budget. So a coordinator that's
+# spent 10min synthesizing past delegation will keep going until the
+# LLM returns or its host runtime crashes — never bounded by a platform
+# ceiling.
+#
+# Issue 4 explicitly hedged ("This isn't necessarily a platform bug —
+# could be that the Design Director's system prompt told it to do
+# complex synthesis work that exceeded the A2A response window"). This
+# script is the empirical test of which side that ambiguity lands on.
+#
+# What this script does NOT do
+# ----------------------------
+# - It does NOT assert pass/fail. The "bug" is absence-of-bound, which
+#   is hard to assert in a single run. The script outputs measurement
+#   data; the team interprets.
+# - It does NOT simulate a coordinator hang via runtime modification.
+#   Instead, it drives a real coordinator with a synthesis-heavy task
+#   and observes the duration the platform tolerates.
+# - It does NOT clean up on failure. Use scripts/cleanup-rogue-workspaces.sh.
+#
+# What "bug confirmed" looks like (per Issue 4)
+# ---------------------------------------------
+#   coordinator_response_secs > 300 AND no platform_intervention=true
+#   in the heartbeat trace → coordinator ran past DELEGATION_TIMEOUT
+#   (HTTP-level) without any platform ceiling kicking in. The RFC's
+#   V1.0 operator ceiling would convert this into an explicit
+#   `terminated` response at MAX_TASK_EXECUTION_SECS.
+#
+# What "bug refuted" looks like
+# -----------------------------
+#   coordinator_response_secs cleanly bounded by either the LLM API
+#   timeout or some other platform mechanism → Issue 4's premise that
+#   "no platform-enforced timeout" is wrong, V1.0 of the RFC needs
+#   re-justification.
+#
+# Usage
+# -----
+#   PLATFORM=http://localhost:8080 OPENROUTER_API_KEY=... \
+#     bash scripts/measure-coordinator-task-bounds.sh
+#
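+# Or against staging-api. NOTE: staging requires a tenant admin token,
+# but this script does not send an Authorization header on any of its
+# curl calls — add one before pointing it at an authenticated platform: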
+#
+#   PLATFORM=https://your-staging-tenant.example \
+#   OPENROUTER_API_KEY=... \
+#     bash scripts/measure-coordinator-task-bounds.sh
+#
+set -euo pipefail
+
+PLATFORM="${PLATFORM:-http://localhost:8080}"
+OR_KEY="${OPENROUTER_API_KEY:-${OPENAI_API_KEY:?Set OPENROUTER_API_KEY (or OPENAI_API_KEY)}}"
+# Synthesis prompt knob — choose the size of the post-delegation work
+# the coordinator is asked to do. Default exercises 3 delegation rounds
+# with non-trivial aggregation.
+SYNTHESIS_DEPTH="${SYNTHESIS_DEPTH:-3}"
+# Max time we'll wait on the coordinator's A2A response before giving
+# up on this measurement. Set generously (10min) so we don't truncate
+# a slow-but-eventually-completing case.
+A2A_TIMEOUT="${A2A_TIMEOUT:-600}"
+
+ts() { date -u +%Y-%m-%dT%H:%M:%S.%3NZ 2>/dev/null || date -u +%Y-%m-%dT%H:%M:%SZ; }
+
+# emit EVENT [JSON_DATA] — write one JSON object per line to stdout so
+# the run log stays machine-readable. JSON_DATA is inserted verbatim and
+# must already be valid JSON; it defaults to null when omitted or empty.
+emit() {
+  local event="$1"
+  local payload="${2:-null}"
+  printf '{"ts":"%s","event":"%s","data":%s}\n' "$(ts)" "$event" "$payload"
+}
+
+emit "run_started" "{\"platform\":\"$PLATFORM\",\"synthesis_depth\":$SYNTHESIS_DEPTH,\"a2a_timeout_secs\":$A2A_TIMEOUT}"
+
+# ---- Setup: coordinator + 1 child ----
+emit "provisioning_pm" null
+R=$(curl -s -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \
+  -d '{"name":"PM","role":"Coordinator — delegates and synthesizes","tier":2,"template":"claude-code-default"}')
+PM_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))")
+[ -n "$PM_ID" ] || { echo "ERROR: PM create failed: $R" >&2; exit 1; }
+emit "pm_provisioned" "{\"workspace_id\":\"$PM_ID\"}"
+
+emit "provisioning_child" null
+R=$(curl -s -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \
+  -d '{"name":"Researcher","role":"Returns short research findings","tier":2,"template":"langgraph"}')
+CHILD_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))")
+[ -n "$CHILD_ID" ] || { echo "ERROR: child create failed: $R" >&2; exit 1; }
+emit "child_provisioned" "{\"workspace_id\":\"$CHILD_ID\"}"
+
+curl -s -X PATCH "$PLATFORM/workspaces/$CHILD_ID" -H 'Content-Type: application/json' \
+  -d "{\"parent_id\":\"$PM_ID\"}" > /dev/null
+curl -s -X POST "$PLATFORM/workspaces/$CHILD_ID/secrets" -H 'Content-Type: application/json' \
+  -d "{\"key\":\"OPENROUTER_API_KEY\",\"value\":\"$OR_KEY\"}" > /dev/null
+
+# ---- Wait for both online ----
+# wait_online ID LABEL — poll GET /workspaces/ID until its status is
+# "online". Polls up to 30 times, 3s apart; emits an "online" event and
+# returns 0 on success, or emits "online_timeout" and returns 1.
+wait_online() {
+  local id="$1" label="$2"
+  local attempt status
+  for attempt in $(seq 1 30); do
+    # --max-time keeps one stalled request from blocking the loop; a failed
+    # or unparsable poll reads as an empty status and is simply retried
+    # (the explicit fallback also keeps `set -e` from aborting mid-poll).
+    status=$(curl -s --max-time 10 "$PLATFORM/workspaces/$id" \
+      | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))" 2>/dev/null) || status=""
+    if [ "$status" = "online" ]; then
+      emit "online" "{\"workspace\":\"$label\",\"after_polls\":$attempt}"
+      return 0
+    fi
+    sleep 3
+  done
+  emit "online_timeout" "{\"workspace\":\"$label\"}"
+  return 1
+}
+wait_online "$PM_ID"    "PM"    || exit 2
+wait_online "$CHILD_ID" "child" || exit 2
+
+# ---- Build a synthesis-heavy kickoff task ----
+# The task asks the coordinator to delegate N times, each time with a
+# different sub-question, then aggregate findings into a single report.
+# The synthesis phase happens entirely inside the coordinator's A2A
+# handler post-delegation, which is the exact code path Issue 4 named.
+TASK="You are coordinating a research analysis. Delegate $SYNTHESIS_DEPTH separate sub-questions to the Researcher (one at a time, sequentially — wait for each response before sending the next), then synthesize all findings into a single coherent report. Sub-questions: (a) historical context of distributed consensus, (b) modern Byzantine-fault-tolerant protocols, (c) practical trade-offs between Raft and Paxos. After all delegations complete, write a 600-word synthesis comparing the three responses and drawing one cross-cutting insight. Do not respond until the synthesis is complete."
+
+# ---- Time the A2A kickoff round-trip ----
+emit "a2a_kickoff_sent" "{\"to\":\"$PM_ID\",\"task_chars\":${#TASK}}"
+START_NS=$(python3 -c 'import time; print(int(time.time_ns()))')
+
+# Use --max-time to bound this measurement (else the script could itself
+# hang past sensible limits). The bound is a measurement-side timeout,
+# NOT a platform-side timeout — the latter is what we're trying to
+# detect.
+#
+# The JSON-RPC payload is built with json.dumps so the task text is
+# escaped correctly. The failure fallback sits OUTSIDE the command
+# substitution: an assignment inside $(...) runs in a subshell and is
+# lost, which would leave RESP empty (or holding partial output) rather
+# than the <curl_failed_or_timed_out> sentinel on a timeout.
+A2A_PAYLOAD=$(python3 -c "
+import json,sys
+print(json.dumps({
+  'method':'message/send',
+  'params':{
+    'message':{
+      'role':'user',
+      'parts':[{'type':'text','text':sys.argv[1]}]
+    }
+  }
+}))
+" "$TASK")
+RESP=$(curl -s --max-time "$A2A_TIMEOUT" -X POST "$PLATFORM/workspaces/$PM_ID/a2a" \
+  -H "Content-Type: application/json" \
+  -d "$A2A_PAYLOAD") || RESP="<curl_failed_or_timed_out>"
+
+END_NS=$(python3 -c 'import time; print(int(time.time_ns()))')
+ELAPSED_SECS=$(python3 -c "print(round(($END_NS - $START_NS) / 1e9, 2))")
+
+emit "a2a_response_observed" "{\"elapsed_secs\":$ELAPSED_SECS,\"response_chars\":${#RESP},\"response_head\":$(python3 -c "import json,sys; print(json.dumps(sys.argv[1][:200]))" "$RESP")}"
+
+# ---- Pull heartbeat trace from the platform ----
+# The heartbeat endpoint records workspace liveness pings. If the
+# platform implements per-task bounds, the trace will show a status
+# transition (e.g. terminated) within the run window. Absence of any
+# such transition over a 10min synthesis is the empirical evidence
+# that no platform ceiling fired.
+emit "fetching_heartbeat_trace" null
+HB=$(curl -s "$PLATFORM/workspaces/$PM_ID/heartbeat-history?since_secs=$A2A_TIMEOUT" 2>&1 || echo "<endpoint_unavailable>")
+emit "heartbeat_trace" "{\"raw\":$(python3 -c "import json,sys; print(json.dumps(sys.argv[1]))" "$HB")}"
+
+# ---- Summary ----
+emit "run_completed" "{\"elapsed_secs\":$ELAPSED_SECS,\"pm_id\":\"$PM_ID\",\"child_id\":\"$CHILD_ID\"}"
+
+cat <<EOF >&2
+
+=========================================
+  Measurement complete.
+  Coordinator response time: ${ELAPSED_SECS}s
+  PM workspace:    $PM_ID
+  Child workspace: $CHILD_ID
+=========================================
+
+Interpretation guide:
+
+  ELAPSED_SECS < 60   → Synthesis completed quickly; not informative
+                        about platform bounds (LLM was just fast).
+                        Re-run with SYNTHESIS_DEPTH=8 to force longer
+                        synthesis.
+
+  60 <= ELAPSED < 300 → Within DELEGATION_TIMEOUT. Doesn't prove or
+                        refute Issue 4 — the HTTP-level timeout would
+                        be sufficient if synthesis happened to fall
+                        under it.
+
+  ELAPSED >= 300      → BUG CONFIRMED IF heartbeat_trace shows no
+                        platform-side transition. Coordinator ran past
+                        DELEGATION_TIMEOUT without any platform ceiling
+                        kicking in — exactly the gap the RFC V1.0 plans
+                        to close with MAX_TASK_EXECUTION_SECS.
+
+  curl_failed_or_timed_out → \$A2A_TIMEOUT exceeded. Either the
+                        coordinator is genuinely hung (likely) or
+                        synthesis is just very slow. Pull workspace
+                        status separately to disambiguate.
+
+Cleanup:
+  curl -X DELETE $PLATFORM/workspaces/$PM_ID
+  curl -X DELETE $PLATFORM/workspaces/$CHILD_ID
+
+EOF
--- a/workspace-server/go.mod
+++ b/workspace-server/go.mod
@ -9,45 +9,45 @@ require (
 	github.com/alicebob/miniredis/v2 v2.37.0
 	github.com/creack/pty v1.1.18
 	github.com/docker/docker v28.5.2+incompatible
-	github.com/docker/go-connections v0.6.0
-	github.com/gin-contrib/cors v1.7.2
-	github.com/gin-gonic/gin v1.10.0
+	github.com/docker/go-connections v0.7.0
+	github.com/gin-contrib/cors v1.7.7
+	github.com/gin-gonic/gin v1.12.0
 	github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1
-	github.com/golang-jwt/jwt/v5 v5.2.2
+	github.com/golang-jwt/jwt/v5 v5.3.1
 	github.com/google/uuid v1.6.0
 	github.com/gorilla/websocket v1.5.3
 	github.com/lib/pq v1.10.9
 	github.com/opencontainers/image-spec v1.1.1
-	github.com/redis/go-redis/v9 v9.7.3
+	github.com/redis/go-redis/v9 v9.19.0
 	github.com/robfig/cron/v3 v3.0.1
-	golang.org/x/crypto v0.49.0
+	golang.org/x/crypto v0.50.0
 	gopkg.in/yaml.v3 v3.0.1
 )

 require (
-	github.com/Microsoft/go-winio v0.4.21 // indirect
-	github.com/bytedance/sonic v1.11.6 // indirect
-	github.com/bytedance/sonic/loader v0.1.1 // indirect
+	github.com/Microsoft/go-winio v0.6.2 // indirect
+	github.com/bytedance/gopkg v0.1.3 // indirect
+	github.com/bytedance/sonic v1.15.0 // indirect
+	github.com/bytedance/sonic/loader v0.5.0 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
-	github.com/cloudwego/base64x v0.1.4 // indirect
-	github.com/cloudwego/iasm v0.2.0 // indirect
+	github.com/cloudwego/base64x v0.1.6 // indirect
 	github.com/containerd/errdefs v1.0.0 // indirect
 	github.com/containerd/errdefs/pkg v0.3.0 // indirect
 	github.com/containerd/log v0.1.0 // indirect
-	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
 	github.com/distribution/reference v0.6.0 // indirect
 	github.com/docker/go-units v0.5.0 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
-	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
-	github.com/gin-contrib/sse v0.1.0 // indirect
+	github.com/gabriel-vasile/mimetype v1.4.12 // indirect
+	github.com/gin-contrib/sse v1.1.0 // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-playground/locales v0.14.1 // indirect
 	github.com/go-playground/universal-translator v0.18.1 // indirect
-	github.com/go-playground/validator/v10 v10.20.0 // indirect
-	github.com/goccy/go-json v0.10.2 // indirect
+	github.com/go-playground/validator/v10 v10.30.1 // indirect
+	github.com/goccy/go-json v0.10.5 // indirect
+	github.com/goccy/go-yaml v1.19.2 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
-	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
+	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
 	github.com/leodido/go-urn v1.4.0 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/moby/docker-image-spec v1.3.1 // indirect
@ -57,11 +57,14 @@ require (
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/morikuni/aec v1.1.0 // indirect
 	github.com/opencontainers/go-digest v1.0.0 // indirect
-	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
+	github.com/pelletier/go-toml/v2 v2.2.4 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
+	github.com/quic-go/qpack v0.6.0 // indirect
+	github.com/quic-go/quic-go v0.59.0 // indirect
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
-	github.com/ugorji/go/codec v1.2.12 // indirect
+	github.com/ugorji/go/codec v1.3.1 // indirect
 	github.com/yuin/gopher-lua v1.1.1 // indirect
+	go.mongodb.org/mongo-driver/v2 v2.5.0 // indirect
 	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
 	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 // indirect
 	go.opentelemetry.io/otel v1.43.0 // indirect
@ -70,10 +73,11 @@ require (
 	go.opentelemetry.io/otel/sdk v1.43.0 // indirect
 	go.opentelemetry.io/otel/sdk/metric v1.43.0 // indirect
 	go.opentelemetry.io/otel/trace v1.43.0 // indirect
-	golang.org/x/arch v0.8.0 // indirect
+	go.uber.org/atomic v1.11.0 // indirect
+	golang.org/x/arch v0.23.0 // indirect
 	golang.org/x/net v0.52.0 // indirect
-	golang.org/x/sys v0.42.0 // indirect
-	golang.org/x/text v0.35.0 // indirect
+	golang.org/x/sys v0.43.0 // indirect
+	golang.org/x/text v0.36.0 // indirect
 	golang.org/x/time v0.15.0 // indirect
 	google.golang.org/protobuf v1.36.11 // indirect
 	gotest.tools/v3 v3.5.2 // indirect
--- a/workspace-server/go.sum
+++ b/workspace-server/go.sum
@ -2,8 +2,8 @@ github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEK
 github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
 github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
 github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
-github.com/Microsoft/go-winio v0.4.21 h1:+6mVbXh4wPzUrl1COX9A+ZCvEpYsOBZ6/+kwDnvLyro=
-github.com/Microsoft/go-winio v0.4.21/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
+github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
+github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
 github.com/Molecule-AI/molecule-ai-plugin-gh-identity v0.0.0-20260424033845-4fd5ac7be30f h1:YkLRhUg+9qr9OV9N8dG1Hj0Ml7TThHlRwh5F//oUJVs=
 github.com/Molecule-AI/molecule-ai-plugin-gh-identity v0.0.0-20260424033845-4fd5ac7be30f/go.mod h1:NqdtlWZDJvpXNJRHnMkPhTKHdA1LZTNH+63TB66JSOU=
 github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260421064811-7d98ae51e31d h1:GpYhP6FxaJZc1Ljy5/YJ9ZIVGvfOqZBmDolNr2S5x2g=
@ -14,18 +14,18 @@ github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
 github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
 github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
 github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
-github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
-github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
-github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
-github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
+github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M=
+github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM=
+github.com/bytedance/sonic v1.15.0 h1:/PXeWFaR5ElNcVE84U0dOHjiMHQOwNIx3K4ymzh/uSE=
+github.com/bytedance/sonic v1.15.0/go.mod h1:tFkWrPz0/CUCLEF4ri4UkHekCIcdnkqXw9VduqpJh0k=
+github.com/bytedance/sonic/loader v0.5.0 h1:gXH3KVnatgY7loH5/TkeVyXPfESoqSBSBEiDd5VjlgE=
+github.com/bytedance/sonic/loader v0.5.0/go.mod h1:AR4NYCk5DdzZizZ5djGqQ92eEhCCcdf5x77udYiSJRo=
 github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
 github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
-github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
-github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
-github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
+github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
+github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
 github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
 github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
 github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
@ -37,26 +37,24 @@ github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
-github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
 github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
 github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
 github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM=
 github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
-github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94=
-github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE=
+github.com/docker/go-connections v0.7.0 h1:6SsRfJddP22WMrCkj19x9WKjEDTB+ahsdiGYf0mN39c=
+github.com/docker/go-connections v0.7.0/go.mod h1:no1qkHdjq7kLMGUXYAduOhYPSJxxvgWBh7ogVvptn3Q=
 github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
 github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
-github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
-github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
-github.com/gin-contrib/cors v1.7.2 h1:oLDHxdg8W/XDoN/8zamqk/Drgt4oVZDvaV0YmvVICQw=
-github.com/gin-contrib/cors v1.7.2/go.mod h1:SUJVARKgQ40dmrzgXEVxj2m7Ig1v1qIboQkPDTQ9t2E=
-github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
-github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
-github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
-github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
+github.com/gabriel-vasile/mimetype v1.4.12 h1:e9hWvmLYvtp846tLHam2o++qitpguFiYCKbn0w9jyqw=
+github.com/gabriel-vasile/mimetype v1.4.12/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
+github.com/gin-contrib/cors v1.7.7 h1:Oh9joP463x7Mw72vhvJ61YQm8ODh9b04YR7vsOErD0Q=
+github.com/gin-contrib/cors v1.7.7/go.mod h1:K5tW0RkzJtWSiOdikXloy8VEZlgdVNpHNw8FpjUPNrE=
+github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w=
+github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM=
+github.com/gin-gonic/gin v1.12.0 h1:b3YAbrZtnf8N//yjKeU2+MQsh2mY5htkZidOM7O0wG8=
+github.com/gin-gonic/gin v1.12.0/go.mod h1:VxccKfsSllpKshkBWgVgRniFFAzFb9csfngsqANjnLc=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
 github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
@ -68,14 +66,16 @@ github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/o
 github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
 github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
 github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
-github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
-github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
+github.com/go-playground/validator/v10 v10.30.1 h1:f3zDSN/zOma+w6+1Wswgd9fLkdwy06ntQJp0BBvFG0w=
+github.com/go-playground/validator/v10 v10.30.1/go.mod h1:oSuBIQzuJxL//3MelwSLD5hc2Tu889bF0Idm9Dg26cM=
 github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1 h1:wG8n/XJQ07TmjbITcGiUaOtXxdrINDz1b0J1w0SzqDc=
 github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1/go.mod h1:A2S0CWkNylc2phvKXWBBdD3K0iGnDBGbzRpISP2zBl8=
-github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
-github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
-github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8=
-github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
+github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
+github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
+github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM=
+github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
+github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY=
+github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
@ -88,10 +88,8 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l8
 github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
 github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
 github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE=
-github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
-github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
-github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
-github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
+github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
+github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
@ -121,41 +119,45 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
 github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
 github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
-github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
-github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
+github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
+github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/redis/go-redis/v9 v9.7.3 h1:YpPyAayJV+XErNsatSElgRZZVCwXX9QzkKYNvO7x0wM=
-github.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRltRTZHSvrA=
+github.com/quic-go/qpack v0.6.0 h1:g7W+BMYynC1LbYLSqRt8PBg5Tgwxn214ZZR34VIOjz8=
+github.com/quic-go/qpack v0.6.0/go.mod h1:lUpLKChi8njB4ty2bFLX2x4gzDqXwUpaO1DP9qMDZII=
+github.com/quic-go/quic-go v0.59.0 h1:OLJkp1Mlm/aS7dpKgTc6cnpynnD2Xg7C1pwL6vy/SAw=
+github.com/quic-go/quic-go v0.59.0/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU=
+github.com/redis/go-redis/v9 v9.19.0 h1:XPVaaPSnG6RhYf7p+rmSa9zZfeVAnWsH5h3lxthOm/k=
+github.com/redis/go-redis/v9 v9.19.0/go.mod h1:v/M13XI1PVCDcm01VtPFOADfZtHf8YW3baQf57KlIkA=
 github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
 github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
 github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
-github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
 github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
-github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
-github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
-github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
 github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
 github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
-github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
-github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/ugorji/go/codec v1.3.1 h1:waO7eEiFDwidsBN6agj1vJQ4AG7lh2yqXyOXqhgQuyY=
+github.com/ugorji/go/codec v1.3.1/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4=
 github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
 github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
+github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs=
+github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s=
+go.mongodb.org/mongo-driver/v2 v2.5.0 h1:yXUhImUjjAInNcpTcAlPHiT7bIXhshCTL3jVBkF3xaE=
+go.mongodb.org/mongo-driver/v2 v2.5.0/go.mod h1:yOI9kBsufol30iFsl1slpdq1I0eHPzybRWdyYUs8K/0=
 go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
 go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 h1:OyrsyzuttWTSur2qN/Lm0m2a8yqyIjUVBZcxFPuXq2o=
@ -176,21 +178,21 @@ go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09
 go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
 go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g=
 go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk=
-golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
-golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
-golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
-golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
-golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
+go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
+go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
+go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y=
+go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU=
+golang.org/x/arch v0.23.0 h1:lKF64A2jF6Zd8L0knGltUnegD62JMFBiCPBmQpToHhg=
+golang.org/x/arch v0.23.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A=
+golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
+golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
 golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
 golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
-golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
-golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
-golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
-golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
+golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
+golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
+golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
 golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
 golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
 google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA=
@ -209,5 +211,3 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
 gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA=
-nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
-rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
--- a/workspace/a2a_mcp_server.py
+++ b/workspace/a2a_mcp_server.py
@ -27,6 +27,7 @@ from a2a_tools import (
    tool_recall_memory,
    tool_send_message_to_user,
 )
+from platform_tools.registry import TOOLS as _PLATFORM_TOOL_SPECS

 logger = logging.getLogger(__name__)

@ -45,158 +46,27 @@ from a2a_client import (  # noqa: F401, E402
 from a2a_tools import report_activity  # noqa: F401, E402

 # --- Tool definitions (schemas) ---
+#
+# Built once at import time from the platform_tools registry. The MCP
+# `description` field is the spec's `short` line — that's the unified
+# tool description used by both the MCP tool listing AND the bullet
+# rendering in the agent-facing system-prompt section. The deeper
+# `when_to_use` guidance is appended to the system prompt only (it's
+# too long to live in MCP `description` without bloating every
+# tool-list response the model sees).

 TOOLS = [
    {
-        "name": "delegate_task",
-        "description": "Delegate a task to another workspace via A2A protocol and WAIT for the response. Use for quick tasks. The target must be a peer (sibling or parent/child). Use list_peers to find available targets.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "workspace_id": {
-                    "type": "string",
-                    "description": "Target workspace ID (from list_peers)",
-                },
-                "task": {
-                    "type": "string",
-                    "description": "The task description to send to the target workspace",
-                },
-            },
-            "required": ["workspace_id", "task"],
-        },
-    },
-    {
-        "name": "delegate_task_async",
-        "description": "Send a task to another workspace with a short timeout (fire-and-forget). Returns immediately — the target continues processing. Best when you don't need the result right away. Note: check_task_status may not work with all workspace implementations.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "workspace_id": {
-                    "type": "string",
-                    "description": "Target workspace ID (from list_peers)",
-                },
-                "task": {
-                    "type": "string",
-                    "description": "The task description to send to the target workspace",
-                },
-            },
-            "required": ["workspace_id", "task"],
-        },
-    },
-    {
-        "name": "check_task_status",
-        "description": "Check the status of a previously submitted async task via tasks/get. Note: only works if the target workspace's A2A implementation supports task persistence. May return 'not found' for completed tasks.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "workspace_id": {
-                    "type": "string",
-                    "description": "The workspace ID the task was sent to",
-                },
-                "task_id": {
-                    "type": "string",
-                    "description": "The task_id returned by delegate_task_async",
-                },
-            },
-            "required": ["workspace_id", "task_id"],
-        },
-    },
-    {
-        "name": "list_peers",
-        "description": "List all workspaces this agent can communicate with (siblings and parent/children). Returns name, ID, status, and role for each peer.",
-        "inputSchema": {"type": "object", "properties": {}},
-    },
-    {
-        "name": "get_workspace_info",
-        "description": "Get this workspace's own info — ID, name, role, tier, parent, status.",
-        "inputSchema": {"type": "object", "properties": {}},
-    },
-    {
-        "name": "send_message_to_user",
-        "description": "Send a message directly to the user's canvas chat — pushed instantly via WebSocket. Use this to: (1) acknowledge a task immediately ('Got it, I'll start working on this'), (2) send interim progress updates while doing long work, (3) deliver follow-up results after delegation completes, (4) attach files (zip, pdf, csv, image) for the user to download via the `attachments` field (NEVER paste file URLs in `message`). The message appears in the user's chat as if you're proactively reaching out.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "message": {
-                    "type": "string",
-                    # The "no URLs in message text" rule is the single biggest
-                    # cause of bad chat UX: agents drop catbox.moe / file://
-                    # / temporary upload-host links into the prose, the
-                    # canvas renders them as plain markdown links the user
-                    # can't preview, and SaaS deployments often can't even
-                    # reach those external hosts. Every download MUST go
-                    # through the structured `attachments` field below.
-                    "description": (
-                        "Caption text for the chat bubble. Required even when sending "
-                        "attachments — set to a short label like 'Here's the build:' "
-                        "or 'Done — see attached.'\n\n"
-                        "DO NOT paste file URLs, download links, or container paths in "
-                        "this string. Files MUST go through the `attachments` field, "
-                        "which renders as a clickable download chip and works on SaaS "
-                        "deployments where external file-host URLs (catbox.moe, file://, "
-                        "etc.) are unreachable from the user's browser."
-                    ),
-                },
-                "attachments": {
-                    "type": "array",
-                    "description": (
-                        "REQUIRED for any file delivery. Pass absolute file paths inside "
-                        "THIS container (e.g. ['/tmp/build.zip', '/workspace/report.pdf']) "
-                        "— the platform uploads each file and returns a download chip "
-                        "with the file's icon + name + size in the user's chat. The chip "
-                        "works in SaaS deployments because the URL is platform-served, "
-                        "not an external host.\n\n"
-                        "USE THIS instead of: pasting URLs in `message`, base64-encoding "
-                        "in the body, or telling the user to look at a path on disk. "
-                        "If the file isn't already on disk, write it first (Bash, Write "
-                        "tool, etc.) then pass its path here. 25 MB per file cap."
-                    ),
-                    "items": {"type": "string"},
-                },
-            },
-            "required": ["message"],
-        },
-    },
-    {
-        "name": "commit_memory",
-        "description": "Append a new memory row to persistent storage. Each call CREATES a row — does not overwrite existing memories with the same content. Use to remember decisions, task results, and context that should survive a restart. Scope: LOCAL (this workspace only), TEAM (parent + siblings), GLOBAL (entire org). GLOBAL writes require tier-0 (root) workspace; lower-tier callers get an RBAC error.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "content": {
-                    "type": "string",
-                    "description": "The information to remember — be detailed and specific",
-                },
-                "scope": {
-                    "type": "string",
-                    "enum": ["LOCAL", "TEAM", "GLOBAL"],
-                    "description": "Memory scope (default: LOCAL)",
-                },
-            },
-            "required": ["content"],
-        },
-    },
-    {
-        "name": "recall_memory",
-        "description": "Substring-search persistent memory and return ALL matching rows (no pagination). Empty query returns every memory accessible at the given scope. Server-side filter is case-insensitive substring match on `content`. Use at the start of conversations to recall prior context — calling once with empty query is cheap and avoids missing relevant memories that don't match a narrow keyword.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "query": {
-                    "type": "string",
-                    "description": "Search query (empty returns all memories)",
-                },
-                "scope": {
-                    "type": "string",
-                    "enum": ["LOCAL", "TEAM", "GLOBAL", ""],
-                    "description": "Filter by scope (empty returns all accessible)",
-                },
-            },
-        },
-    },
+        "name": _spec.name,
+        "description": _spec.short,
+        "inputSchema": _spec.input_schema,
+    }
+    for _spec in _PLATFORM_TOOL_SPECS
 ]


+
+
 # --- Tool dispatch ---

 async def handle_tool_call(name: str, arguments: dict) -> str:
--- a/workspace/adapter_base.py
+++ b/workspace/adapter_base.py
@ -421,8 +421,8 @@ class BaseAdapter(ABC):
        from coordinator import get_children, get_parent_context, build_children_description
        from prompt import build_system_prompt, get_peer_capabilities, get_platform_instructions
        from builtin_tools.approval import request_approval
-        from builtin_tools.delegation import delegate_to_workspace, check_delegation_status
-        from builtin_tools.memory import commit_memory, search_memory
+        from builtin_tools.delegation import delegate_task, delegate_task_async, check_task_status
+        from builtin_tools.memory import commit_memory, recall_memory
        from builtin_tools.sandbox import run_code

        platform_url = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
@ -455,8 +455,14 @@ class BaseAdapter(ABC):
                    seen_skill_ids.add(skill.metadata.id)
        logger.info(f"Loaded {len(loaded_skills)} skills: {[s.metadata.id for s in loaded_skills]}")

-        # Assemble tools: 6 core + skill tools
-        all_tools = [delegate_to_workspace, check_delegation_status, request_approval, commit_memory, search_memory, run_code]
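+        # Core platform tools — names mirror the platform_tools registry,
+        # so the delegation and memory tool names referenced in
+        # get_a2a_instructions/get_hma_instructions exist as @tool symbols
+        # here. The structural alignment test in
+        # tests/test_platform_tools.py pins this.
+        # NOTE(review): the A2A instructions footer also tells agents to
+        # call list_peers, which is not in this list — confirm it is
+        # registered elsewhere for this adapter.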
+        all_tools = [
+            delegate_task, delegate_task_async, check_task_status,
+            request_approval, commit_memory, recall_memory, run_code,
+        ]
        for skill in loaded_skills:
            all_tools.extend(skill.tools)

--- a/workspace/builtin_tools/delegation.py
+++ b/workspace/builtin_tools/delegation.py
@ -2,7 +2,7 @@

 Delegations are non-blocking: the tool fires the A2A request in the background
 and returns immediately with a task_id. The agent can check status anytime via
-check_delegation_status, or just continue working and check later.
+check_task_status, or just continue working and check later.

 When the delegate responds, the result is stored and the agent is notified
 via a status update.
@ -44,7 +44,7 @@ class DelegationStatus(str, Enum):
    # The reply will arrive via the platform's stitch path when the
    # peer finishes its current work. The LLM should WAIT, not retry,
    # and definitely not fall back to doing the work itself — see the
-    # check_delegation_status docstring for the prompt-side guidance.
+    # check_task_status docstring for the prompt-side guidance.
    QUEUED = "queued"
    COMPLETED = "completed"
    FAILED = "failed"
@ -110,7 +110,7 @@ async def _record_delegation_on_platform(task_id: str, target_workspace_id: str,
    Best-effort POST to /workspaces/<self>/delegations/record. The agent still
    fires A2A directly for speed + OTEL propagation, but the platform's
    GET /delegations endpoint now mirrors the same set an agent's local
-    check_delegation_status sees.
+    check_task_status sees.
    """
    try:
        async with httpx.AsyncClient(timeout=10) as client:
@ -129,11 +129,11 @@ async def _record_delegation_on_platform(task_id: str, target_workspace_id: str,
 async def _refresh_queued_from_platform(task_id: str) -> bool:
    """Lazy-refresh a QUEUED delegation's local state from the platform.

-    Called by check_delegation_status when local status is QUEUED. The
+    Called by check_task_status when local status is QUEUED. The
    platform's drain stitch (a2a_queue.go) updates the delegate_result
    activity_logs row when a queued delegation eventually completes,
    but it has no callback to this runtime — without this lazy refresh,
-    the LLM polling check_delegation_status would see "queued" forever
+    the LLM polling check_task_status would see "queued" forever
    even after the platform has the result.

    Returns True if the local delegation was updated to a terminal state
@ -215,7 +215,7 @@ async def _execute_delegation(task_id: str, workspace_id: str, task: str):
    delegation.status = DelegationStatus.IN_PROGRESS

    # #64: register on the platform so GET /workspaces/<self>/delegations
-    # sees the same set as check_delegation_status. Best-effort — platform
+    # sees the same set as check_task_status. Best-effort — platform
    # unreachability must not block the actual A2A delegation.
    await _record_delegation_on_platform(task_id, workspace_id, task)

@ -286,7 +286,7 @@ async def _execute_delegation(task_id: str, workspace_id: str, task: str):
                    # accepted the request but the peer's runtime is
                    # mid-task. Platform-side drain will deliver the
                    # reply asynchronously. Mark QUEUED locally so
-                    # check_delegation_status can surface that state
+                    # check_task_status can surface that state
                    # to the LLM with explicit "wait, don't bypass"
                    # guidance. Do NOT mark FAILED — the request is
                    # alive in the platform's queue, not lost.
@ -371,14 +371,36 @@ async def _execute_delegation(task_id: str, workspace_id: str, task: str):


@tool
-async def delegate_to_workspace(
+async def delegate_task(
+    workspace_id: str,
+    task: str,
+) -> str:
+    """Delegate a task to a peer workspace via A2A and WAIT for the response.
+
+    Synchronous variant — blocks until the peer replies (or the platform's
+    A2A round-trip times out). Use this for QUICK questions and small
+    sub-tasks where you can afford to wait inline.
+
+    For longer-running work (research, multi-minute jobs) use
+    delegate_task_async + check_task_status instead so you don't hold
+    this workspace busy waiting.
+
+    Tool name + description are sourced from the platform_tools registry —
+    a single ToolSpec drives MCP, LangChain, and system-prompt docs.
+    """
+    from a2a_tools import tool_delegate_task
+    return await tool_delegate_task(workspace_id, task)
+
+
+@tool
+async def delegate_task_async(
    workspace_id: str,
    task: str,
 ) -> dict:
    """Delegate a task to a peer workspace via A2A protocol (non-blocking).

    Sends the task in the background and returns immediately with a task_id.
-    Use check_delegation_status to poll for the result, or continue working
+    Use check_task_status to poll for the result, or continue working
    and check later. The delegate works independently.

    Args:
@ -386,7 +408,7 @@ async def delegate_to_workspace(
        task: The task description to send to the peer.

    Returns:
-        A dict with task_id and status="delegated". Use check_delegation_status(task_id) to get results.
+        A dict with task_id and status="delegated". Use check_task_status(task_id) to get results.
    """
    task_id = str(uuid.uuid4())

@ -417,12 +439,12 @@ async def delegate_to_workspace(
        "success": True,
        "task_id": task_id,
        "status": "delegated",
-        "message": f"Task delegated to {workspace_id}. Use check_delegation_status('{task_id}') to get the result when ready.",
+        "message": f"Task delegated to {workspace_id}. Use check_task_status('{task_id}') to get the result when ready.",
    }


@tool
-async def check_delegation_status(
+async def check_task_status(
    task_id: str = "",
 ) -> dict:
    """Check the status of a delegated task, or list all active delegations.
@ -434,7 +456,7 @@ async def check_delegation_status(
      processing a prior task. The reply WILL arrive — the platform's
      drain re-dispatches when the peer is free. This tool transparently
      polls the platform for the eventual outcome on each call, so
-      keep polling check_delegation_status periodically and you'll see
+      keep polling check_task_status periodically and you'll see
      the status flip to "completed" / "failed" automatically.
      Do NOT retry the delegation. Do NOT do the work yourself.
      Acknowledge to the user that the peer is busy and will reply,
@ -445,7 +467,7 @@ async def check_delegation_status(
      yourself if status is "failed", never if status is "queued".

    Args:
-        task_id: The task_id returned by delegate_to_workspace. If empty, lists all delegations.
+        task_id: The task_id returned by delegate_task_async. If empty, lists all delegations.

    Returns:
        Status and result (if completed) of the delegation.
@ -493,4 +515,14 @@ async def check_delegation_status(
    elif delegation.status == DelegationStatus.FAILED:
        result["error"] = delegation.error

+    # RFC #2251 V1.0 reproduction-harness instrumentation. Every poll of
+    # check_task_status emits a phase=check_status line so the harness
+    # operator can tell whether a coordinator stuck for 8 minutes was
+    # polling-children-the-whole-time vs synthesizing-after-children-done.
+    # `grep rfc2251_phase=check_status` in the workspace's container log
+    # gives the polling pattern. Strip when V1.0 ships.
+    logger.info(
+        "rfc2251_phase=check_status task_id=%s peer=%s status=%s",
+        task_id, delegation.workspace_id, delegation.status.value,
+    )
    return result
--- a/workspace/builtin_tools/memory.py
+++ b/workspace/builtin_tools/memory.py
@ -8,7 +8,7 @@ Hierarchical Memory Architecture:
 RBAC enforcement
 ----------------
 ``commit_memory`` requires the ``"memory.write"`` action.
-``search_memory`` requires the ``"memory.read"`` action.
+``recall_memory`` requires the ``"memory.read"`` action.
 Roles are read from ``config.yaml`` under ``rbac.roles`` (default: operator).

 Audit trail
@ -188,7 +188,7 @@ async def commit_memory(content: str, scope: str = "LOCAL") -> dict:


@tool
-async def search_memory(query: str = "", scope: str = "") -> dict:
+async def recall_memory(query: str = "", scope: str = "") -> dict:
    """Search stored memories.

    Args:
--- a/workspace/coordinator.py
+++ b/workspace/coordinator.py
@ -81,7 +81,7 @@ def build_children_description(children: list[dict]) -> str:
        children,
        heading="## Your Team (sub-workspaces you coordinate)",
        instruction=(
-            "Use the `delegate_to_workspace` tool to send tasks to the chosen member. "
+            "Use the `delegate_task_async` tool to send tasks to the chosen member. "
            "Only delegate to members listed above."
        ),
    )
@ -92,7 +92,7 @@ def build_children_description(children: list[dict]) -> str:
            "",
            "### Coordination Rules — MANDATORY",
            "1. You are a COORDINATOR. Your ONLY job is to delegate and synthesize. NEVER do the work yourself.",
-            "2. For EVERY task, use `delegate_to_workspace` to send it to the appropriate team member(s). "
+            "2. For EVERY task, use `delegate_task_async` to send it to the appropriate team member(s). "
            "Do this BEFORE writing any analysis, code, or research yourself.",
            "3. If a task spans multiple members, delegate to ALL of them in parallel and aggregate results.",
            "4. If ALL members are offline/paused, tell the caller which members are unavailable. "
@ -120,23 +120,56 @@ async def route_task_to_team(
        task: The task description to route.
        preferred_member_id: Optional — directly delegate to this member.
    """
-    from builtin_tools.delegation import delegate_to_workspace as delegate
+    import time
+    from builtin_tools.delegation import delegate_task_async as delegate
+
+    # RFC #2251 V1.0 reproduction-harness instrumentation. Phase-tagged log
+    # lines correlate with scripts/measure-coordinator-task-bounds.sh's
+    # external timing trace, so an operator running the harness against
+    # staging can answer "what phase was the coordinator in at minute 7?".
+    # `grep rfc2251_phase` on the workspace's container logs is the query.
+    # Strip when V1.0 ships and the phase data lands in the structured
+    # heartbeat payload instead.
+    _phase_t0 = time.monotonic()
+    logger.info(
+        "rfc2251_phase=route_start task_chars=%d preferred_member_id=%s",
+        len(task), preferred_member_id or "none",
+    )

    children = await get_children()
+    logger.info(
+        "rfc2251_phase=children_fetched count=%d elapsed_ms=%d",
+        len(children), int((time.monotonic() - _phase_t0) * 1000),
+    )
+
    decision = build_team_routing_payload(
        children,
        task=task,
        preferred_member_id=preferred_member_id,
    )
+    logger.info(
+        "rfc2251_phase=routing_decided action=%s elapsed_ms=%d",
+        decision.get("action", "unknown"), int((time.monotonic() - _phase_t0) * 1000),
+    )

    if decision.get("action") == "delegate_to_preferred_member":
        # Async delegation — returns immediately with task_id
+        target = decision["preferred_member_id"]
+        logger.info(
+            "rfc2251_phase=delegate_invoked target=%s elapsed_ms=%d",
+            target, int((time.monotonic() - _phase_t0) * 1000),
+        )
        result = await delegate.ainvoke(
-            {
-                "workspace_id": decision["preferred_member_id"],
-                "task": task,
-            }
+            {"workspace_id": target, "task": task}
+        )
+        logger.info(
+            "rfc2251_phase=delegate_returned target=%s task_id=%s elapsed_ms=%d",
+            target, result.get("task_id", "n/a"), int((time.monotonic() - _phase_t0) * 1000),
        )
        return result

+    logger.info(
+        "rfc2251_phase=route_returning_decision_only elapsed_ms=%d",
+        int((time.monotonic() - _phase_t0) * 1000),
+    )
    return decision
--- a/workspace/executor_helpers.py
+++ b/workspace/executor_helpers.py
@ -273,29 +273,19 @@ def get_system_prompt(config_path: str, fallback: str | None = None) -> str | No
    return fallback


-_A2A_INSTRUCTIONS_MCP = """## Inter-Agent Communication
-You have MCP tools for communicating with other workspaces:
- list_peers: discover available peer workspaces (name, ID, status, role)
- delegate_task: send a task and WAIT for the response (for quick tasks)
- delegate_task_async: send a task and return immediately with a task_id (for long tasks)
- check_task_status: poll an async task's status and get results when done
- get_workspace_info: get your own workspace info
-
-For quick questions, use delegate_task (synchronous).
-For long-running work (building pages, running audits), use delegate_task_async + check_task_status.
-Always use list_peers first to discover available workspace IDs.
-Access control is enforced — you can only reach siblings and parent/children.
-
-PROACTIVE MESSAGING: Use send_message_to_user to push messages to the user's chat at ANY time:
- Acknowledge tasks immediately: "Got it, delegating to the team now..."
- Send progress updates during long work: "Research Lead finished, waiting on Dev Lead..."
- Deliver follow-up results: "All teams reported back. Here's the synthesis: ..."
-This lets you respond quickly ("I'll work on this") and come back later with results.
-
-If delegate_task returns a DELEGATION FAILED message, do NOT forward the raw error to the user.
-Instead: (1) try delegating to a different peer, (2) handle the task yourself, or
-(3) tell the user which peer is unavailable and provide your own best answer."""
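+# Tool-usage instructions for system-prompt injection. The MCP variant is
+# generated from the platform_tools registry — every tool name, description,
+# and usage guidance comes from the canonical ToolSpec, so adding/renaming a
+# tool in registry.py automatically flows through the generated sections.
+# Two pieces are still hand-written and must be updated manually: the
+# section footers (which name tools such as list_peers literally) and the
+# static CLI block.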

+_A2A_FOOTER = (
+    "Always use list_peers first to discover available workspace IDs. "
+    "Access control is enforced — you can only reach siblings and parent/children. "
+    "If a delegation returns a DELEGATION FAILED message, do NOT forward "
+    "the raw error to the user. Instead: (1) try a different peer, "
+    "(2) handle the task yourself, or (3) tell the user which peer is "
+    "unavailable and provide your own best answer."
+)

 _A2A_INSTRUCTIONS_CLI = """## Inter-Agent Communication
 You can delegate tasks to other workspaces using the a2a command:
@ -309,39 +299,55 @@ For quick questions, use sync delegate. For long tasks, use --async + status.
 Only delegate to peers listed by the peers command (access control enforced)."""


+def _render_section(heading: str, specs, footer: str = "") -> str:
+    """Render one system-prompt section from a collection of tool specs.
+
+    Output layout: the heading, a blank line, one ``- **name**: short``
+    bullet per tool, a blank line, then a ``### name`` sub-section holding
+    each tool's ``when_to_use`` text, and finally the optional footer.
+
+    Args:
+        heading: First line of the section, emitted as-is.
+        specs: Iterable of objects exposing ``name``, ``short`` and
+            ``when_to_use``. It is iterated twice (bullets, then
+            sub-sections), so it must be re-iterable (list/tuple) — a
+            one-shot generator would yield the bullets but no sub-sections.
+        footer: Closing text appended after the sub-sections; skipped
+            when empty.
+
+    Returns:
+        The parts joined with newlines, with trailing whitespace stripped
+        and exactly one terminating newline.
+    """
+    parts = [heading, ""]
+    # First pass: compact overview, one bullet per tool.
+    for spec in specs:
+        parts.append(f"- **{spec.name}**: {spec.short}")
+    parts.append("")
+    # Second pass: long-form guidance, one sub-section per tool.
+    for spec in specs:
+        parts.append(f"### {spec.name}")
+        parts.append(spec.when_to_use)
+        parts.append("")
+    if footer:
+        parts.append(footer)
+    # rstrip() removes the trailing blank entry left when there is no footer.
+    return "\n".join(parts).rstrip() + "\n"
+
+
 def get_a2a_instructions(mcp: bool = True) -> str:
    """Return inter-agent communication instructions for system-prompt injection.

-    Pass `mcp=True` (default) for MCP-capable runtimes (Claude Code via SDK,
-    Codex). Pass `mcp=False` for CLI-only runtimes (Ollama, custom) that have
-    to call a2a_cli.py as a subprocess.
+    Generated from the platform_tools registry. Pass `mcp=True` (default)
+    for MCP-capable runtimes (claude-code, hermes, langchain, crewai).
+    Pass `mcp=False` for CLI-only runtimes (ollama, custom subprocess
+    runtimes that don't speak MCP) — those get a static block describing
+    the molecule_runtime.a2a_cli subprocess interface instead.
    """
-    return _A2A_INSTRUCTIONS_MCP if mcp else _A2A_INSTRUCTIONS_CLI
-
-
-_HMA_INSTRUCTIONS = """## Hierarchical Memory (HMA)
-You have persistent memory tools that survive across sessions and restarts:
-
- **commit_memory(content, scope)**: Save important information.
-  - LOCAL: private to you only (default)
-  - TEAM: shared with your parent workspace and siblings (same team)
-  - GLOBAL: shared with the entire org (only root workspaces can write)
-
- **recall_memory(query)**: Search your accessible memories. Returns LOCAL + TEAM + GLOBAL matches.
-
-**When to use memory:**
- After making a decision or learning something non-obvious → commit_memory("decision X because Y", scope="TEAM")
- Before starting work → recall_memory("what did the team decide about X")
- When you discover org-wide knowledge (repo locations, API patterns, conventions) → commit_memory(fact, scope="GLOBAL") if you are a root workspace, or scope="TEAM" to share with your team
- After completing a task → commit_memory("completed task X, PR #N opened", scope="TEAM") so your lead and teammates know
-
-**Memory is automatically recalled** at the start of each new session. Use it proactively during work to share context.
-"""
+    if not mcp:
+        return _A2A_INSTRUCTIONS_CLI
+    from platform_tools.registry import a2a_tools
+    return _render_section(
+        "## Inter-Agent Communication",
+        a2a_tools(),
+        footer=_A2A_FOOTER,
+    )


 def get_hma_instructions() -> str:
-    """Return HMA memory instructions for system-prompt injection."""
-    return _HMA_INSTRUCTIONS
+    """Return HMA persistent-memory instructions for system-prompt injection.
+
+    Generated from the platform_tools registry.
+    """
+    from platform_tools.registry import memory_tools
+    return _render_section(
+        "## Hierarchical Memory (HMA)",
+        memory_tools(),
+        footer=(
+            "Memory is automatically recalled at the start of each new "
+            "session. Use commit_memory proactively during work so future "
+            "sessions and teammates can recall what you learned."
+        ),
+    )


 # ========================================================================
--- a/workspace/main.py
+++ b/workspace/main.py
@ -337,11 +337,16 @@ async def main():  # pragma: no cover
                # Rebuild the agent's tool list from updated skills
                if hasattr(adapter, "all_tools") and hasattr(adapter, "system_prompt"):
                    from builtin_tools.approval import request_approval
-                    from builtin_tools.delegation import delegate_to_workspace
-                    from builtin_tools.memory import commit_memory, search_memory
+                    from builtin_tools.delegation import delegate_task, delegate_task_async, check_task_status
+                    from builtin_tools.memory import commit_memory, recall_memory
                    from builtin_tools.sandbox import run_code
-                    base_tools = [delegate_to_workspace, request_approval,
-                                  commit_memory, search_memory, run_code]
+                    # Core platform tools mirror adapter_base.all_tools — must
+                    # match the platform_tools registry names so docs and tools
+                    # never drift.
+                    base_tools = [
+                        delegate_task, delegate_task_async, check_task_status,
+                        request_approval, commit_memory, recall_memory, run_code,
+                    ]
                    skill_tools = []
                    for sk in adapter.loaded_skills:
                        skill_tools.extend(sk.tools)
--- a/workspace/platform_tools/__init__.py
+++ b/workspace/platform_tools/__init__.py
@ -0,0 +1,13 @@
+"""Platform tools — single source of truth for tool naming and docs.
+
+The platform owns A2A and persistent-memory tooling (cross-cutting
+runtime concerns per project memory project_runtime_native_pluggable.md).
+Tools are defined ONCE in `registry.py`. Every adapter — MCP server,
+LangChain wrapper, any future SDK integration — consumes the specs to
+register the tool in its native format. Doc generators (system-prompt
+injection, canvas help, future doc sites) read from the same place.
+
+Adding a tool: append a ToolSpec to TOOLS in registry.py. Every
+adapter picks it up automatically; structural tests fail if any side
+drifts from the registry.
+"""
--- a/workspace/platform_tools/registry.py
+++ b/workspace/platform_tools/registry.py
@ -0,0 +1,388 @@
+"""Canonical registry of platform tool specs.
+
+Every tool the platform offers to agents (A2A delegation, persistent
+memory, broadcast, introspection) is defined ONCE in TOOLS below.
+Adapters consume these specs to register the tool in their native
+runtime format:
+
+  - a2a_mcp_server.py iterates `TOOLS` to build the MCP TOOLS list +
+    dispatches calls to spec.impl. No tool name or description is
+    hardcoded there.
+
+  - builtin_tools/{delegation,memory}.py define LangChain `@tool`
+    wrappers using `name=` from the spec; the wrapper body just
+    calls spec.impl.
+
+  - executor_helpers.get_a2a_instructions() / get_hma_instructions()
+    GENERATE the system-prompt doc string from `TOOLS` — no
+    hand-maintained instruction text.
+
+Adding a new tool: append a ToolSpec to `TOOLS` below. Every adapter
+picks it up. Structural alignment tests (workspace/tests/test_platform_tools.py)
+fail if any side drifts from the registry.
+
+Renaming a tool: change `name` here. Search workspace/ for the old
+literal in case any non-adapter consumer (tests, plugin code) hard-coded
+it; update those manually. The grep is the audit, the test is the gate.
+
+Removing a tool: delete the entry. Adapters stop registering it
+automatically; doc generators stop mentioning it.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Awaitable, Callable
+from dataclasses import dataclass
+from typing import Any, Literal
+
+from a2a_tools import (
+    tool_check_task_status,
+    tool_commit_memory,
+    tool_delegate_task,
+    tool_delegate_task_async,
+    tool_get_workspace_info,
+    tool_list_peers,
+    tool_recall_memory,
+    tool_send_message_to_user,
+)
+
+# Section name maps to the heading in the agent-facing system prompt.
+# Adding a new section: add a constant + create a corresponding
+# generator in executor_helpers (or generalize get_*_instructions).
+A2A_SECTION = "a2a"
+MEMORY_SECTION = "memory"
+
+Section = Literal["a2a", "memory"]
+
+
+@dataclass(frozen=True)
+class ToolSpec:
+    """Runtime-agnostic definition of one platform tool.
+
+    Each adapter (MCP, LangChain, future SDK) consumes the same spec.
+    Doc generators consume the same spec. There is no other source
+    of truth for tool naming or description.
+    """
+
+    name: str
+    """The exact name agents see. MUST match every adapter's
+    registered name and the literal that appears in agent-facing
+    instruction docs. Structural test enforces this."""
+
+    short: str
+    """One-line description. Used as the MCP `description` field
+    AND as the bullet line in agent-facing instruction docs."""
+
+    when_to_use: str
+    """Two-to-three-sentence agent-facing usage guidance — when
+    to call this tool, what it returns, what NOT to confuse it
+    with. Concatenated into the system prompt below the tool list."""
+
+    input_schema: dict[str, Any]
+    """JSON Schema for the tool's input parameters. Consumed
+    directly by the MCP server. LangChain derives its schema from
+    Python type annotations on the @tool function — alignment is
+    pinned by the structural test."""
+
+    impl: Callable[..., Awaitable[str]]
+    """The actual coroutine. Both adapters call this; only the
+    wrapping differs."""
+
+    section: Section
+    """Which agent-prompt section this tool belongs to (controls
+    which instruction generator emits it)."""
+
+
+# ---------------------------------------------------------------------------
+# A2A — inter-agent communication & broadcast
+# ---------------------------------------------------------------------------
+
+_DELEGATE_TASK = ToolSpec(
+    name="delegate_task",
+    short=(
+        "Delegate a task to a peer workspace via A2A and WAIT for the "
+        "response (synchronous)."
+    ),
+    when_to_use=(
+        "Use for QUICK questions and small sub-tasks where you can "
+        "afford to wait inline. Returns the peer's response text "
+        "directly. For longer-running work (research, multi-minute "
+        "jobs) use delegate_task_async + check_task_status instead "
+        "so you don't hold this workspace busy waiting."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "workspace_id": {
+                "type": "string",
+                "description": "Target workspace ID (from list_peers).",
+            },
+            "task": {
+                "type": "string",
+                "description": "Task description to send to the peer.",
+            },
+        },
+        "required": ["workspace_id", "task"],
+    },
+    impl=tool_delegate_task,
+    section=A2A_SECTION,
+)
+
+_DELEGATE_TASK_ASYNC = ToolSpec(
+    name="delegate_task_async",
+    short=(
+        "Send a task to a peer and return immediately with a task_id "
+        "(non-blocking)."
+    ),
+    when_to_use=(
+        "Use for long-running work where you want to keep doing other "
+        "things while the peer processes. Poll with check_task_status "
+        "to retrieve the result. The platform's A2A queue handles "
+        "delivery + retries; the peer works independently."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "workspace_id": {
+                "type": "string",
+                "description": "Target workspace ID (from list_peers).",
+            },
+            "task": {
+                "type": "string",
+                "description": "Task description to send to the peer.",
+            },
+        },
+        "required": ["workspace_id", "task"],
+    },
+    impl=tool_delegate_task_async,
+    section=A2A_SECTION,
+)
+
+_CHECK_TASK_STATUS = ToolSpec(
+    name="check_task_status",
+    short=(
+        "Poll the status of a task started with delegate_task_async; "
+        "returns result when done."
+    ),
+    when_to_use=(
+        "Statuses: pending/in_progress (peer still working — wait), "
+        "queued (peer is busy with a prior task — DO NOT retry, the "
+        "platform stitches the response when it finishes), completed "
+        "(result available), failed (real error — fall back to a "
+        "different peer or handle it yourself)."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "workspace_id": {
+                "type": "string",
+                "description": "Workspace ID the task was sent to.",
+            },
+            "task_id": {
+                "type": "string",
+                "description": "task_id returned by delegate_task_async.",
+            },
+        },
+        "required": ["workspace_id", "task_id"],
+    },
+    impl=tool_check_task_status,
+    section=A2A_SECTION,
+)
+
+_LIST_PEERS = ToolSpec(
+    name="list_peers",
+    short=(
+        "List the workspaces this agent can communicate with — name, "
+        "ID, status, role for each."
+    ),
+    when_to_use=(
+        "Call this first when you need to delegate but don't know the "
+        "target's ID. Access control is enforced — you only see "
+        "siblings, parent, and direct children."
+    ),
+    input_schema={"type": "object", "properties": {}},
+    impl=tool_list_peers,
+    section=A2A_SECTION,
+)
+
+_GET_WORKSPACE_INFO = ToolSpec(
+    name="get_workspace_info",
+    short="Get this workspace's own info — ID, name, role, tier, parent, status.",
+    when_to_use=(
+        "Use to introspect your own identity (e.g. before reporting "
+        "back to the user, or to determine whether you're a tier-0 "
+        "root that can write GLOBAL memory)."
+    ),
+    input_schema={"type": "object", "properties": {}},
+    impl=tool_get_workspace_info,
+    section=A2A_SECTION,
+)
+
+_SEND_MESSAGE_TO_USER = ToolSpec(
+    name="send_message_to_user",
+    short=(
+        "Send a message directly to the user's canvas chat — pushed instantly "
+        "via WebSocket. Use this to: (1) acknowledge a task immediately ('Got "
+        "it, I'll start working on this'), (2) send interim progress updates "
+        "while doing long work, (3) deliver follow-up results after delegation "
+        "completes, (4) attach files (zip, pdf, csv, image) for the user to "
+        "download via the `attachments` field (NEVER paste file URLs in "
+        "`message`). The message appears in the user's chat as if you're "
+        "proactively reaching out."
+    ),
+    when_to_use=(
+        "Use proactively across the lifecycle of a task — early to "
+        "acknowledge, mid-flight to update, late to deliver. Never paste "
+        "file URLs in the message body — always pass absolute paths in "
+        "`attachments` so the platform serves them as download chips "
+        "(works on SaaS where external file hosts are unreachable)."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "message": {
+                "type": "string",
+                # The "no URLs in message text" rule is the single biggest
+                # cause of bad chat UX: agents drop catbox.moe / file://
+                # / temporary upload-host links into the prose, the
+                # canvas renders them as plain markdown links the user
+                # can't preview, and SaaS deployments often can't even
+                # reach those external hosts. Every download MUST go
+                # through the structured `attachments` field below.
+                "description": (
+                    "Caption text for the chat bubble. Required even when sending "
+                    "attachments — set to a short label like 'Here's the build:' "
+                    "or 'Done — see attached.'\n\n"
+                    "DO NOT paste file URLs, download links, or container paths in "
+                    "this string. Files MUST go through the `attachments` field, "
+                    "which renders as a clickable download chip and works on SaaS "
+                    "deployments where external file-host URLs (catbox.moe, file://, "
+                    "etc.) are unreachable from the user's browser."
+                ),
+            },
+            "attachments": {
+                "type": "array",
+                "description": (
+                    "REQUIRED for any file delivery. Pass absolute file paths inside "
+                    "THIS container (e.g. ['/tmp/build.zip', '/workspace/report.pdf']) "
+                    "— the platform uploads each file and returns a download chip "
+                    "with the file's icon + name + size in the user's chat. The chip "
+                    "works in SaaS deployments because the URL is platform-served, "
+                    "not an external host.\n\n"
+                    "USE THIS instead of: pasting URLs in `message`, base64-encoding "
+                    "in the body, or telling the user to look at a path on disk. "
+                    "If the file isn't already on disk, write it first (Bash, Write "
+                    "tool, etc.) then pass its path here. 25 MB per file cap."
+                ),
+                "items": {"type": "string"},
+            },
+        },
+        "required": ["message"],
+    },
+    impl=tool_send_message_to_user,
+    section=A2A_SECTION,
+)
+
+
+# ---------------------------------------------------------------------------
+# HMA — hierarchical persistent memory
+# ---------------------------------------------------------------------------
+
+_COMMIT_MEMORY = ToolSpec(
+    name="commit_memory",
+    short="Save a fact to persistent memory; survives across sessions and restarts.",
+    when_to_use=(
+        "Scopes: LOCAL (private to you, default), TEAM (shared with "
+        "parent + siblings), GLOBAL (entire org — only tier-0 root "
+        "workspaces can write). Commit decisions, learned facts, and "
+        "completed-task summaries so future sessions and teammates "
+        "can recall them."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "content": {
+                "type": "string",
+                "description": "What to remember — be specific.",
+            },
+            "scope": {
+                "type": "string",
+                "enum": ["LOCAL", "TEAM", "GLOBAL"],
+                "description": "Memory scope (default LOCAL).",
+            },
+        },
+        "required": ["content"],
+    },
+    impl=tool_commit_memory,
+    section=MEMORY_SECTION,
+)
+
+_RECALL_MEMORY = ToolSpec(
+    name="recall_memory",
+    short="Search persistent memory; returns matching LOCAL + TEAM + GLOBAL rows.",
+    when_to_use=(
+        "Call at the start of new work and when picking up something "
+        "you may have done before. Empty query returns ALL accessible "
+        "memories — cheap and avoids missing rows that don't match a "
+        "narrow keyword. Memory is automatically recalled at session "
+        "start; use this to refresh mid-session."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "Search query (empty returns all).",
+            },
+            "scope": {
+                "type": "string",
+                "enum": ["LOCAL", "TEAM", "GLOBAL", ""],
+                "description": "Filter by scope (empty = all accessible).",
+            },
+        },
+    },
+    impl=tool_recall_memory,
+    section=MEMORY_SECTION,
+)
+
+
+# ---------------------------------------------------------------------------
+# Public registry. Keep entries grouped by section (A2A, then HMA) in a
+# stable order for consistent adapter listings + diff-friendly review.
+# ---------------------------------------------------------------------------
+
+TOOLS: list[ToolSpec] = [
+    # A2A
+    _DELEGATE_TASK,
+    _DELEGATE_TASK_ASYNC,
+    _CHECK_TASK_STATUS,
+    _LIST_PEERS,
+    _GET_WORKSPACE_INFO,
+    _SEND_MESSAGE_TO_USER,
+    # HMA
+    _COMMIT_MEMORY,
+    _RECALL_MEMORY,
+]
+
+
+def a2a_tools() -> list[ToolSpec]:
+    """Return the specs whose section is A2A, in the order TOOLS lists them."""
+    return [spec for spec in TOOLS if spec.section == A2A_SECTION]
+
+
+def memory_tools() -> list[ToolSpec]:
+    """Return the specs whose section is memory, in the order TOOLS lists them."""
+    return [spec for spec in TOOLS if spec.section == MEMORY_SECTION]
+
+
+def by_name(name: str) -> ToolSpec:
+    """Return the spec whose canonical name equals `name`; KeyError if none."""
+    found = next((spec for spec in TOOLS if spec.name == name), None)
+    if found is None:
+        raise KeyError(f"no platform tool named {name!r}")
+    return found
+
+
+def tool_names() -> list[str]:
+    """Return every registered tool's name, in the order TOOLS lists them."""
+    return [spec.name for spec in TOOLS]
--- a/workspace/policies/routing.py
+++ b/workspace/policies/routing.py
@ -64,7 +64,7 @@ def build_team_routing_payload(
        "action": "choose_member",
        "message": (
            f"You have {len(members)} team members. "
-            "Choose the best one for this task and call delegate_to_workspace with their ID."
+            "Choose the best one for this task and call delegate_task_async with their ID."
        ),
        "task": task,
        "members": members,
--- a/workspace/prompt.py
+++ b/workspace/prompt.py
@ -4,6 +4,7 @@ import logging
 import os
 from pathlib import Path

+from executor_helpers import get_a2a_instructions, get_hma_instructions
 from skill_loader.loader import LoadedSkill
 from shared_runtime import build_peer_section

@ -68,6 +69,7 @@ def build_system_prompt(
    plugin_prompts: list[str] | None = None,
    parent_context: list[dict] | None = None,
    platform_instructions: str = "",
+    a2a_mcp: bool = True,
 ) -> str:
    """Build the complete system prompt.

@ -154,6 +156,20 @@ def build_system_prompt(
            parts.append(skill.instructions)
            parts.append("")

+    # Platform tool instructions: A2A (inter-agent communication) and HMA
+    # (persistent memory). These document how to call delegate_task,
+    # commit_memory, etc — without them, agents see the tools registered
+    # but have no instructions on when/how to use them. Placed between
+    # Skills and Peers so the A2A docs precede the peer list (which is
+    # the data shape the A2A tools operate over).
+    #
+    # a2a_mcp=True: MCP tool variant (claude-code, hermes, langchain,
+    # crewai). a2a_mcp=False: CLI subprocess variant (ollama, custom
+    # runtimes that don't speak MCP). Default True matches the
+    # MCP-capable majority; CLI-only adapters override at the call site.
+    parts.append(get_a2a_instructions(mcp=a2a_mcp))
+    parts.append(get_hma_instructions())
+
    # Add peer capabilities with a single shared renderer.
    peer_section = build_peer_section(peers)
    if peer_section:
--- a/workspace/requirements.txt
+++ b/workspace/requirements.txt
@ -9,10 +9,10 @@
 a2a-sdk[http-server]>=1.0.0,<2.0

 # HTTP / server
-httpx>=0.27.0
-uvicorn>=0.30.0
+httpx>=0.28.1
+uvicorn>=0.46.0
 starlette>=0.38.0
-websockets>=12.0
+websockets>=16.0

 # Config parsing
 pyyaml>=6.0
@ -24,7 +24,7 @@ langchain-core>=0.3.0
 # tools/telemetry.py gracefully degrades (noop) when these are absent,
 # but they are required for actual trace export.
 opentelemetry-api>=1.24.0
-opentelemetry-sdk>=1.24.0
+opentelemetry-sdk>=1.41.1
 # OTLP/HTTP exporter: sends spans to any OTEL collector and to Langfuse ≥4
 opentelemetry-exporter-otlp-proto-http>=1.24.0

@ -36,4 +36,4 @@ sqlalchemy>=2.0.0
 # tasks survive crashes and can resume.  The module and TemporalWorkflowWrapper
 # load cleanly without this package — all paths fall back to direct execution.
 # Requires a running Temporal server; set TEMPORAL_HOST=<host>:7233 to enable.
-temporalio>=1.7.0
+temporalio>=1.26.0
--- a/workspace/shared_runtime.py
+++ b/workspace/shared_runtime.py
@ -140,7 +140,7 @@ def build_peer_section(
    *,
    heading: str = "## Your Peers (workspaces you can delegate to)",
    instruction: str = (
-        "Use the `delegate_to_workspace` tool to send tasks to peers. "
+        "Use the `delegate_task_async` tool to send tasks to peers. "
        "Only delegate to peers listed above."
    ),
 ) -> str:
--- a/workspace/tests/conftest.py
+++ b/workspace/tests/conftest.py
@ -113,10 +113,12 @@ def _make_tools_mocks():
    tools_mod.__path__ = []  # Make it a proper package

    tools_delegation_mod = ModuleType("builtin_tools.delegation")
-    tools_delegation_mod.delegate_to_workspace = MagicMock()
-    tools_delegation_mod.delegate_to_workspace.name = "delegate_to_workspace"
-    tools_delegation_mod.check_delegation_status = MagicMock()
-    tools_delegation_mod.check_delegation_status.name = "check_delegation_status"
+    tools_delegation_mod.delegate_task = MagicMock()
+    tools_delegation_mod.delegate_task.name = "delegate_task"
+    tools_delegation_mod.delegate_task_async = MagicMock()
+    tools_delegation_mod.delegate_task_async.name = "delegate_task_async"
+    tools_delegation_mod.check_task_status = MagicMock()
+    tools_delegation_mod.check_task_status.name = "check_task_status"

    tools_approval_mod = ModuleType("builtin_tools.approval")
    tools_approval_mod.request_approval = MagicMock()
@ -125,8 +127,8 @@ def _make_tools_mocks():
    tools_memory_mod = ModuleType("builtin_tools.memory")
    tools_memory_mod.commit_memory = MagicMock()
    tools_memory_mod.commit_memory.name = "commit_memory"
-    tools_memory_mod.search_memory = MagicMock()
-    tools_memory_mod.search_memory.name = "search_memory"
+    tools_memory_mod.recall_memory = MagicMock()
+    tools_memory_mod.recall_memory.name = "recall_memory"

    tools_sandbox_mod = ModuleType("builtin_tools.sandbox")
    tools_sandbox_mod.run_code = MagicMock()
--- a/workspace/tests/test_coordinator_routing.py
+++ b/workspace/tests/test_coordinator_routing.py
@ -28,7 +28,7 @@ async def test_route_task_to_team_delegates_preferred_member(monkeypatch):

    delegate = MagicMock()
    delegate.ainvoke = AsyncMock(return_value={"ok": True})
-    monkeypatch.setattr(sys.modules["builtin_tools.delegation"], "delegate_to_workspace", delegate)
+    monkeypatch.setattr(sys.modules["builtin_tools.delegation"], "delegate_task_async", delegate)

    result = await coordinator.route_task_to_team(
        "Do the thing",
@ -58,4 +58,4 @@ def test_build_children_description_reuses_shared_renderer():
    assert "## Your Team (sub-workspaces you coordinate)" in description
    assert "**Alpha** (id: `child-1`, status: online)" in description
    assert "Skills: research" in description
-    assert "delegate_to_workspace" in description
+    assert "delegate_task_async" in description
--- a/workspace/tests/test_delegation.py
+++ b/workspace/tests/test_delegation.py
@ -4,7 +4,7 @@ The delegation tool now returns immediately with a task_id and runs the
 A2A request in the background. Tests verify:
 1. Immediate return with task_id
 2. Background task completion
-3. check_delegation_status retrieval
+3. check_task_status retrieval
 4. Error handling (RBAC, discovery, network)
 """

@ -109,22 +109,22 @@ def delegation_mocks(monkeypatch):


 async def _invoke(mod, workspace_id="target", task="do stuff"):
-    """Call delegate_to_workspace and return the immediate result."""
-    fn = mod.delegate_to_workspace
+    """Call delegate_task_async and return the immediate result."""
+    fn = mod.delegate_task_async
    if hasattr(fn, "ainvoke"):
        return await fn.ainvoke({"workspace_id": workspace_id, "task": task})
    return await fn(workspace_id=workspace_id, task=task)


 async def _invoke_and_wait(mod, workspace_id="target", task="do stuff"):
-    """Call delegate_to_workspace, wait for background task, return status."""
+    """Call delegate_task_async, wait for background task, return status."""
    result = await _invoke(mod, workspace_id, task)
    # Wait for all background tasks to complete
    if mod._background_tasks:
        await asyncio.gather(*mod._background_tasks, return_exceptions=True)
    # Get final status
    if "task_id" in result:
-        fn = mod.check_delegation_status
+        fn = mod.check_task_status
        if hasattr(fn, "ainvoke"):
            return await fn.ainvoke({"task_id": result["task_id"]})
        return await fn(task_id=result["task_id"])
@ -182,7 +182,7 @@ class TestAsyncDelegation:
            await _invoke(mod, workspace_id="ws-a", task="task A")
            await _invoke(mod, workspace_id="ws-b", task="task B")

-        fn = mod.check_delegation_status
+        fn = mod.check_task_status
        if hasattr(fn, "ainvoke"):
            result = await fn.ainvoke({"task_id": ""})
        else:
@ -194,7 +194,7 @@ class TestAsyncDelegation:
    async def test_check_delegation_not_found(self, delegation_mocks):
        mod, *_ = delegation_mocks

-        fn = mod.check_delegation_status
+        fn = mod.check_task_status
        if hasattr(fn, "ainvoke"):
            result = await fn.ainvoke({"task_id": "nonexistent"})
        else:
@ -354,7 +354,7 @@ class TestA2AQueued:


 class TestQueuedLazyRefresh:
-    """When a delegation is QUEUED, check_delegation_status must lazily
+    """When a delegation is QUEUED, check_task_status must lazily
    refresh from the platform's GET /delegations to pick up drain-stitch
    completions. Without this refresh, the LLM sees "queued" forever
    because the platform never pushes back to the runtime.
@ -401,7 +401,7 @@ class TestQueuedLazyRefresh:
        refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)

        with patch("httpx.AsyncClient", refresh_cls):
-            fn = mod.check_delegation_status
+            fn = mod.check_task_status
            if hasattr(fn, "ainvoke"):
                refreshed = await fn.ainvoke({"task_id": task_id})
            else:
@ -443,7 +443,7 @@ class TestQueuedLazyRefresh:
        refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)

        with patch("httpx.AsyncClient", refresh_cls):
-            fn = mod.check_delegation_status
+            fn = mod.check_task_status
            if hasattr(fn, "ainvoke"):
                refreshed = await fn.ainvoke({"task_id": task_id})
            else:
@ -486,7 +486,7 @@ class TestQueuedLazyRefresh:
        refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)

        with patch("httpx.AsyncClient", refresh_cls):
-            fn = mod.check_delegation_status
+            fn = mod.check_task_status
            if hasattr(fn, "ainvoke"):
                refreshed = await fn.ainvoke({"task_id": task_id})
            else:
@ -515,7 +515,7 @@ class TestQueuedLazyRefresh:
        refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)

        with patch("httpx.AsyncClient", refresh_cls):
-            fn = mod.check_delegation_status
+            fn = mod.check_task_status
            if hasattr(fn, "ainvoke"):
                refreshed = await fn.ainvoke({"task_id": task_id})
            else:
--- a/workspace/tests/test_executor_helpers.py
+++ b/workspace/tests/test_executor_helpers.py
@ -438,9 +438,12 @@ def test_get_system_prompt_handles_non_utf8(tmp_path):

 def test_get_a2a_instructions_mcp_default():
    out = get_a2a_instructions()
-    assert "MCP tools" in out
+    # Section heading is the canonical agent-facing label.
+    assert "## Inter-Agent Communication" in out
+    # Spot-check that registry-declared A2A tools appear by name.
    assert "list_peers" in out
    assert "send_message_to_user" in out
+    assert "delegate_task" in out


 def test_get_a2a_instructions_cli_variant():
@ -468,32 +471,27 @@ def test_a2a_cli_instructions_use_module_invocation_not_legacy_app_path():


 def test_a2a_mcp_instructions_reference_existing_tools():
-    """The MCP instructions text must only reference tools that are actually
-    registered in a2a_mcp_server.py. If someone renames a server tool, the
-    prompt text must be updated in lockstep — this test catches the drift.
+    """Pin the registry-driven alignment: every tool the registry declares
+    in a2a_tools() must appear by name in the agent-facing A2A instructions
+    AND be registered by the MCP server. Both surfaces now derive from
+    platform_tools.registry, so a miss on either side means that surface
+    drifted from the registry.
    """
-    import re
-    import pathlib
-    mcp_server = pathlib.Path(__file__).parent.parent / "a2a_mcp_server.py"
-    registered = set(re.findall(r'"name":\s*"([a-z_]+)"', mcp_server.read_text()))
-    # The server advertises itself by name; strip that false positive.
-    registered.discard("a2a-delegation")
+    from a2a_mcp_server import TOOLS as MCP_TOOLS
+    from platform_tools.registry import a2a_tools

+    registered = {t["name"] for t in MCP_TOOLS}
    instructions = get_a2a_instructions(mcp=True)

-    # Every tool called out by name in the instructions must exist on the
-    # server. (We allow the server to have extras the prompt doesn't mention.)
-    referenced = {
-        "list_peers",
-        "delegate_task",
-        "delegate_task_async",
-        "check_task_status",
-        "get_workspace_info",
-        "send_message_to_user",
-    }
-    for name in referenced:
-        assert name in instructions, f"prompt missing {name}"
-        assert name in registered, f"MCP server no longer registers {name}"
+    for spec in a2a_tools():
+        assert spec.name in instructions, (
+            f"A2A instructions are missing the tool {spec.name!r} that "
+            f"the registry declares — the doc generator drifted."
+        )
+        assert spec.name in registered, (
+            f"MCP server no longer registers {spec.name!r} that the registry "
+            f"declares — the MCP TOOLS list drifted from the registry."
+        )


 # ======================================================================
--- a/workspace/tests/test_memory.py
+++ b/workspace/tests/test_memory.py
@ -98,7 +98,7 @@ def test_commit_memory_uses_awareness_client_when_configured(monkeypatch, memory
    assert captured["json"] == {"content": "remember this", "scope": "TEAM"}


-def test_search_memory_uses_platform_fallback_without_awareness(monkeypatch, memory_modules):
+def test_recall_memory_uses_platform_fallback_without_awareness(monkeypatch, memory_modules):
    memory, _awareness_client = memory_modules
    captured = {}

@ -119,7 +119,7 @@ def test_search_memory_uses_platform_fallback_without_awareness(monkeypatch, mem

    monkeypatch.setattr(memory.httpx, "AsyncClient", FakeAsyncClient)

-    result = asyncio.run(memory.search_memory("status", "local"))
+    result = asyncio.run(memory.recall_memory("status", "local"))

    assert result == {
        "success": True,
@ -236,10 +236,10 @@ def test_commit_memory_promoted_packet_logs_skill_promotion(monkeypatch, tmp_pat
    assert not (tmp_path / "skills").exists()


-def test_search_memory_rejects_invalid_scope(memory_modules):
+def test_recall_memory_rejects_invalid_scope(memory_modules):
    memory, _awareness_client = memory_modules

-    result = asyncio.run(memory.search_memory("status", "bad"))
+    result = asyncio.run(memory.recall_memory("status", "bad"))

    assert result == {"error": "scope must be LOCAL, TEAM, GLOBAL, or empty"}

@ -457,15 +457,15 @@ def test_commit_memory_result_failure(memory_modules_with_mocks):


 # ---------------------------------------------------------------------------
-# search_memory — RBAC deny
+# recall_memory — RBAC deny
 # ---------------------------------------------------------------------------

-def test_search_memory_rbac_deny(memory_modules_with_mocks):
+def test_recall_memory_rbac_deny(memory_modules_with_mocks):
    memory, mock_audit, _ = memory_modules_with_mocks
    mock_audit.check_permission.return_value = False
    mock_audit.get_workspace_roles.return_value = (["read-only-special"], {})

-    result = asyncio.run(memory.search_memory("find something", "local"))
+    result = asyncio.run(memory.recall_memory("find something", "local"))

    assert result["success"] is False
    assert "RBAC" in result["error"]
@ -473,22 +473,22 @@ def test_search_memory_rbac_deny(memory_modules_with_mocks):


 # ---------------------------------------------------------------------------
-# search_memory — invalid scope
+# recall_memory — invalid scope
 # ---------------------------------------------------------------------------

-def test_search_memory_invalid_scope(memory_modules_with_mocks):
+def test_recall_memory_invalid_scope(memory_modules_with_mocks):
    memory, _mock_audit, _ = memory_modules_with_mocks

-    result = asyncio.run(memory.search_memory("q", "BAD"))
+    result = asyncio.run(memory.recall_memory("q", "BAD"))

    assert result == {"error": "scope must be LOCAL, TEAM, GLOBAL, or empty"}


 # ---------------------------------------------------------------------------
-# search_memory — awareness_client success
+# recall_memory — awareness_client success
 # ---------------------------------------------------------------------------

-def test_search_memory_awareness_client_success(memory_modules_with_mocks):
+def test_recall_memory_awareness_client_success(memory_modules_with_mocks):
    from unittest.mock import AsyncMock, MagicMock
    memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks

@ -501,7 +501,7 @@ def test_search_memory_awareness_client_success(memory_modules_with_mocks):
    # Patch directly on the loaded module since it imported the name at load time
    memory.build_awareness_client = MagicMock(return_value=mock_ac)

-    result = asyncio.run(memory.search_memory("find", "team"))
+    result = asyncio.run(memory.recall_memory("find", "team"))

    assert result["success"] is True
    assert result["count"] == 2
@ -509,10 +509,10 @@ def test_search_memory_awareness_client_success(memory_modules_with_mocks):


 # ---------------------------------------------------------------------------
-# search_memory — awareness_client raises
+# recall_memory — awareness_client raises
 # ---------------------------------------------------------------------------

-def test_search_memory_awareness_client_exception(memory_modules_with_mocks):
+def test_recall_memory_awareness_client_exception(memory_modules_with_mocks):
    from unittest.mock import AsyncMock, MagicMock
    memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks

@ -521,7 +521,7 @@ def test_search_memory_awareness_client_exception(memory_modules_with_mocks):
    # Patch directly on the loaded module since it imported the name at load time
    memory.build_awareness_client = MagicMock(return_value=mock_ac)

-    result = asyncio.run(memory.search_memory("query", "local"))
+    result = asyncio.run(memory.recall_memory("query", "local"))

    assert result["success"] is False
    assert "awareness search failed" in result["error"]
@ -530,10 +530,10 @@ def test_search_memory_awareness_client_exception(memory_modules_with_mocks):


 # ---------------------------------------------------------------------------
-# search_memory — httpx 200 success (no awareness_client)
+# recall_memory — httpx 200 success (no awareness_client)
 # ---------------------------------------------------------------------------

-def test_search_memory_httpx_200_success(memory_modules_with_mocks):
+def test_recall_memory_httpx_200_success(memory_modules_with_mocks):
    memory, _mock_audit, _ = memory_modules_with_mocks

    class FakeAsyncClient:
@ -545,7 +545,7 @@ def test_search_memory_httpx_200_success(memory_modules_with_mocks):

    memory.httpx.AsyncClient = FakeAsyncClient

-    result = asyncio.run(memory.search_memory("find", "global"))
+    result = asyncio.run(memory.recall_memory("find", "global"))

    assert result["success"] is True
    assert result["count"] == 2
@ -553,10 +553,10 @@ def test_search_memory_httpx_200_success(memory_modules_with_mocks):


 # ---------------------------------------------------------------------------
-# search_memory — httpx non-200
+# recall_memory — httpx non-200
 # ---------------------------------------------------------------------------

-def test_search_memory_httpx_non_200(memory_modules_with_mocks):
+def test_recall_memory_httpx_non_200(memory_modules_with_mocks):
    memory, mock_audit, _ = memory_modules_with_mocks

    class FakeAsyncClient:
@ -568,17 +568,17 @@ def test_search_memory_httpx_non_200(memory_modules_with_mocks):

    memory.httpx.AsyncClient = FakeAsyncClient

-    result = asyncio.run(memory.search_memory("q", ""))
+    result = asyncio.run(memory.recall_memory("q", ""))

    assert result["success"] is False
    assert "server error" in result["error"]


 # ---------------------------------------------------------------------------
-# search_memory — httpx raises
+# recall_memory — httpx raises
 # ---------------------------------------------------------------------------

-def test_search_memory_httpx_exception(memory_modules_with_mocks):
+def test_recall_memory_httpx_exception(memory_modules_with_mocks):
    memory, mock_audit, _ = memory_modules_with_mocks

    class FakeAsyncClient:
@ -590,7 +590,7 @@ def test_search_memory_httpx_exception(memory_modules_with_mocks):

    memory.httpx.AsyncClient = FakeAsyncClient

-    result = asyncio.run(memory.search_memory("query", "local"))
+    result = asyncio.run(memory.recall_memory("query", "local"))

    assert result["success"] is False
    assert "request timed out" in result["error"]
@ -672,7 +672,7 @@ def test_commit_memory_awareness_exception_span_record_fails(memory_modules_with
    assert result["success"] is False  # error propagated despite span failure


-def test_search_memory_awareness_exception_span_record_fails(memory_modules_with_mocks):
+def test_recall_memory_awareness_exception_span_record_fails(memory_modules_with_mocks):
    """awareness_client.search raises + span.record_exception also raises: error still returned."""
    from unittest.mock import AsyncMock, MagicMock
    memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks
@ -685,7 +685,7 @@ def test_search_memory_awareness_exception_span_record_fails(memory_modules_with
    mock_ac.search = AsyncMock(side_effect=RuntimeError("awareness down"))
    memory.build_awareness_client = MagicMock(return_value=mock_ac)

-    result = asyncio.run(memory.search_memory("test", "local"))
+    result = asyncio.run(memory.recall_memory("test", "local"))
    assert result["success"] is False


@ -711,8 +711,8 @@ def test_commit_memory_httpx_exception_span_record_fails(memory_modules_with_moc
    assert result["success"] is False


-def test_search_memory_httpx_exception_span_record_fails(memory_modules_with_mocks):
-    """httpx raises in search_memory + span.record_exception also raises: error still returned."""
+def test_recall_memory_httpx_exception_span_record_fails(memory_modules_with_mocks):
+    """httpx raises in recall_memory + span.record_exception also raises: error still returned."""
    from unittest.mock import MagicMock
    memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks

@ -729,7 +729,7 @@ def test_search_memory_httpx_exception_span_record_fails(memory_modules_with_moc

    memory.httpx.AsyncClient = FakeAsyncClient

-    result = asyncio.run(memory.search_memory("query", "local"))
+    result = asyncio.run(memory.recall_memory("query", "local"))
    assert result["success"] is False


--- a/workspace/tests/test_platform_tools.py
+++ b/workspace/tests/test_platform_tools.py
@ -0,0 +1,123 @@
+"""Structural alignment tests — every adapter must agree with the registry.
+
+The registry in workspace/platform_tools/registry.py is the single source
+of truth for tool naming + docs. These tests fail if the MCP server or
+the doc generators drift from it. The LangChain @tool wrappers are also
+consumers of the registry, but no test in this file inspects them yet.
+
+If you add a tool: append a ToolSpec to registry.TOOLS, then add the
+matching @tool wrapper in builtin_tools/. These tests catch the case
+where the registry has a name that the MCP server or the agent-facing
+docs do not carry (or vice versa for the MCP server). A missing
+LangChain @tool counterpart is not detected by this file.
+
+If you rename a tool: edit registry.TOOLS only. These tests fail loudly
+if MCP TOOLS["name"] still has the old name or the agent-facing docs
+lack the new one. The LangChain @tool name is not checked by this file.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from platform_tools.registry import TOOLS, a2a_tools, by_name, memory_tools, tool_names
+
+
+def test_registry_names_are_unique():
+    """Every ToolSpec must have a distinct name — a duplicate is a typo.
+
+    On failure the message lists only the names that occur more than
+    once, so the offending entries can be found without scanning the
+    whole registry listing.
+    """
+    from collections import Counter
+
+    names = tool_names()
+    # Keep first-seen order so the report follows the registry's order.
+    duplicates = [name for name, seen in Counter(names).items() if seen > 1]
+    assert not duplicates, f"duplicate tool names: {duplicates}"
+
+
+def test_registry_a2a_and_memory_partition_is_complete():
+    """The a2a and memory sections together cover the registry and never overlap."""
+    a2a_names = {spec.name for spec in a2a_tools()}
+    memory_names = {spec.name for spec in memory_tools()}
+    registered = set(tool_names())
+    overlap = a2a_names & memory_names
+
+    # The union must reproduce the full registry — no tool without a section.
+    assert a2a_names | memory_names == registered
+    # The intersection must be empty — no tool filed under both sections.
+    assert not overlap, f"tool in both sections: {overlap}"
+
+
+def test_by_name_lookup_works():
+    """by_name returns the matching spec and raises KeyError for an unknown name."""
+    known = by_name("delegate_task")
+    assert (known.name, known.section) == ("delegate_task", "a2a")
+
+    with pytest.raises(KeyError):
+        by_name("nonexistent_tool")
+
+
+def test_mcp_server_registers_every_registry_tool():
+    """The MCP server's TOOLS list and the registry must expose the same names.
+
+    The server's list is built from the registry at import time, so any
+    difference means that construction is broken or the registry holds
+    an entry the server is not picking up.
+    """
+    from a2a_mcp_server import TOOLS as MCP_TOOLS
+
+    served = {entry["name"] for entry in MCP_TOOLS}
+    expected = set(tool_names())
+    mcp_only = served - expected
+    registry_only = expected - served
+    assert served == expected, (
+        f"MCP and registry diverged. MCP-only: {mcp_only}; "
+        f"registry-only: {registry_only}"
+    )
+
+
+def test_mcp_tool_descriptions_match_registry_short():
+    """An MCP tool's description must equal the registry's `short` field.
+
+    `short` is the bullet-line description shown to the model; the
+    deeper when_to_use guidance lives only in the system prompt.
+    """
+    from a2a_mcp_server import TOOLS as MCP_TOOLS
+
+    # Index the served entries by name so each spec is a direct lookup.
+    mcp_entries = {entry["name"]: entry for entry in MCP_TOOLS}
+    for spec in TOOLS:
+        served_description = mcp_entries[spec.name]["description"]
+        assert served_description == spec.short, (
+            f"MCP description for {spec.name!r} drifted from registry.short. "
+            f"Edit registry.py, not the MCP server's TOOLS list."
+        )
+
+
+def test_mcp_tool_input_schemas_match_registry():
+    """Each served inputSchema must equal the registry's input_schema, never a copy kept in the server."""
+    from a2a_mcp_server import TOOLS as MCP_TOOLS
+
+    # Index the served entries by name so each spec is a direct lookup.
+    mcp_entries = {entry["name"]: entry for entry in MCP_TOOLS}
+    for spec in TOOLS:
+        served_schema = mcp_entries[spec.name]["inputSchema"]
+        assert served_schema == spec.input_schema, (
+            f"MCP inputSchema for {spec.name!r} drifted from registry."
+        )
+
+
+def test_a2a_instructions_text_includes_every_a2a_tool():
+    """get_a2a_instructions must mention every a2a-section tool by name.
+
+    The name must not run on into a longer identifier: a plain substring
+    check would accept a longer name that merely starts with it (for
+    example ``delegate_task_async`` for ``delegate_task``) and so hide a
+    tool that is missing from the docs.
+    """
+    import re
+
+    from executor_helpers import get_a2a_instructions
+
+    instructions = get_a2a_instructions(mcp=True)
+    for spec in a2a_tools():
+        # Negative lookahead: the match may not continue with an
+        # identifier character (letter, digit or underscore).
+        whole_name = re.escape(spec.name) + r"(?![A-Za-z0-9_])"
+        assert re.search(whole_name, instructions), (
+            f"agent-facing A2A docs missing tool {spec.name!r} from registry"
+        )
+
+
+def test_hma_instructions_text_includes_every_memory_tool():
+    """get_hma_instructions must mention every memory-section tool by name.
+
+    The name must not run on into a longer identifier: a plain substring
+    check would accept a longer name that merely starts with the tool's
+    name and so hide a tool that is missing from the docs.
+    """
+    import re
+
+    from executor_helpers import get_hma_instructions
+
+    instructions = get_hma_instructions()
+    for spec in memory_tools():
+        # Negative lookahead: the match may not continue with an
+        # identifier character (letter, digit or underscore).
+        whole_name = re.escape(spec.name) + r"(?![A-Za-z0-9_])"
+        assert re.search(whole_name, instructions), (
+            f"agent-facing HMA docs missing tool {spec.name!r} from registry"
+        )
+
+
+def test_old_pre_rename_names_not_present_in_docs():
+    """Retired tool names must stay out of the agent-facing docs.
+
+    delegate_to_workspace, search_memory and check_delegation_status are
+    the pre-rename names. They are not in the registry, and their
+    absence from the docs is the canonical state.
+    """
+    from executor_helpers import get_a2a_instructions, get_hma_instructions
+
+    a2a_docs = get_a2a_instructions(mcp=True)
+    hma_docs = get_hma_instructions()
+    combined_docs = a2a_docs + hma_docs
+
+    retired_names = ("delegate_to_workspace", "search_memory", "check_delegation_status")
+    for stale in retired_names:
+        assert stale not in combined_docs, (
+            f"pre-rename name {stale!r} leaked into docs — registry "
+            f"is the source of truth, not the doc generator."
+        )
--- a/workspace/tests/test_prompt.py
+++ b/workspace/tests/test_prompt.py
@ -202,7 +202,7 @@ def test_peer_capabilities_format(tmp_path):
    assert "## Your Peers" in result
    assert "**Echo Agent** (id: `peer-1`, status: online)" in result
    assert "Skills: echo, repeat" in result
-    assert "delegate_to_workspace" in result
+    assert "delegate_task_async" in result
    # peer-2 has no agent_card but DOES have a DB name + status — must
    # still render so coordinators can delegate to freshly-created peers
    # whose A2A discovery hasn't populated a card yet (regression of the
@ -395,3 +395,77 @@ async def test_get_peer_capabilities_exception():
        result = await get_peer_capabilities("http://platform:8080", "ws-abc")

    assert result == []
+
+
+# Regression tests for the A2A + HMA tool-instruction injection. Pre-fix,
+# get_a2a_instructions() and get_hma_instructions() were defined in
+# executor_helpers.py but never called from build_system_prompt — workers
+# saw the platform's delegate_task / commit_memory tools registered but
+# had no documentation telling them how to use them.
+
+def test_a2a_instructions_injected_default_mcp(tmp_path):
+    """With no a2a_mcp argument, the prompt carries the MCP-variant A2A section."""
+    prompt_file = tmp_path / "system-prompt.md"
+    prompt_file.write_text("Base.")
+
+    prompt = build_system_prompt(
+        config_path=str(tmp_path),
+        workspace_id="ws-1",
+        loaded_skills=[],
+        peers=[],
+    )
+
+    # Section heading plus the tool names the MCP variant is expected to show.
+    expected_fragments = (
+        "## Inter-Agent Communication",
+        "delegate_task",
+        "list_peers",
+        "send_message_to_user",
+    )
+    for fragment in expected_fragments:
+        assert fragment in prompt
+
+
+def test_a2a_instructions_cli_variant_when_disabled(tmp_path):
+    """Passing a2a_mcp=False yields the CLI subprocess variant for non-MCP runtimes."""
+    prompt_file = tmp_path / "system-prompt.md"
+    prompt_file.write_text("Base.")
+
+    options = {
+        "config_path": str(tmp_path),
+        "workspace_id": "ws-1",
+        "loaded_skills": [],
+        "peers": [],
+        "a2a_mcp": False,
+    }
+    prompt = build_system_prompt(**options)
+
+    for fragment in ("## Inter-Agent Communication", "molecule_runtime.a2a_cli"):
+        assert fragment in prompt
+    # MCP-only details must NOT leak into the CLI variant.
+    assert "send_message_to_user" not in prompt
+
+
+def test_hma_instructions_injected(tmp_path):
+    """The built system prompt contains the HMA persistent-memory section."""
+    prompt_file = tmp_path / "system-prompt.md"
+    prompt_file.write_text("Base.")
+
+    prompt = build_system_prompt(
+        config_path=str(tmp_path),
+        workspace_id="ws-1",
+        loaded_skills=[],
+        peers=[],
+    )
+
+    # Section heading plus both memory tool names.
+    for fragment in ("## Hierarchical Memory (HMA)", "commit_memory", "recall_memory"):
+        assert fragment in prompt
+
+
+def test_tool_instructions_precede_peer_section(tmp_path):
+    """The A2A section is rendered ahead of the peer list, whose IDs are operands of A2A tools."""
+    prompt_file = tmp_path / "system-prompt.md"
+    prompt_file.write_text("Base.")
+
+    worker_peer = {"id": "p1", "name": "Worker", "status": "active", "agent_card": None}
+    rendered = build_system_prompt(
+        config_path=str(tmp_path),
+        workspace_id="ws-1",
+        loaded_skills=[],
+        peers=[worker_peer],
+    )
+
+    # str.index raises ValueError if either heading is absent from the prompt.
+    a2a_at = rendered.index("## Inter-Agent Communication")
+    peers_at = rendered.index("## Your Peers")
+    assert a2a_at < peers_at, "A2A instructions must come before the peer list"