Merge pull request #2252 from Molecule-AI/staging

staging → main: auto-promote fcd87b9
Merge pull request #2259 from Molecule-AI/fix/harness-cleanup-failed-event
2026-04-28 22:34:16 -07:00 · 2026-04-29 04:30:34 +00:00 · 2026-04-29 04:30:15 +00:00 · 2026-04-28 21:01:44 -07:00 · 2026-04-28 21:00:38 -07:00 · 2026-04-29 03:56:35 +00:00
57 changed files with 3100 additions and 1037 deletions
@@ -13,3 +13,11 @@ workspace/entrypoint.sh text eol=lf
 # but keep LF for consistency across platforms.
 Dockerfile text eol=lf
 *.dockerfile text eol=lf
+
+# Snapshot golden files — workspace/tests/snapshots/*.txt is consumed by
+# byte-exact comparisons in test_platform_tools.py. A Windows contributor
+# with core.autocrlf=true would otherwise convert \n → \r\n on checkout, the
+# snapshot tests would fail mysteriously locally / pass in CI (or vice
+# versa), and the regen instructions in the test-file header would
+# produce LF files that disagree with the working-copy CRLF versions.
+workspace/tests/snapshots/*.txt text eol=lf
@@ -0,0 +1,80 @@
+# Dependabot — auto-bump pinned dependencies.
+#
+# Why this exists:
+#
+# All `uses:` references in .github/workflows/*.yml are pinned to commit
+# SHAs (with `# v<N>` comments for human readability) instead of mutable
+# tags like `@v4`. Tag pinning is a known supply-chain risk: a maintainer
+# (or compromised maintainer account) can repoint `@v4` to malicious code
+# and our pipelines silently pull it. SHA pinning closes that risk.
+#
+# But SHA pinning has a maintenance cost: each upstream legitimate fix
+# requires manually finding + bumping the SHA. Dependabot for Actions
+# closes that gap by opening PRs to bump pinned SHAs whenever upstream
+# tags a new version. Reviewer evaluates the bump like any other
+# dependency PR.
+#
+# Combined: SHA pinning gives us security, Dependabot keeps us current.
+
+version: 2
+updates:
+  # Commit-message convention shared by every entry below: the prefix
+  # is the bare type `chore`, and `include: scope` makes Dependabot
+  # append the dependency scope itself, yielding `chore(deps): ...`
+  # (or `chore(deps-dev): ...`). Spelling the scope inside the prefix
+  # as well would double it up as `chore(deps)(deps): ...`.
+
+  # GitHub Actions — every workflow file under .github/workflows/.
+  # Weekly cadence is enough for a CI surface this size; the supply-
+  # chain attack window is "minutes between repoint and pull," and
+  # weekly auto-bumps don't help with zero-days regardless. The point
+  # is to pull in non-zero-day fixes without operator effort, not to
+  # be real-time.
+  - package-ecosystem: github-actions
+    directory: "/"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 5
+    labels:
+      - dependencies
+      - github-actions
+    commit-message:
+      prefix: chore
+      include: scope
+
+  # Go module — workspace-server. Bumps go.mod deps via PR weekly.
+  - package-ecosystem: gomod
+    directory: "/workspace-server"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 5
+    labels:
+      - dependencies
+      - go
+    commit-message:
+      prefix: chore
+      include: scope
+
+  # npm — canvas (Next.js bundle). Largest dep tree in this repo;
+  # weekly cadence keeps the security surface fresh without flooding
+  # the queue. open-pull-requests-limit: 10 because npm churns more
+  # than the others.
+  - package-ecosystem: npm
+    directory: "/canvas"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 10
+    labels:
+      - dependencies
+      - npm
+    commit-message:
+      prefix: chore
+      include: scope
+
+  # Python — workspace runtime requirements. Pip/requirements.txt-
+  # backed rather than pyproject.toml; Dependabot supports both.
+  - package-ecosystem: pip
+    directory: "/workspace"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 5
+    labels:
+      - dependencies
+      - python
+    commit-message:
+      prefix: chore
+      include: scope
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+"""Lint SECRET_PATTERNS drift across known consumers of molecule-core's canonical.
+
+The canonical SECRET_PATTERNS array in
+.github/workflows/secret-scan.yml is mirrored by every other side
+that scans for credentials: the workspace-runtime's bundled
+pre-commit hook, the molecule-controlplane inlined copy, etc. The
+mirror is enforced socially today — when someone adds a new pattern
+to canonical (e.g. the sk-cp- MiniMax token after F1088), the other
+sides are supposed to be updated in lockstep.
+
+This script automates the check. Diffs the canonical's pattern set
+against each known public consumer and exits non-zero on any
+mismatch. Wired into a daily cron + on-push gate via
+.github/workflows/secret-pattern-drift.yml.
+
+Private-repo consumers (currently molecule-controlplane's inlined
+copy) are out of scope here because the molecule-core workflow's
+GITHUB_TOKEN can't read other private repos in the org. They're
+expected to self-monitor via their own copy of this script — not a
+hard barrier, just a future expansion.
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+import urllib.request
+from pathlib import Path
+
+CANONICAL_FILE = Path(".github/workflows/secret-scan.yml")
+
+# Public consumer mirrors. Each entry is (label, raw_url) — raw_url
+# points at the file's RAW content on the consumer's default branch
+# (or staging where applicable). Add an entry here when a new public
+# repo starts shipping its own SECRET_PATTERNS array.
+CONSUMERS: list[tuple[str, str]] = [
+    (
+        "molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh",
+        "https://raw.githubusercontent.com/Molecule-AI/molecule-ai-workspace-runtime/main/molecule_runtime/scripts/pre-commit-checks.sh",
+    ),
+]
+
+# Matches the SECRET_PATTERNS=( ... ) array in either yaml-indented
+# (the canonical workflow's `run:` block) or shell-flat (runtime
+# hook) format. Patterns inside are single-quoted Bash strings; we
+# pull each via _PATTERN_RE.
+#
+# Closing `)` is anchored to the start of a line (possibly indented)
+# because pattern comments like `# GitHub PAT (classic)` contain
+# their own `)` mid-line — a non-anchored regex would match through
+# the comment's paren and capture only the first pattern.
+_ARRAY_RE = re.compile(r"SECRET_PATTERNS=\((.*?)^\s*\)", re.DOTALL | re.MULTILINE)
+_PATTERN_RE = re.compile(r"'([^']+)'")
+
+
+def extract_patterns(content: str, source_label: str) -> list[str]:
+    """Pull the SECRET_PATTERNS list out of either format. Raises if missing."""
+    m = _ARRAY_RE.search(content)
+    if not m:
+        raise SystemExit(f"::error::{source_label}: SECRET_PATTERNS=(...) array not found")
+    return _PATTERN_RE.findall(m.group(1))
+
+
+def fetch(url: str) -> str:
+    req = urllib.request.Request(
+        url, headers={"User-Agent": "secret-pattern-drift-lint/1"}
+    )
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        return resp.read().decode("utf-8")
+
+
+def diff_patterns(canonical: list[str], consumer: list[str]) -> tuple[list[str], list[str]]:
+    """Return (missing_from_consumer, extra_in_consumer) — both sorted."""
+    canonical_set = set(canonical)
+    consumer_set = set(consumer)
+    return (
+        sorted(canonical_set - consumer_set),
+        sorted(consumer_set - canonical_set),
+    )
+
+
+def main() -> int:
+    if not CANONICAL_FILE.exists():
+        print(f"::error::canonical not found at {CANONICAL_FILE}")
+        return 1
+
+    canonical = extract_patterns(CANONICAL_FILE.read_text(), str(CANONICAL_FILE))
+    print(f"canonical ({CANONICAL_FILE}): {len(canonical)} patterns")
+
+    drift = False
+    for label, url in CONSUMERS:
+        try:
+            content = fetch(url)
+        except Exception as e:
+            # Fetch failures are warnings, not errors. A consumer
+            # whose default branch was just renamed (or whose file
+            # moved) shouldn't fail the lint until someone updates
+            # the URL above. Real drift is the failure mode this
+            # gate exists to catch — fetch reliability isn't.
+            print(f"::warning::{label}: fetch failed ({e}) — skipping")
+            continue
+
+        consumer = extract_patterns(content, label)
+        missing, extra = diff_patterns(canonical, consumer)
+        if not missing and not extra:
+            print(f"  ✓ {label}: aligned ({len(consumer)} patterns)")
+            continue
+
+        drift = True
+        print(f"::error::DRIFT in {label}:")
+        for p in missing:
+            print(f"  -  missing from consumer: {p!r}")
+        for p in extra:
+            print(f"  -  extra in consumer (not in canonical): {p!r}")
+
+    if drift:
+        print()
+        print("::error::SECRET_PATTERNS drift detected. Bring consumer(s) into")
+        print("alignment with the canonical SECRET_PATTERNS array in")
+        print(f"{CANONICAL_FILE} by adding the missing patterns and removing")
+        print("any extras. The two sides must stay byte-aligned on the pattern")
+        print("list — the runtime hook is the developer's local pre-commit,")
+        print("the canonical is the org-wide CI gate, divergence means a token")
+        print("can pass one but get rejected by the other.")
+        return 1
+
+    print()
+    print("✓ All known consumers aligned with canonical SECRET_PATTERNS.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,265 @@
+name: Auto-promote :latest after main image build
+
+# Retags `ghcr.io/molecule-ai/{platform,platform-tenant}:staging-<sha>`
+# → `:latest` after either the image build or E2E completes on a `main`
+# push, gated on E2E Staging SaaS not being red for that SHA.
+#
+# Why two triggers:
+#
+#   `publish-workspace-server-image` and `e2e-staging-saas` are both
+#   paths-filtered, but with DIFFERENT path sets:
+#
+#     publish-workspace-server-image:
+#       workspace-server/**, canvas/**, manifest.json
+#
+#     e2e-staging-saas (full lifecycle):
+#       workspace-server/internal/handlers/{registry,workspace_provision,
+#       a2a_proxy}.go, workspace-server/internal/middleware/**,
+#       workspace-server/internal/provisioner/**, tests/e2e/test_staging_full_saas.sh
+#
+#   The E2E set is almost entirely a SUBSET of the publish set (only
+#   tests/e2e/test_staging_full_saas.sh falls outside it). So:
+#     - canvas/** changes → publish fires, E2E does not
+#     - workspace-server/cmd/** changes → publish fires, E2E does not
+#     - workspace-server/internal/sweep/** → publish fires, E2E does not
+#
+#   The previous version triggered ONLY on E2E completion, which meant
+#   non-E2E-path changes (canvas, cmd, sweep, etc.) rebuilt the image
+#   but never advanced `:latest`. Result: as of 2026-04-28 this workflow
+#   had run zero times since merge despite eight main pushes — `:latest`
+#   was ~7 hours / 9 PRs behind main with no human realising. See
+#   `molecule-core` Slack discussion 2026-04-28.
+#
+#   Adding `publish-workspace-server-image` as a second trigger closes
+#   the gap: any image rebuild on main eligibly advances `:latest`.
+#
+# Why E2E remains a kill-switch (not the trigger):
+#
+#   When E2E DID run for this SHA and ended red, we abort — `:latest`
+#   stays on the prior known-good digest. When E2E didn't run (paths
+#   filtered out), we proceed: pre-merge gates already validated this
+#   SHA on staging via auto-promote-staging requiring CI + E2E Canvas +
+#   E2E API + CodeQL all green. Image content for non-E2E-paths
+#   (canvas, cmd, sweep) is exercised by those staging gates.
+#
+# Why `main` only:
+#
+#   `:latest` is what prod tenants pull. We only want SHAs that have
+#   reached main (via auto-promote-staging) to advance `:latest`.
+#   Triggering on staging would let a staging-only revert advance
+#   `:latest` to a SHA that never reaches main, breaking the "production
+#   runs what's on main" invariant.
+#
+# Idempotency:
+#
+#   When a SHA touches paths that match BOTH publish and E2E, both
+#   workflows fire and complete. Both trigger this workflow on
+#   completion → two runs race. Both retag `:staging-<sha>` →
+#   `:latest`. crane tag is idempotent (re-tagging the same digest is a
+#   no-op), so the second run is harmless. concurrency group serializes
+#   them anyway.
+
+on:
+  workflow_run:
+    workflows:
+      - 'E2E Staging SaaS (full lifecycle)'
+      - 'publish-workspace-server-image'
+    types: [completed]
+    branches: [main]
+  workflow_dispatch:
+    inputs:
+      sha:
+        description: 'Short sha to promote (override; defaults to upstream workflow_run head_sha)'
+        required: false
+        type: string
+
+permissions:
+  contents: read
+  packages: write
+
+concurrency:
+  # Serialize promotes per-SHA so the publish+E2E both-fired race lands
+  # cleanly. Different SHAs can promote in parallel.
+  group: auto-promote-latest-${{ github.event.workflow_run.head_sha || github.event.inputs.sha || github.sha }}
+  cancel-in-progress: false
+
+env:
+  IMAGE_NAME: ghcr.io/molecule-ai/platform
+  TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant
+
+jobs:
+  promote:
+    # Proceed if upstream succeeded OR manual dispatch. Upstream-failure
+    # paths are filtered here; the E2E-was-red kill-switch lives in the
+    # gate-check step below (covers the case where upstream is publish
+    # success but E2E for the same SHA failed).
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
+    runs-on: ubuntu-latest
+    steps:
+      - name: Compute short sha
+        id: sha
+        env:
+          # Expression values are handed over as environment variables
+          # rather than spliced into the script text, so an operator-
+          # supplied `sha` can never be parsed as shell syntax.
+          INPUT_SHA: ${{ github.event.inputs.sha }}
+          HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
+        run: |
+          set -euo pipefail
+          # Manual override wins; otherwise use the upstream run's SHA.
+          FULL="${INPUT_SHA:-$HEAD_SHA}"
+          # Later steps interpolate the short sha into image refs, so
+          # refuse anything that is not a 7-40 character hex string
+          # (this also catches a manual dispatch with no sha at all).
+          if ! [[ "$FULL" =~ ^[0-9a-fA-F]{7,40}$ ]]; then
+            echo "::error::Refusing to promote: '${FULL}' is not a 7-40 character hex commit sha"
+            exit 1
+          fi
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+          echo "full=${FULL}" >> "$GITHUB_OUTPUT"
+
+      - name: Gate — E2E Staging SaaS state for this SHA
+        # When upstream IS E2E success, we know it's green (filtered by
+        # the job-level `if` already). When upstream is publish, look up
+        # E2E state for the same SHA. Five buckets:
+        #
+        #   - completed/success: E2E confirmed safe → proceed
+        #   - completed/failure|cancelled|timed_out: E2E found a
+        #     regression → ABORT (exit 1), `:latest` stays put
+        #   - in_progress|queued|requested: E2E is RACING with publish
+        #     for a runtime-touching SHA. publish typically completes
+        #     ~5-10min before E2E (~10-15min). If we promote on the
+        #     publish signal here, a later E2E failure can't roll back
+        #     `:latest` — it'd already be wrongly advanced. So we DEFER:
+        #     skip subsequent steps (proceed=false) and let E2E's own
+        #     completion event re-fire this workflow, which then takes
+        #     the upstream-is-E2E path. exit 0 so the run shows as
+        #     success rather than a noisy fake-failure.
+        #   - none/none: the run list for this SHA is EMPTY, i.e. E2E
+        #     was paths-filtered out (the change touched
+        #     canvas/cmd/sweep/etc. — paths covered by publish but not
+        #     by E2E). pre-merge gates on staging already validated
+        #     this SHA → proceed.
+        #   - anything else, including lookup-failed/none when the
+        #     `gh run list` call itself errors: ABORT (exit 1). A
+        #     failed lookup must not be mistaken for "E2E never ran".
+        #
+        # Manual dispatch skips this check — operator override.
+        id: gate
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          SHA: ${{ steps.sha.outputs.full }}
+          UPSTREAM_NAME: ${{ github.event.workflow_run.name }}
+          EVENT_NAME: ${{ github.event_name }}
+        run: |
+          set -euo pipefail
+
+          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
+            echo "proceed=true" >> "$GITHUB_OUTPUT"
+            echo "::notice::Manual dispatch — skipping E2E gate (operator override)"
+            exit 0
+          fi
+
+          if [ "$UPSTREAM_NAME" = "E2E Staging SaaS (full lifecycle)" ]; then
+            echo "proceed=true" >> "$GITHUB_OUTPUT"
+            echo "::notice::Upstream is E2E itself (success per job-level if) — gate trivially satisfied"
+            exit 0
+          fi
+
+          # Upstream is publish-workspace-server-image. Check E2E state.
+          #
+          # The jq filter handles the empty list explicitly: `.[0]` on
+          # `[]` is null, and interpolating null renders the text
+          # "null", so without the length check a paths-filtered SHA
+          # would come back as "null/none" and land in the abort
+          # branch instead of the none/none one.
+          #
+          # A failing `gh` call gets its own sentinel (and keeps its
+          # stderr in the log) so an API error falls through to the
+          # catch-all abort rather than fail-open as none/none.
+          RESULT=$(gh run list \
+            --repo "$REPO" \
+            --workflow e2e-staging-saas.yml \
+            --branch main \
+            --commit "$SHA" \
+            --limit 1 \
+            --json status,conclusion \
+            --jq 'if length == 0 then "none/none" else (.[0] | "\(.status)/\(.conclusion // "none")") end' \
+            || echo "lookup-failed/none")
+
+          echo "E2E Staging SaaS for ${SHA:0:7}: $RESULT"
+
+          case "$RESULT" in
+            completed/success)
+              echo "proceed=true" >> "$GITHUB_OUTPUT"
+              echo "::notice::E2E green for this SHA — proceeding with promote"
+              ;;
+            completed/failure|completed/cancelled|completed/timed_out)
+              echo "proceed=false" >> "$GITHUB_OUTPUT"
+              {
+                echo "## ❌ Auto-promote aborted — E2E Staging SaaS failed"
+                echo
+                echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\`"
+                echo "\`:latest\` stays on the prior known-good digest."
+                echo
+                echo "If the failure was a flake, manually dispatch this workflow with the same sha to override."
+              } >> "$GITHUB_STEP_SUMMARY"
+              exit 1
+              ;;
+            in_progress/*|queued/*|requested/*|waiting/*|pending/*)
+              echo "proceed=false" >> "$GITHUB_OUTPUT"
+              {
+                echo "## ⏳ Auto-promote deferred — E2E Staging SaaS still running"
+                echo
+                echo "Publish completed before E2E for \`${SHA:0:7}\` (state: \`$RESULT\`)."
+                echo "Skipping retag here — E2E's own completion event will re-fire this workflow."
+                echo "If E2E ends green, that run promotes \`:latest\`. If red, it aborts."
+              } >> "$GITHUB_STEP_SUMMARY"
+              ;;
+            none/none)
+              echo "proceed=true" >> "$GITHUB_OUTPUT"
+              echo "::notice::E2E paths-filtered out for this SHA — pre-merge staging gates carry"
+              ;;
+            *)
+              echo "proceed=false" >> "$GITHUB_OUTPUT"
+              {
+                echo "## ❓ Auto-promote aborted — unexpected E2E state"
+                echo
+                echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\` (unhandled)"
+                echo "Manual investigation needed; re-dispatch with the same sha once resolved."
+              } >> "$GITHUB_STEP_SUMMARY"
+              exit 1
+              ;;
+          esac
+
+      # Install the crane CLI used by the login, verify and retag
+      # steps below. Runs only when the gate step set proceed=true.
+      # The action is referenced by commit SHA; the trailing comment
+      # records the release tag that SHA corresponds to.
+      - if: steps.gate.outputs.proceed == 'true'
+        uses: imjasonh/setup-crane@31b88efe9de28ae0ffa220711af4b60be9435f6e # v0.4
+
+      - name: GHCR login
+        if: steps.gate.outputs.proceed == 'true'
+        env:
+          # Hand the credential and username to the script through the
+          # environment instead of expanding them into the script text,
+          # so neither is ever written into the generated shell file.
+          GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GHCR_USER: ${{ github.actor }}
+        run: |
+          echo "$GHCR_TOKEN" | \
+            crane auth login ghcr.io -u "$GHCR_USER" --password-stdin
+
+      - name: Verify :staging-<sha> exists for both images
+        # Guard against a half-promote: if only one of the two images
+        # has a :staging-<sha> tag, stop before anything is retagged
+        # rather than move platform while platform-tenant stays stale.
+        if: steps.gate.outputs.proceed == 'true'
+        run: |
+          set -euo pipefail
+          for repo in "${IMAGE_NAME}" "${TENANT_IMAGE_NAME}"; do
+            ref="${repo}:staging-${{ steps.sha.outputs.short }}"
+            if crane manifest "$ref" >/dev/null 2>&1; then
+              echo "  ok: $ref exists"
+              continue
+            fi
+            echo "::error::Missing tag: $ref"
+            echo "::error::publish-workspace-server-image must complete on this SHA before auto-promote can retag :latest."
+            exit 1
+          done
+
+      - name: Retag platform :staging-<sha> → :latest
+        if: steps.gate.outputs.proceed == 'true'
+        run: |
+          # Point :latest of the platform image at this SHA's staging tag.
+          source_ref="${IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}"
+          crane tag "$source_ref" latest
+
+      - name: Retag tenant :staging-<sha> → :latest
+        if: steps.gate.outputs.proceed == 'true'
+        run: |
+          # Point :latest of the tenant image at this SHA's staging tag.
+          source_ref="${TENANT_IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}"
+          crane tag "$source_ref" latest
+
+      - name: Summary
+        if: steps.gate.outputs.proceed == 'true'
+        run: |
+          # Work out the "what triggered this" bullet first, then append
+          # the whole report to the job summary in one write.
+          short="${{ steps.sha.outputs.short }}"
+          case "${{ github.event_name }}" in
+            workflow_dispatch)
+              origin="- Trigger: manual dispatch"
+              ;;
+            *)
+              origin="- Upstream: \`${{ github.event.workflow_run.name }}\` ([run](${{ github.event.workflow_run.html_url }}))"
+              ;;
+          esac
+          {
+            echo "## :latest promoted to ${short}"
+            echo
+            echo "$origin"
+            echo "- platform:staging-${short} → :latest"
+            echo "- platform-tenant:staging-${short} → :latest"
+            echo
+            echo "Tenant fleet auto-pulls within 5 min via IMAGE_AUTO_REFRESH=true."
+            echo "Force immediate fanout: dispatch redeploy-tenants-on-main.yml."
+          } >> "$GITHUB_STEP_SUMMARY"
@@ -1,25 +1,62 @@
 name: Auto-promote staging → main

 # Fires after any of the staging-branch quality gates complete. When ALL
-# required gates are green on the same staging SHA, fast-forwards `main`
-# to that SHA automatically — closing the gap that historically let
-# features sit on staging for weeks waiting for a bulk promotion PR
-# (see molecule-core#1496 for the 1172-commit example).
+# required gates are green on the same staging SHA, opens (or re-uses)
+# a PR `staging → main` and enables auto-merge so the merge queue lands
+# it. Closes the gap that historically let features sit on staging for
+# weeks waiting for a bulk promotion PR (see molecule-core#1496 for the
+# 1172-commit example).
+#
+# 2026-04-28 rewrite (PR #142): the previous version did a direct
+# `git merge --ff-only origin staging && git push origin main`. That
+# breaks against main's branch-protection ruleset, which requires
+# status checks "set by the expected GitHub apps" — direct pushes
+# can't satisfy that condition (only PR merges through the queue can).
+# The workflow was failing every tick with:
+#   remote: error: GH006: Protected branch update failed for refs/heads/main.
+#   remote: - Required status checks ... were not set by the expected GitHub apps.
+# Fix: mirror the PR-based pattern from auto-sync-main-to-staging.yml
+# (the reverse-direction sync, fixed in #2234 for the same reason).
+# Both directions now use the same merge-queue path that humans use,
+# no special-case bypass.
 #
 # Safety model:
 # - Runs ONLY on workflow_run events for the staging branch.
 # - Requires EVERY named gate workflow to have the same head_sha and
 #   all be `conclusion == success`. If any of them is red, skipped,
 #   cancelled, or pending, we abort (stay on the current main).
-# - Uses --ff-only: refuses to advance main if main has diverged from
-#   the staging history (e.g. a hotfix landed directly on main). In
-#   that case a human resolves the fork.
-# - Writes a commit summary so the promote shows up in git log as a
-#   deliberate act, not a stealth move.
+# - The PR base=main head=staging path lets GitHub itself enforce
+#   branch protection. If main has diverged from staging or required
+#   checks aren't satisfied, the merge queue declines the PR — no
+#   need for a manual ff-only ancestry check here.
+# - Loop safety: the auto-sync-main-to-staging workflow fires when
+#   main lands the auto-promote PR, but its merge into staging is by
+#   GITHUB_TOKEN which doesn't trigger downstream workflow_run events
+#   (GitHub Actions safety). So this workflow doesn't re-fire from
+#   its own promote landing.
 #
-# **Initial rollout:** ship this file but leave the `enabled` input set
-# such that nothing auto-promotes until staging CI has been reliably
-# green for a few days. Toggle via repo variable `AUTO_PROMOTE_ENABLED`.
+# Toggle via repo variable AUTO_PROMOTE_ENABLED (true/unset). When
+# unset, the workflow logs what it would have done but doesn't open
+# the PR — useful for dry-running the gate logic without surfacing
+# a noisy PR while staging CI is still flaky.
+#
+# **One-time repo setting (load-bearing):** this workflow opens the
+# staging→main PR via `gh pr create` using the default GITHUB_TOKEN.
+# Since GitHub's 2022 default change, that token cannot create or
+# approve PRs unless the repo opts in. The toggle is at:
+#
+#   Settings → Actions → General → Workflow permissions
+#   → ✅ Allow GitHub Actions to create and approve pull requests
+#
+# Without it, every workflow_run fails with:
+#
+#   pull request create failed: GraphQL: GitHub Actions is not
+#   permitted to create or approve pull requests (createPullRequest)
+#
+# Observed 2026-04-29 01:43 UTC blocking promotion of fcd87b9 (PRs
+# #2248 + #2249); manually bridged via PR #2252. Re-check this
+# setting if auto-promote starts failing with createPullRequest
+# errors after a repo or org admin change.

 on:
  workflow_run:
@@ -38,6 +75,7 @@ on:

 permissions:
  contents: write
+  pull-requests: write

 jobs:
  check-all-gates-green:
@@ -61,13 +99,30 @@ jobs:
        run: |
          set -euo pipefail

-          # Required gate workflow names. Must match the `name:` field
-          # in the respective .github/workflows/*.yml files.
+          # Required gate workflow files. Use file paths (relative to
+          # .github/workflows/) rather than display names because:
+          #
+          #   1. `gh run list --workflow=<name>` is ambiguous when two
+          #      workflows have the same `name:` — observed 2026-04-28
+          #      with "CodeQL" matching both `codeql.yml` (explicit) and
+          #      GitHub's UI-configured Code-quality default setup
+          #      (internal "codeql"). gh CLI returns "could not resolve
+          #      to a unique workflow" → empty result → gate evaluated
+          #      as missing/none → auto-promote dead-locked despite all
+          #      checks actually passing.
+          #
+          #   2. File paths are the unique identifier for workflows;
+          #      `name:` is just a display string and can collide.
+          #
+          # When adding/removing a gate, update this list AND the
+          # branch-protection required-checks list (which uses check-run
+          # display names, not workflow names; the two are decoupled and
+          # should be kept in sync manually).
          GATES=(
-            "CI"
-            "E2E Staging Canvas (Playwright)"
-            "E2E API Smoke Test"
-            "CodeQL"
+            "ci.yml"
+            "e2e-staging-canvas.yml"
+            "e2e-api.yml"
+            "codeql.yml"
          )

          echo "head_sha=${HEAD_SHA}" >> "$GITHUB_OUTPUT"
@@ -117,14 +172,14 @@ jobs:
          set -eu
          # Repo variable AUTO_PROMOTE_ENABLED=true flips this on. While
          # it's unset, the workflow dry-runs (logs what it would have
-          # done) but doesn't actually push to main. Set the variable in
+          # done) but doesn't open the promote PR. Set the variable in
          # Settings → Secrets and variables → Actions → Variables.
          if [ "${AUTO_PROMOTE_ENABLED:-}" != "true" ] && [ "${FORCE_INPUT:-false}" != "true" ]; then
            {
              echo "## ⏸ Auto-promote disabled"
              echo
              echo "Repo variable \`AUTO_PROMOTE_ENABLED\` is not set to \`true\`."
-              echo "All gates are green on staging; would have promoted to \`main\`."
+              echo "All gates are green on staging; would have opened a promote PR to \`main\`."
              echo
              echo "To enable: Settings → Secrets and variables → Actions → Variables → \`AUTO_PROMOTE_ENABLED=true\`."
              echo "To test once manually: workflow_dispatch with \`force=true\`."
@@ -133,50 +188,55 @@ jobs:
            exit 0
          fi

-      - name: Checkout main
-        if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
-        uses: actions/checkout@v4
-        with:
-          ref: main
-          fetch-depth: 0
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Fast-forward main → staging HEAD
+      - name: Open (or reuse) staging → main promote PR + enable auto-merge
        if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
          TARGET_SHA: ${{ needs.check-all-gates-green.outputs.head_sha }}
        run: |
-          set -eu
-          git config user.name "github-actions[bot]"
-          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          set -euo pipefail

-          git fetch origin staging
-          git fetch origin main
+          # Look for an existing open promote PR (idempotent on re-run
+          # of the workflow). The PR's head IS the staging branch — the
+          # whole point is "advance main to staging's tip", so we don't
+          # need a per-SHA branch like auto-sync-main-to-staging uses.
+          PR_NUM=$(gh pr list --repo "$REPO" \
+            --base main --head staging --state open \
+            --json number --jq '.[0].number // ""')

-          # Refuse to advance main if it's diverged from staging history.
-          # Someone landed a commit directly on main that's not on
-          # staging → human needs to decide how to reconcile.
-          if ! git merge-base --is-ancestor "$(git rev-parse origin/main)" "$TARGET_SHA"; then
-            {
-              echo "## ❌ Auto-promote refused — main has diverged"
-              echo
-              echo "\`main\` (\`$(git rev-parse --short origin/main)\`) is not an ancestor of staging (\`${TARGET_SHA:0:7}\`)."
-              echo "Someone committed directly to main or the histories forked."
-              echo
-              echo "Resolve manually: merge main into staging, get CI green on the merged commit,"
-              echo "then the auto-promote will succeed on the next run."
-            } >> "$GITHUB_STEP_SUMMARY"
-            exit 1
+          if [ -z "$PR_NUM" ]; then
+            TITLE="staging → main: auto-promote ${TARGET_SHA:0:7}"
+            BODY_FILE=$(mktemp)
+            cat > "$BODY_FILE" <<EOFBODY
+          Automated promotion of \`staging\` (\`${TARGET_SHA:0:8}\`) to \`main\`. All required staging gates green at this SHA: CI, E2E Staging Canvas, E2E API Smoke, CodeQL.
+
+          This PR is auto-generated by \`.github/workflows/auto-promote-staging.yml\` whenever every required gate completes green on the same staging SHA. It exists because main's branch protection requires status checks "set by the expected GitHub apps" — direct \`git push\` from a workflow can't satisfy that, only PR merges through the queue can.
+
+          Merge queue lands this; no human action needed unless gates fail. Reverse-direction sync (the merge commit on main → staging) is handled by \`auto-sync-main-to-staging.yml\`.
+          EOFBODY
+            PR_URL=$(gh pr create --repo "$REPO" \
+              --base main --head staging \
+              --title "$TITLE" \
+              --body-file "$BODY_FILE")
+            PR_NUM=$(echo "$PR_URL" | grep -oE '[0-9]+$' | tail -1)
+            rm -f "$BODY_FILE"
+            echo "::notice::Opened PR #${PR_NUM}"
+          else
+            echo "::notice::Re-using existing promote PR #${PR_NUM}"
          fi

-          # Fast-forward main to the target SHA.
-          git checkout main
-          git merge --ff-only "$TARGET_SHA"
-          git push origin main
+          # Enable auto-merge — the merge queue picks it up once
+          # required gates are green on the merge_group ref.
+          if ! gh pr merge "$PR_NUM" --repo "$REPO" --auto --merge 2>&1; then
+            echo "::warning::Failed to enable auto-merge on PR #${PR_NUM} — operator may need to merge manually."
+          fi

          {
-            echo "## ✅ Auto-promoted main → ${TARGET_SHA:0:7}"
+            echo "## ✅ Auto-promote PR opened"
            echo
-            echo "All gate workflows green on staging at this SHA."
-            echo "\`main\` fast-forwarded to match."
+            echo "- Source: staging at \`${TARGET_SHA:0:8}\`"
+            echo "- PR: #${PR_NUM}"
+            echo
+            echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail."
          } >> "$GITHUB_STEP_SUMMARY"
@@ -0,0 +1,213 @@
+name: Auto-sync main → staging
+
+# Reflects every push to `main` back onto `staging` so the
+# staging-as-superset-of-main invariant holds.
+#
+# Background:
+#
+# `auto-promote-staging.yml` used to advance main via
+# `git merge --ff-only` + `git push origin main` — a clean
+# fast-forward, no merge commit. It now opens a `staging → main` PR
+# and enables auto-merge with `--merge`, so promotions — like manual
+# merges of `staging → main` PRs through the GitHub UI / API —
+# create a merge commit on main that staging doesn't have. The next
+# `staging → main` PR then evaluates as "BEHIND" because staging is
+# missing that merge commit, requiring a manual
+# `gh pr update-branch` round-trip.
+#
+# This happened twice on 2026-04-28 (PRs #2202, #2205, both manual
+# bridges). Each time the bridge needed update-branch + a re-CI
+# round before merging. Operationally annoying and avoidable.
+#
+# Architecture:
+#
+# This repo's `staging` branch is protected by a `merge_queue`
+# ruleset (id 15500102) that blocks ALL direct pushes — no bypass
+# even for org admins or the GitHub Actions integration. Direct
+# `git push origin staging` returns GH013. So instead of pushing
+# directly, this workflow:
+#
+#   1. Checks if main is already in staging's ancestry → no-op.
+#   2. Creates an `auto-sync/main-<sha>` branch from staging.
+#   3. Tries `git merge --ff-only origin/main` → if staging hasn't
+#      diverged this is a clean ff.
+#   4. Otherwise `git merge --no-ff origin/main` to absorb main's
+#      tip while keeping staging's history.
+#   5. Pushes the auto-sync branch.
+#   6. Opens a PR (base=staging, head=auto-sync/main-<sha>) and
+#      enables auto-merge so the merge queue lands it.
+#
+# This mirrors the path human PRs take through staging — same
+# rules, same gates, no special-case bypass.
+#
+# Loop safety:
+#
+# `GITHUB_TOKEN`-authored merges (including the merge queue's land
+# of the auto-sync PR) do NOT trigger downstream workflow runs
+# (GitHub Actions safety). So when the auto-sync PR lands on
+# staging, `auto-promote-staging.yml` is NOT triggered by that
+# push. The next developer push to staging triggers auto-promote
+# normally. No loop possible.
+#
+# Concurrency:
+#
+# Two pushes to main in quick succession (e.g., a manual UI merge
+# immediately followed by an auto-promote-staging promotion) could
+# otherwise open two overlapping auto-sync PRs. The concurrency
+# group serializes runs; the second waits for the first to exit.
+# GitHub keeps at most one pending run per group, so a third push
+# replaces the waiting run — harmless, because every run fetches
+# the latest `origin/main` before deciding what to sync.
+# (The first run exits after opening + auto-merge-queueing the PR,
+# not after the merge actually completes — so multiple PRs can be
+# open simultaneously, but the merge queue handles them serially.)
+
+# Fire on every push that lands on main.
+on:
+  push:
+    branches:
+      - main
+
+# contents: write       → push the auto-sync/* branch.
+# pull-requests: write  → open the sync PR and enable auto-merge.
+permissions:
+  pull-requests: write
+  contents: write
+
+# One run at a time; a newer run queues behind the in-flight one
+# instead of cancelling it.
+concurrency:
+  cancel-in-progress: false
+  group: auto-sync-main-to-staging
+
+jobs:
+  sync-staging:
+    # Runs on the self-hosted Mac mini, like the rest of this repo's
+    # workflows.
+    runs-on:
+      - self-hosted
+      - macos
+      - arm64
+    steps:
+      # Full history of staging is fetched so the ancestry check and
+      # the merge in the later steps can see both branches.
+      - name: Checkout staging
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+        with:
+          ref: staging
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      # Any merge commit created by this job is attributed to the
+      # github-actions bot identity.
+      - name: Configure git author
+        run: |
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git config user.name "github-actions[bot]"
+
+      # Decides whether a sync PR is needed. Outputs:
+      #   needs_sync  'true' when origin/main is not an ancestor of the
+      #               checked-out staging HEAD, else 'false'
+      #   main_short  8-char short sha of origin/main (only when syncing)
+      #   branch      auto-sync/main-<main_short>    (only when syncing)
+      - name: Check if staging already contains main
+        id: check
+        run: |
+          set -euo pipefail
+          git fetch origin main
+          main_tip=$(git rev-parse --short=8 origin/main)
+
+          if ! git merge-base --is-ancestor origin/main HEAD; then
+            {
+              echo "needs_sync=true"
+              echo "main_short=${main_tip}"
+              echo "branch=auto-sync/main-${main_tip}"
+            } >> "$GITHUB_OUTPUT"
+            echo "::notice::staging is missing main's tip (${main_tip}) — opening sync PR"
+            exit 0
+          fi
+
+          # Nothing to do: record the no-op in the run summary.
+          echo "needs_sync=false" >> "$GITHUB_OUTPUT"
+          {
+            echo "## ✅ No-op"
+            echo
+            echo "staging already contains \`origin/main\` (${main_tip})."
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      # Builds the auto-sync branch from the staging checkout and brings
+      # origin/main into it. Output:
+      #   did_ff  'true' when origin/main could be fast-forwarded onto
+      #           staging, 'false' when a merge commit was required.
+      # Fails the job (after aborting the merge) on conflicts.
+      - name: Create auto-sync branch + merge main
+        if: steps.check.outputs.needs_sync == 'true'
+        id: prep
+        env:
+          # Handed to the script through the environment instead of
+          # being expanded into the script text, matching how the
+          # "Open auto-sync PR" step receives it.
+          BRANCH: ${{ steps.check.outputs.branch }}
+        run: |
+          set -euo pipefail
+
+          # (Re)create the branch at the current staging checkout. -B
+          # resets a leftover branch of the same name, so a re-run for
+          # the same main sha starts from the latest staging and
+          # absorbs any staging-side commits that landed since.
+          git checkout -B "$BRANCH"
+
+          if git merge --ff-only origin/main; then
+            echo "did_ff=true" >> "$GITHUB_OUTPUT"
+            echo "::notice::Fast-forwarded ${BRANCH} to origin/main"
+          else
+            # staging has diverged from main: fall back to a merge commit.
+            echo "did_ff=false" >> "$GITHUB_OUTPUT"
+            if ! git merge --no-ff origin/main -m "chore: sync main → staging (auto)"; then
+              # Hygiene: leave the work tree clean before failing.
+              git merge --abort || true
+              {
+                echo "## ❌ Conflict"
+                echo
+                echo "Auto-merge \`main → staging\` failed with conflicts."
+                echo "A human needs to resolve manually."
+              } >> "$GITHUB_STEP_SUMMARY"
+              exit 1
+            fi
+          fi
+
+      # Publishes the prepared auto-sync branch so a PR can be opened
+      # from it.
+      - name: Push auto-sync branch
+        if: steps.check.outputs.needs_sync == 'true'
+        env:
+          # Passed via the environment rather than interpolated into the
+          # script, matching the "Open auto-sync PR" step.
+          BRANCH: ${{ steps.check.outputs.branch }}
+        run: |
+          set -euo pipefail
+          # Force-with-lease so a concurrent auto-sync run can't
+          # silently clobber an in-flight branch we just updated. If a
+          # different writer touched the branch, we abort and the next
+          # run picks up the latest state.
+          git push --force-with-lease origin "$BRANCH"
+
+      # Opens (or re-uses) the PR from the auto-sync branch into staging,
+      # enables auto-merge on it, and writes a run summary. A failure to
+      # enable auto-merge is downgraded to a warning; a failure to
+      # create the PR fails the step.
+      - name: Open auto-sync PR + enable auto-merge
+        if: steps.check.outputs.needs_sync == 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          BRANCH: ${{ steps.check.outputs.branch }}
+          MAIN_SHORT: ${{ steps.check.outputs.main_short }}
+          DID_FF: ${{ steps.prep.outputs.did_ff }}
+        run: |
+          set -euo pipefail
+
+          # Find existing PR for this branch (idempotent on workflow
+          # restart) before creating a new one.
+          PR_NUM=$(gh pr list --head "$BRANCH" --base staging --state open --json number --jq '.[0].number // ""')
+
+          if [ -z "$PR_NUM" ]; then
+            # Body lives in a temp file to keep the multi-line content
+            # out of the YAML block scalar (un-indented newlines inside
+            # an inline shell string break YAML parsing).
+            BODY_FILE=$(mktemp)
+            # This job runs on a persistent self-hosted runner, so the
+            # temp file must be removed on every exit path — including
+            # `gh pr create` failing under `set -e`.
+            trap 'rm -f "$BODY_FILE"' EXIT
+            if [ "$DID_FF" = "true" ]; then
+              TITLE="chore: sync main → staging (auto, ff to ${MAIN_SHORT})"
+              cat > "$BODY_FILE" <<EOFBODY
+          Automated fast-forward of \`staging\` to \`origin/main\` (\`${MAIN_SHORT}\`). Staging has no in-flight commits that diverge from main. Merge queue lands this; no human action needed.
+
+          This PR is auto-generated by \`.github/workflows/auto-sync-main-to-staging.yml\` on every push to \`main\`. It exists because this repo's \`staging\` branch has a \`merge_queue\` ruleset that blocks direct pushes — even from the GitHub Actions integration.
+          EOFBODY
+            else
+              TITLE="chore: sync main → staging (auto, merge ${MAIN_SHORT})"
+              cat > "$BODY_FILE" <<EOFBODY
+          Automated merge of \`origin/main\` (\`${MAIN_SHORT}\`) into \`staging\`. Staging has commits main doesn't, so this is a non-ff merge that absorbs main's tip. Merge queue lands this.
+
+          This PR is auto-generated by \`.github/workflows/auto-sync-main-to-staging.yml\` on every push to \`main\`.
+          EOFBODY
+            fi
+
+            # gh pr create prints the URL on stdout; extract the PR number.
+            PR_URL=$(gh pr create \
+              --base staging \
+              --head "$BRANCH" \
+              --title "$TITLE" \
+              --body-file "$BODY_FILE")
+            PR_NUM=$(echo "$PR_URL" | grep -oE '[0-9]+$' | tail -1)
+            echo "::notice::Opened PR #${PR_NUM}"
+          else
+            echo "::notice::Re-using existing PR #${PR_NUM} for ${BRANCH}"
+          fi
+
+          # Enable auto-merge — the merge queue picks it up once
+          # required gates are green. Use --merge for merge commits
+          # (matches the rest of this repo's PR convention).
+          if ! gh pr merge "$PR_NUM" --auto --merge 2>&1; then
+            echo "::warning::Failed to enable auto-merge on PR #${PR_NUM} — operator may need to merge manually."
+          fi
+
+          {
+            echo "## ✅ Auto-sync PR opened"
+            echo
+            echo "- Branch: \`$BRANCH\`"
+            echo "- PR: #$PR_NUM"
+            echo "- Strategy: $([ "$DID_FF" = "true" ] && echo "ff" || echo "merge commit")"
+            echo
+            echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail."
+          } >> "$GITHUB_STEP_SUMMARY"
@@ -38,7 +38,7 @@ jobs:
  tag:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          fetch-depth: 0    # need full tag history for `git describe` / sort

@@ -26,7 +26,7 @@ jobs:
    name: Block forbidden paths
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          fetch-depth: 2  # need previous commit to diff against on push events

@@ -66,7 +66,7 @@ jobs:
      E2E_RUN_ID: "canary-${{ github.run_id }}"

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Verify admin token present
        run: |
@@ -98,7 +98,7 @@ jobs:
      # next deploy window.
      - name: Open issue on failure
        if: failure()
-        uses: actions/github-script@v7
+        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
        env:
          # Inject the workflow path explicitly — context.workflow is
          # the *name*, not the file path the actions API needs.
@@ -165,7 +165,7 @@ jobs:

      - name: Auto-close canary issue on success
        if: success()
-        uses: actions/github-script@v7
+        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
        with:
          script: |
            const title = '🔴 Canary failing: staging SaaS smoke';
@@ -40,7 +40,7 @@ jobs:
      smoke_ran: ${{ steps.smoke.outputs.ran }}
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Compute sha
        id: compute
@@ -143,7 +143,7 @@ jobs:
    if: ${{ needs.canary-smoke.result == 'success' && needs.canary-smoke.outputs.smoke_ran == 'true' }}
    runs-on: ubuntu-latest
    steps:
-      - uses: imjasonh/setup-crane@v0.4
+      - uses: imjasonh/setup-crane@31b88efe9de28ae0ffa220711af4b60be9435f6e # v0.4

      - name: GHCR login
        run: |
@@ -36,7 +36,7 @@ jobs:
    permissions:
      contents: read
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      - name: Verify merge_group trigger on required-check workflows
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -32,7 +32,7 @@ jobs:
      python: ${{ steps.check.outputs.python }}
      scripts: ${{ steps.check.outputs.scripts }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          fetch-depth: 0
      - id: check
@@ -72,8 +72,8 @@ jobs:
      run:
        working-directory: workspace-server
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
        with:
          go-version: 'stable'
      - run: go mod download
@@ -187,8 +187,8 @@ jobs:
      run:
        working-directory: canvas
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-node@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
        with:
          node-version: '22'
      - run: rm -f package-lock.json && npm install
@@ -210,7 +210,7 @@ jobs:
    if: needs.changes.outputs.scripts == 'true'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      - name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh
        # shellcheck is pre-installed on ubuntu-latest runners (via apt).
        # infra/scripts/ is included because setup.sh + nuke.sh gate the
@@ -276,8 +276,8 @@ jobs:
      run:
        working-directory: workspace
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: '3.11'
          cache: pip
@@ -53,14 +53,14 @@ jobs:

    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Checkout sibling plugin repo
        # Same reasoning as publish-workspace-server-image.yml — the Go
        # module's replace directive needs the plugin source so
        # CodeQL's "go build" phase can resolve.
        if: matrix.language == 'go'
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          repository: Molecule-AI/molecule-ai-plugin-github-app-auth
          path: molecule-ai-plugin-github-app-auth
@@ -69,7 +69,7 @@ jobs:
      # jq is pre-installed on ubuntu-latest — no setup step needed.

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@v3
+        uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
        with:
          languages: ${{ matrix.language }}
          # security-extended widens past the default to include the
@@ -77,11 +77,11 @@ jobs:
          queries: security-extended

      - name: Autobuild
-        uses: github/codeql-action/autobuild@v3
+        uses: github/codeql-action/autobuild@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2

      - name: Perform CodeQL Analysis
        id: analyze
-        uses: github/codeql-action/analyze@v3
+        uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
        with:
          category: "/language:${{ matrix.language }}"
          # upload: never — GHAS isn't enabled on this repo, so the
@@ -121,7 +121,7 @@ jobs:
        # 14-day retention — longer than default 3, short enough not
        # to bloat quota.
        if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: codeql-sarif-${{ matrix.language }}
          path: sarif-results/${{ matrix.language }}/
@@ -1,27 +1,92 @@
 name: E2E API Smoke Test
 # Extracted from ci.yml so workflow-level concurrency can protect this job
 # from run-level cancellation (issue #458).
+#
+# Trigger model (changed 2026-04-28 — see auto-promote gap below):
+#
+# This workflow always FIRES on push/pull_request to staging+main, but
+# only does real work when paths under `workspace-server/`,
+# `tests/e2e/`, or this workflow file changed. The detect-changes job
+# uses dorny/paths-filter to decide; the e2e-api job runs only if
+# changes match. Otherwise the no-op job emits success so the workflow
+# always produces a `completed/success` run record.
+#
+# Why: auto-promote-staging.yml's gate-check (line 99) treats "workflow
+# didn't run" as failure, which dead-locked any platform-only or
+# test-only push to staging that didn't touch workspace-server paths.
+# Dropping the path filter on the trigger and gating real work
+# internally guarantees the workflow always emits a result that the
+# auto-promote chain can read. Same pattern applied to
+# e2e-staging-canvas.yml in the same PR.

 on:
  push:
    branches: [main, staging]
-    paths:
-      - 'workspace-server/**'
-      - 'tests/e2e/**'
-      - '.github/workflows/e2e-api.yml'
  pull_request:
    branches: [main, staging]
-    paths:
-      - 'workspace-server/**'
-      - 'tests/e2e/**'
-      - '.github/workflows/e2e-api.yml'
+  workflow_dispatch:

 concurrency:
-  group: e2e-api-${{ github.ref }}
+  # Per-SHA grouping (changed 2026-04-28 from per-ref). Per-ref had the
+  # same auto-promote-staging brittleness as e2e-staging-canvas — back-
+  # to-back staging pushes share refs/heads/staging, so the older push's
+  # queued run gets cancelled when a newer push lands. Auto-promote-
+  # staging then sees `completed/cancelled` for the older SHA and stays
+  # put; the newer SHA's gates may eventually save the day, but if the
+  # newer push gets cancelled too, we deadlock.
+  #
+  # See e2e-staging-canvas.yml's identical concurrency block for the full
+  # rationale and the 2026-04-28 incident reference.
+  group: e2e-api-${{ github.event.pull_request.head.sha || github.sha }}
  cancel-in-progress: false

 jobs:
+  # Decides whether the real E2E API job has work to do. Output `api`
+  # is 'true' for manual dispatch, otherwise whatever the paths filter
+  # reported for this commit.
+  detect-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      api: ${{ steps.decide.outputs.api }}
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
+        id: filter
+        with:
+          filters: |
+            api:
+              - 'workspace-server/**'
+              - 'tests/e2e/**'
+              - '.github/workflows/e2e-api.yml'
+      - id: decide
+        # Manual dispatch always runs the real suite: there is no diff
+        # context to filter against, and an operator dispatching this
+        # expects the platform to actually be exercised.
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          FILTER_API: ${{ steps.filter.outputs.api }}
+        run: |
+          case "$EVENT_NAME" in
+            workflow_dispatch) api=true ;;
+            *) api="$FILTER_API" ;;
+          esac
+          echo "api=${api}" >> "$GITHUB_OUTPUT"
+
+  # Same `name:` as the real job below so the check-run produced by the
+  # no-op path is indistinguishable from the real one for branch
+  # protection purposes. Without this, the real job was always skipped on
+  # paths-filtered commits → branch protection on `main` saw "E2E API
+  # Smoke Test" as a missing required check → auto-promote-staging's
+  # `git push origin main` got rejected with GH006. Observed 2026-04-28
+  # 00:22 UTC blocking the staging→main promote despite all gates
+  # actually passing at the workflow level.
+  no-op:
+    name: E2E API Smoke Test
+    needs: detect-changes
+    # Runs only when the real job will not, so exactly one job with
+    # this name reports for the commit.
+    if: needs.detect-changes.outputs.api != 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - run: |
+          printf '%s\n' \
+            "No workspace-server / tests/e2e / workflow changes — E2E API gate satisfied without running tests." \
+            "::notice::E2E API Smoke Test no-op pass (paths filter excluded this commit)."
+
  e2e-api:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.api == 'true'
    name: E2E API Smoke Test
    runs-on: ubuntu-latest
    timeout-minutes: 15
@@ -32,8 +97,8 @@ jobs:
      PG_CONTAINER: molecule-ci-postgres
      REDIS_CONTAINER: molecule-ci-redis
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
        with:
          go-version: 'stable'
          cache: true
@@ -13,16 +13,23 @@ name: E2E Staging Canvas (Playwright)
 # workflow — mirrors what PR #1891 does for e2e-api.yml.

 on:
+  # Trigger model (changed 2026-04-28 — see auto-promote gap below):
+  #
+  # Always fires on push/pull_request; only does real work when canvas/
+  # or this workflow file changed. The detect-changes job uses
+  # dorny/paths-filter to decide; the playwright job runs only if
+  # changes match. Otherwise no-op emits success so the workflow always
+  # produces a `completed/success` run record.
+  #
+  # Why: auto-promote-staging.yml's gate-check (line 99) treats
+  # "workflow didn't run" as failure, which dead-locked platform-only
+  # pushes to staging. Dropping the trigger path filter and gating real
+  # work internally guarantees a result the auto-promote chain can
+  # read. Same pattern applied to e2e-api.yml in the same PR.
  push:
    branches: [main, staging]
-    paths:
-      - 'canvas/**'
-      - '.github/workflows/e2e-staging-canvas.yml'
  pull_request:
    branches: [main, staging]
-    paths:
-      - 'canvas/**'
-      - '.github/workflows/e2e-staging-canvas.yml'
  workflow_dispatch:
  schedule:
    # Weekly on Sunday 08:00 UTC — catches Chrome / Playwright / Next.js
@@ -30,11 +37,68 @@ on:
    - cron: '0 8 * * 0'

 concurrency:
-  group: e2e-staging-canvas
+  # Per-SHA grouping (changed 2026-04-28 from a single global group). The
+  # global group made auto-promote-staging brittle: when a staging push
+  # queued behind an in-flight run and a third entrant (a PR run, a
+  # follow-on push) entered the group, the staging push got cancelled —
+  # leaving auto-promote-staging looking at `completed/cancelled` for a
+  # required gate and refusing to advance main. Observed 2026-04-28
+  # 23:51-23:53 on staging tip 3f99fede.
+  #
+  # The original intent of the global group was to throttle parallel
+  # E2E provisions (each spins a fresh EC2). At our scale that throttle
+  # isn't worth the correctness cost — fresh-org-per-run isolates the
+  # state, and the cost of two parallel runs (~$0.001/min × 10min × 2)
+  # is rounding error vs. the cost of a stuck pipeline.
+  #
+  # Per-SHA still dedupes accidental double-triggers for the SAME SHA.
+  # It does NOT cancel obsolete-PR-version runs on force-push; that
+  # wasted CI is acceptable given the alternative is losing staging-tip
+  # data that auto-promote-staging needs.
+  group: e2e-staging-canvas-${{ github.event.pull_request.head.sha || github.sha }}
  cancel-in-progress: false

 jobs:
+  # Decides whether the real Playwright job has work to do. Output
+  # `canvas` is 'true' for manual dispatch and the weekly cron,
+  # otherwise whatever the paths filter reported for this commit.
+  detect-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      canvas: ${{ steps.decide.outputs.canvas }}
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
+        id: filter
+        with:
+          filters: |
+            canvas:
+              - 'canvas/**'
+              - '.github/workflows/e2e-staging-canvas.yml'
+      - id: decide
+        # Manual dispatch and the scheduled run exist precisely to
+        # exercise the suite, so they bypass the diff-based filter.
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          FILTER_CANVAS: ${{ steps.filter.outputs.canvas }}
+        run: |
+          case "$EVENT_NAME" in
+            workflow_dispatch|schedule) canvas=true ;;
+            *) canvas="$FILTER_CANVAS" ;;
+          esac
+          echo "canvas=${canvas}" >> "$GITHUB_OUTPUT"
+
+  # Same `name:` as the playwright job below so the check-run is
+  # indistinguishable from the real one for branch protection. Mirrors
+  # the e2e-api.yml fix in the same PR — see that file for the
+  # 2026-04-28 incident reference.
+  no-op:
+    name: Canvas tabs E2E
+    needs: detect-changes
+    # Runs only when the playwright job will not, so exactly one job
+    # with this name reports for the commit.
+    if: needs.detect-changes.outputs.canvas != 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - run: |
+          printf '%s\n' \
+            "No canvas / workflow changes — E2E Staging Canvas gate satisfied without running tests." \
+            "::notice::E2E Staging Canvas no-op pass (paths filter excluded this commit)."
+
  playwright:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.canvas == 'true'
    name: Canvas tabs E2E
    runs-on: ubuntu-latest
    timeout-minutes: 40
@@ -49,7 +113,7 @@ jobs:
        working-directory: canvas

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Verify admin token present
        run: |
@@ -59,7 +123,7 @@ jobs:
          fi

      - name: Set up Node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
        with:
          node-version: '20'
          cache: 'npm'
@@ -76,7 +140,7 @@ jobs:

      - name: Upload Playwright report on failure
        if: failure()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: playwright-report-staging
          path: canvas/playwright-report-staging/
@@ -84,7 +148,7 @@ jobs:

      - name: Upload screenshots on failure
        if: failure()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: playwright-screenshots
          path: canvas/test-results/
@@ -92,7 +92,7 @@ jobs:
      E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Verify admin token present
        run: |
@@ -50,7 +50,7 @@ jobs:
      E2E_INTENTIONAL_FAILURE: "1"

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Verify admin token present
        run: |
@@ -89,7 +89,7 @@ jobs:

      - name: Open issue if safety net is broken
        if: failure()
-        uses: actions/github-script@v7
+        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
        with:
          script: |
            const title = "🚨 E2E teardown safety net broken";
@@ -34,7 +34,7 @@ jobs:
  promote:
    runs-on: ubuntu-latest
    steps:
-      - uses: imjasonh/setup-crane@v0.4
+      - uses: imjasonh/setup-crane@31b88efe9de28ae0ffa220711af4b60be9435f6e # v0.4

      - name: GHCR login
        run: |
@@ -42,17 +42,17 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Log in to GHCR
-        uses: docker/login-action@v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3

      - name: Compute tags
        id: tags
@@ -85,7 +85,7 @@ jobs:
          echo "ws_url=${WS_URL}" >> "$GITHUB_OUTPUT"

      - name: Build & push canvas image to GHCR
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
        with:
          context: ./canvas
          file: ./canvas/Dockerfile
@@ -81,9 +81,9 @@ jobs:
      version: ${{ steps.version.outputs.version }}
      wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.11"
          cache: pip
@@ -419,9 +419,32 @@ jobs:
          RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
        run: |
          set +e   # don't abort on a single repo failure — collect them all
+          # Schedule-vs-dispatch behaviour split (hardened 2026-04-28
+          # after the sweep-cf-orphans soft-skip incident — same class
+          # of bug):
+          #
+          # The earlier "skipping cascade. templates will pick up the
+          # new version on their own next rebuild" message was wrong —
+          # templates only build on this dispatch trigger; without it
+          # they stay pinned to whatever runtime version they last saw.
+          # A silent skip here means "PyPI is current, templates are
+          # not" and the gap is invisible until someone notices a
+          # template still on the old version weeks later.
+          #
+          #   - push                → exit 1 (red CI surfaces the gap)
+          #   - workflow_dispatch   → exit 0 with a warning (operator
+          #                           ran this ad-hoc; let them rerun
+          #                           after fixing the secret)
          if [ -z "$DISPATCH_TOKEN" ]; then
-            echo "::warning::TEMPLATE_DISPATCH_TOKEN secret not set — skipping cascade. PyPI was published; templates will pick up the new version on their own next rebuild."
-            exit 0
+            if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+              echo "::warning::TEMPLATE_DISPATCH_TOKEN secret not set — skipping cascade."
+              echo "::warning::set it at Settings → Secrets and Variables → Actions, then rerun. Templates will stay on the prior runtime version until either this token is set or each template is rebuilt manually."
+              exit 0
+            fi
+            echo "::error::TEMPLATE_DISPATCH_TOKEN secret missing — cascade cannot fan out."
+            echo "::error::PyPI was published, but the 8 template repos will NOT pick up the new version until this token is restored and a republish dispatches the cascade."
+            echo "::error::set it at Settings → Secrets and Variables → Actions; then re-trigger publish-runtime via workflow_dispatch."
+            exit 1
          fi
          VERSION="$RUNTIME_VERSION"
          if [ -z "$VERSION" ]; then
@@ -27,7 +27,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Checkout sibling plugin repo
        # workspace-server/Dockerfile expects
@@ -42,21 +42,21 @@ jobs:
        # The PAT needs Contents:Read on Molecule-AI/molecule-ai-plugin-
        # github-app-auth. Falls back to the default token for the (rare)
        # case where an operator made the plugin repo public.
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          repository: Molecule-AI/molecule-ai-plugin-github-app-auth
          path: molecule-ai-plugin-github-app-auth
          token: ${{ secrets.PLUGIN_REPO_PAT || secrets.GITHUB_TOKEN }}

      - name: Log in to GHCR
-        uses: docker/login-action@v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3

      - name: Compute tags
        id: tags
@@ -87,7 +87,7 @@ jobs:
      # applyRuntimeModelEnv and caused every E2E to route hermes+openai
      # through openrouter → 401). See issue filed with this PR.
      - name: Build & push platform image to GHCR (staging-<sha> + staging-latest)
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
        with:
          context: .
          file: ./workspace-server/Dockerfile
@@ -104,7 +104,7 @@ jobs:
            org.opencontainers.image.description=Molecule AI platform (Go API server) — pending canary verify

      - name: Build & push tenant image to GHCR (staging-<sha> + staging-latest)
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
        with:
          context: .
          file: ./workspace-server/Dockerfile.tenant
@@ -60,8 +60,8 @@ jobs:
    name: PyPI-latest install + import smoke
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: '3.11'
          cache: pip
@@ -61,8 +61,8 @@ jobs:
    name: PR-built wheel + import smoke
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: '3.11'
          cache: pip
@@ -0,0 +1,57 @@
+name: SECRET_PATTERNS drift lint
+
+# Detects when the canonical SECRET_PATTERNS array in
+# .github/workflows/secret-scan.yml diverges from known consumer
+# mirrors (workspace-runtime's bundled pre-commit hook today; more
+# can be added as the consumer set grows).
+#
+# Why this exists: every side that scans for credentials has its own
+# copy of the pattern list. They drift — most recently the runtime
+# hook lagged the canonical by one pattern (sk-cp- / MiniMax F1088),
+# so a developer's local pre-commit would let a sk-cp- token through
+# while the org-wide CI scan would refuse it. The cost of that drift
+# is dev confusion + delayed feedback; the fix is automated detection.
+#
+# Triggers:
+#   - schedule: daily 05:00 UTC. Catches drift introduced by edits
+#     to a consumer copy that didn't update canonical here.
+#   - push to main/staging where the canonical or this lint changed:
+#     catches the inverse — canonical updated but consumers not yet
+#     bumped. The lint will fail the push; that's intentional, the
+#     person editing canonical is the right person to also update
+#     the consumer.
+#   - workflow_dispatch: ad-hoc operator runs.
+
+on:
+  schedule:
+    # 05:00 UTC = 22:00 PT / 01:00 ET. Quiet hours so a failure
+    # email lands when humans are starting their day, not
+    # interrupting it.
+    - cron: "0 5 * * *"
+  push:
+    branches: [main, staging]
+    paths:
+      - ".github/workflows/secret-scan.yml"
+      - ".github/workflows/secret-pattern-drift.yml"
+      - ".github/scripts/lint_secret_pattern_drift.py"
+  workflow_dispatch:
+
+# GITHUB_TOKEN scoped to read-only. The lint only does git checkout
+# + HTTPS GETs to public consumer files; no writes to anything.
+permissions:
+  contents: read
+
+jobs:
+  lint:
+    name: Detect SECRET_PATTERNS drift
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      # Pinned to a commit SHA (same one the other workflows use) rather
+      # than the mutable `@v4` tag, which upstream could repoint.
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: "3.11"
+
+      - name: Run drift lint
+        run: python3 .github/scripts/lint_secret_pattern_drift.py
@@ -40,7 +40,7 @@ jobs:
    name: Scan diff for credential-shaped strings
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        with:
          fetch-depth: 2  # need previous commit to diff against on push events

@@ -148,7 +148,13 @@ jobs:
          SELF=".github/workflows/secret-scan.yml"

          OFFENDING=""
-          for f in $CHANGED; do
+          # `while IFS= read -r` (not `for f in $CHANGED`) so filenames
+          # containing whitespace don't word-split silently — a path
+          # with a space would otherwise produce two iterations on
+          # tokens that aren't real filenames, breaking the
+          # self-exclude + diff lookup.
+          while IFS= read -r f; do
+            [ -z "$f" ] && continue
            [ "$f" = "$SELF" ] && continue
            if [ -n "$DIFF_RANGE" ]; then
              ADDED=$(git diff --no-color --unified=0 "$BASE" "$HEAD" -- "$f" 2>/dev/null | grep -E '^\+[^+]' || true)
@@ -164,11 +170,18 @@ jobs:
                break
              fi
            done
-          done
+          done <<< "$CHANGED"

          if [ -n "$OFFENDING" ]; then
            echo "::error::Credential-shaped strings detected in diff additions:"
-            printf "$OFFENDING"
+            # `printf '%b' "$OFFENDING"` interprets backslash escapes
+            # (the literal `\n` we appended above becomes a newline)
+            # WITHOUT treating OFFENDING as a format string. Plain
+            # `printf "$OFFENDING"` is a format-string sink: a filename
+            # containing `%` would be interpreted as a conversion
+            # specifier, corrupting the error message (or printing
+            # `%(missing)` artifacts).
+            printf '%b' "$OFFENDING"
            echo ""
            echo "The actual matched values are NOT echoed here, deliberately —"
            echo "round-tripping a leaked credential into CI logs widens the blast"
@@ -78,15 +78,30 @@ jobs:
      MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '50' }}

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      - name: Verify required secrets present
        id: verify
-        # Soft skip when secrets aren't configured. The 6 secrets have
-        # to be set on the repo manually before this workflow can do
-        # real work; until they are, the schedule is a no-op rather
-        # than a recurring red CI run. workflow_dispatch surfaces a
-        # warning so an operator running it ad-hoc sees the gap.
+        # Schedule-vs-dispatch behaviour split (hardened 2026-04-28
+        # after the silent-no-op incident below):
+        #
+        # The earlier soft-skip-on-schedule policy hid a real leak. All
+        # six secrets were unset on this repo for an unknown duration;
+        # every hourly run printed a yellow ::warning:: and exited 0,
+        # so the workflow registered as "passing" while doing nothing.
+        # CF orphans accumulated to 152/200 (~76% of the zone quota
+        # gone) before a manual `dig`-driven audit caught it. Anything
+        # that runs as a janitor and reports green while idle is
+        # indistinguishable from "the janitor is healthy" — so we now
+        # treat schedule (and any future workflow_run/push triggers)
+        # as a hard-fail when secrets are missing.
+        #
+        #   - schedule / workflow_run / push → exit 1 (red CI run
+        #     surfaces the misconfiguration the next tick)
+        #   - workflow_dispatch              → exit 0 with a warning
+        #     (an operator ran this ad-hoc; they already accepted the
+        #     state of the repo and want the workflow to short-circuit
+        #     so they can rerun after fixing the secret)
        run: |
          missing=()
          for var in CF_API_TOKEN CF_ZONE_ID CP_PROD_ADMIN_TOKEN CP_STAGING_ADMIN_TOKEN AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
@@ -95,9 +110,16 @@ jobs:
            fi
          done
          if [ ${#missing[@]} -gt 0 ]; then
-            echo "::warning::skipping sweep — secrets not yet configured: ${missing[*]}"
-            echo "skip=true" >> "$GITHUB_OUTPUT"
-            exit 0
+            if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+              echo "::warning::skipping sweep — secrets not configured: ${missing[*]}"
+              echo "::warning::set them at Settings → Secrets and Variables → Actions, then rerun."
+              echo "skip=true" >> "$GITHUB_OUTPUT"
+              exit 0
+            fi
+            echo "::error::sweep cannot run — required secrets missing: ${missing[*]}"
+            echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow."
+            echo "::error::a silent skip masked an active CF DNS leak (152/200 zone records) caught only by a manual audit on 2026-04-28; this gate exists to make the gap visible."
+            exit 1
          fi
          echo "All required secrets present ✓"
          echo "skip=false" >> "$GITHUB_OUTPUT"
@@ -27,8 +27,8 @@ jobs:
    name: Ops scripts (unittest)
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: '3.11'
      - name: Run unittest
@@ -32,13 +32,13 @@
    "@playwright/test": "^1.59.1",
    "@testing-library/jest-dom": "^6.6.0",
    "@testing-library/react": "^16.1.0",
-    "@types/node": "^22.0.0",
+    "@types/node": "^25.6.0",
    "@types/react": "^19.0.0",
    "@types/react-dom": "^19.0.0",
    "@vitejs/plugin-react": "^6.0.1",
    "@vitest/coverage-v8": "^4.1.5",
    "autoprefixer": "^10.4.0",
-    "jsdom": "^25.0.0",
+    "jsdom": "^29.1.0",
    "postcss": "^8.5.12",
    "tailwindcss": "^3.4.0",
    "typescript": "^5.7.0",
@@ -83,6 +83,7 @@ SUBPACKAGES = {
    "adapters",
    "builtin_tools",
    "lib",
+    "platform_tools",
    "plugins_registry",
    "policies",
    "skill_loader",
@@ -0,0 +1,340 @@
+#!/usr/bin/env bash
+#
+# Measure platform-side bounds (or absence thereof) on a coordinator's
+# task execution. Reproduction harness for Issue 4 of the 2026-04-28
+# CP review, surfaced in the RFC at molecule-core#2251.
+#
+# What Issue 4 hypothesized
+# -------------------------
+# A coordinator workspace receives an A2A kickoff, delegates to children,
+# then enters a synthesis phase whose duration the platform does not
+# bound. `DELEGATION_TIMEOUT` (300s, in workspace/builtin_tools/
+# delegation.py) governs the parent→child HTTP request, NOT the
+# coordinator's own task-execution budget. So a coordinator that's
+# spent 10min synthesizing past delegation will keep going until the
+# LLM returns or its host runtime crashes — never bounded by a platform
+# ceiling.
+#
+# Issue 4 explicitly hedged ("This isn't necessarily a platform bug —
+# could be that the Design Director's system prompt told it to do
+# complex synthesis work that exceeded the A2A response window"). This
+# script is the empirical test of which side that ambiguity lands on.
+#
+# What this script does NOT do
+# ----------------------------
+# - It does NOT assert pass/fail. The "bug" is absence-of-bound, which
+#   is hard to assert in a single run. The script outputs measurement
+#   data; the team interprets.
+# - It does NOT simulate a coordinator hang via runtime modification.
+#   Instead, it drives a real coordinator with a synthesis-heavy task
+#   and observes the duration the platform tolerates.
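+# - It does NOT guarantee cleanup. Workspaces are deleted on EXIT (see
+#   "Cleanup" below), but a failed DELETE only emits `cleanup_failed`;
+#   fall back to scripts/cleanup-rogue-workspaces.sh in that case.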
+#
+# What "bug confirmed" looks like (per Issue 4)
+# ---------------------------------------------
+#   coordinator_response_secs > 300 AND no platform_intervention=true
+#   in the heartbeat trace → coordinator ran past DELEGATION_TIMEOUT
+#   (HTTP-level) without any platform ceiling kicking in. The RFC's
+#   V1.0 operator ceiling would convert this into an explicit
+#   `terminated` response at MAX_TASK_EXECUTION_SECS.
+#
+# What "bug refuted" looks like
+# -----------------------------
+#   coordinator_response_secs cleanly bounded by either the LLM API
+#   timeout or some other platform mechanism → Issue 4's premise that
+#   "no platform-enforced timeout" is wrong, V1.0 of the RFC needs
+#   re-justification.
+#
+# Usage
+# -----
+#   # local dev — no auth, no tenant scoping required:
+#   PLATFORM=http://localhost:8080 OPENROUTER_API_KEY=... \
+#     bash scripts/measure-coordinator-task-bounds.sh
+#
+#   # staging — explicit tenant + admin token are mandatory; the script
+#   # refuses to run without them when PLATFORM is non-local:
+#   PLATFORM=https://your-staging-tenant.example \
+#   ADMIN_TOKEN=...           \
+#   TENANT_ID=tenant-uuid     \
+#   OPENROUTER_API_KEY=...    \
+#     bash scripts/measure-coordinator-task-bounds.sh
+#
+#   # dry-run — print plan + auth/scoping summary, exit before any
+#   # state mutation. Use this before pointing at staging:
+#   DRY_RUN=1 PLATFORM=... ADMIN_TOKEN=... TENANT_ID=... \
+#   OPENROUTER_API_KEY=... \
+#     bash scripts/measure-coordinator-task-bounds.sh
+#
+# Cleanup
+# -------
+#   The script deletes both workspaces it created on EXIT (success,
+#   failure, or interrupt). Set KEEP_WORKSPACES=1 to skip cleanup when
+#   you need to inspect the workspaces afterward — but remember to
+#   delete them by hand or chain `cleanup-rogue-workspaces.sh`.
+#
+set -euo pipefail
+
+PLATFORM="${PLATFORM:-http://localhost:8080}"
+# Require an explicitly-set non-empty key. The previous chained
+# default (`${OPENROUTER_API_KEY:-${OPENAI_API_KEY:?...}}`) silently
+# accepted `OPENROUTER_API_KEY=""` and only failed when OPENAI_API_KEY
+# was also unset — defeating the guard against running with no LLM
+# credentials.
+if [ -z "${OPENROUTER_API_KEY:-}" ] && [ -z "${OPENAI_API_KEY:-}" ]; then
+  echo "ERROR: set OPENROUTER_API_KEY (or OPENAI_API_KEY) to a non-empty value" >&2
+  exit 1
+fi
+OR_KEY="${OPENROUTER_API_KEY:-${OPENAI_API_KEY}}"
+
+# Required for non-localhost platforms — staging-api etc. enforce
+# tenant-admin auth on /workspaces. Without it the harness would either
+# 401 every request OR (worse) provision into the wrong tenant.
+# Explicit auth + tenant scoping is mandatory before pointing this at
+# any shared environment. Memory `feedback_never_run_cluster_cleanup_
+# tests_on_live_platform` calls out the same hazard class.
+ADMIN_TOKEN="${ADMIN_TOKEN:-}"
+TENANT_ID="${TENANT_ID:-}"
+# Refuse to run against a non-local platform without explicit auth and
+# tenant scoping.
+case "$PLATFORM" in
+  # Match the local host exactly, optionally followed by a port or a
+  # path. A bare `http://localhost*` glob would also accept hosts such
+  # as `http://localhost.example.com` and skip the guard below.
+  http://localhost|http://localhost[:/]*|http://127.0.0.1|http://127.0.0.1[:/]*)
+    : # local dev — auth + tenant optional
+    ;;
+  *)
+    if [ -z "$ADMIN_TOKEN" ] || [ -z "$TENANT_ID" ]; then
+      echo "ERROR: PLATFORM=$PLATFORM is non-local — set both ADMIN_TOKEN and TENANT_ID" >&2
+      echo "       (the harness creates real workspaces; running unscoped against shared infra" >&2
+      echo "       can collide with live tenant state. See cluster-cleanup hazard memory.)" >&2
+      exit 1
+    fi
+    ;;
+esac
+
+# Synthesis prompt knob — choose the size of the post-delegation work
+# the coordinator is asked to do. Default exercises 3 delegation rounds
+# with non-trivial aggregation.
+SYNTHESIS_DEPTH="${SYNTHESIS_DEPTH:-3}"
+# Max time we'll wait on the coordinator's A2A response before giving
+# up on this measurement. Set generously (10min) so we don't truncate
+# a slow-but-eventually-completing case.
+A2A_TIMEOUT="${A2A_TIMEOUT:-600}"
+
+# Dry-run prints what would be provisioned + the curl commands, then
+# exits before any state mutation. Use this to confirm the platform
+# URL, tenant scoping, and synthesis prompt are right BEFORE creating
+# real workspaces. Set DRY_RUN=1 to engage.
+DRY_RUN="${DRY_RUN:-0}"
+
+# Workspaces are auto-deleted on EXIT (success, failure, or interrupt)
+# to avoid leaking resources against shared infra. Set KEEP_WORKSPACES=1
+# to skip cleanup when you need to inspect the workspaces afterward
+# (e.g. to pull container logs or re-trigger an A2A round-trip).
+KEEP_WORKSPACES="${KEEP_WORKSPACES:-0}"
+
+ts() { date -u +%Y-%m-%dT%H:%M:%S.%3NZ 2>/dev/null || date -u +%Y-%m-%dT%H:%M:%SZ; }
+
+emit() {
+  # One JSON line per event so the output is machine-readable.
+  printf '{"ts":"%s","event":"%s","data":%s}\n' "$(ts)" "$1" "${2:-null}"
+}
+
+# Helper that adds Authorization + X-Tenant-Id headers when configured.
+# Local-dev runs (no ADMIN_TOKEN) get a no-op pass-through so a developer
+# can iterate against `http://localhost:8080` without setup ceremony.
+# curl wrapper: prepends Authorization / X-Tenant-Id headers when
+# ADMIN_TOKEN / TENANT_ID are non-empty, then forwards all arguments.
+api() {
+  local auth_headers=()
+  if [ -n "$ADMIN_TOKEN" ]; then
+    auth_headers+=(-H "Authorization: Bearer $ADMIN_TOKEN")
+  fi
+  if [ -n "$TENANT_ID" ]; then
+    auth_headers+=(-H "X-Tenant-Id: $TENANT_ID")
+  fi
+  # `${arr[@]+"${arr[@]}"}` expands to nothing when the array is empty.
+  # A plain "${arr[@]}" on an empty array trips `set -u` ("unbound
+  # variable") on bash < 4.4 (e.g. macOS's stock 3.2), which would break
+  # exactly the local-dev, no-token path this helper is meant to allow.
+  curl -s ${auth_headers[@]+"${auth_headers[@]}"} "$@"
+}
+
+# Set early so we can reference it from the trap; populated as
+# workspaces come online and unset by the cleanup helper to avoid
+# repeat DELETEs on re-entry.
+PM_ID=""
+CHILD_ID=""
+
+cleanup() {
+  # `trap` ignores function return values, so don't capture/return $?
+  # — that would only mislead a future reader. Disable -e inside cleanup
+  # so a single curl failure doesn't abort the loop and leave the other
+  # workspace orphaned.
+  set +e
+  if [ "$KEEP_WORKSPACES" = "1" ]; then
+    emit "cleanup_skipped" "{\"reason\":\"KEEP_WORKSPACES=1\",\"pm_id\":\"$PM_ID\",\"child_id\":\"$CHILD_ID\"}"
+    return
+  fi
+  for id in "$CHILD_ID" "$PM_ID"; do
+    [ -z "$id" ] && continue
+    # Capture HTTP status separately from response body so a 401/403/5xx
+    # surfaces as a `cleanup_failed` event instead of a silent leak. The
+    # operator can then re-run cleanup-rogue-workspaces.sh with fresh
+    # credentials. ADMIN_TOKEN expiry mid-run is the realistic failure
+    # mode here; without this we'd swallow it under `>/dev/null 2>&1`.
+    code=$(api -o /dev/null -w '%{http_code}' -X DELETE "$PLATFORM/workspaces/$id" 2>/dev/null || echo "curl_err")
+    if [ "$code" = "200" ] || [ "$code" = "204" ] || [ "$code" = "404" ]; then
+      # 404 = already gone (race with a concurrent operator). Treat as
+      # success since the post-condition (workspace absent) holds.
+      emit "cleanup_deleted" "{\"workspace_id\":\"$id\",\"http_code\":\"$code\"}"
+    else
+      emit "cleanup_failed" "{\"workspace_id\":\"$id\",\"http_code\":\"$code\",\"hint\":\"workspace may be leaked — re-run cleanup-rogue-workspaces.sh\"}"
+    fi
+  done
+}
+trap cleanup EXIT INT TERM
+
+emit "run_started" "{\"platform\":\"$PLATFORM\",\"tenant_id\":\"$TENANT_ID\",\"synthesis_depth\":$SYNTHESIS_DEPTH,\"a2a_timeout_secs\":$A2A_TIMEOUT,\"dry_run\":$([ \"$DRY_RUN\" = \"1\" ] && echo true || echo false)}"
+
+if [ "$DRY_RUN" = "1" ]; then
+  cat >&2 <<EOF
+
+=========================================
+  DRY RUN — no state will be mutated.
+=========================================
+
+Would target: $PLATFORM
+Tenant:       ${TENANT_ID:-<local — no tenant scoping>}
+Auth:         $([ -n "$ADMIN_TOKEN" ] && echo "Bearer ***${ADMIN_TOKEN: -4}" || echo "<none — local dev>")
+
+Would provision:
+  PM (coordinator, tier=2, template=claude-code-default)
+  Researcher (child, tier=2, template=langgraph)
+
+Would send synthesis-heavy task: $SYNTHESIS_DEPTH delegations + 600w
+synthesis. Coordinator A2A timeout: ${A2A_TIMEOUT}s.
+
+Workspaces would be auto-deleted on script exit (override with
+KEEP_WORKSPACES=1).
+
+Re-run without DRY_RUN=1 to execute.
+
+EOF
+  exit 0
+fi
+
+# ---- Setup: coordinator + 1 child ----
+# Under `set -euo pipefail` a failed curl or a non-JSON error body would
+# abort the script inside the assignment, before the "create failed"
+# diagnostics below can print. `|| R=""` and `|| true` turn those cases
+# into an empty ID so the explicit error path reports the raw response.
+emit "provisioning_pm" null
+R=$(api -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \
+  -d '{"name":"PM","role":"Coordinator — delegates and synthesizes","tier":2,"template":"claude-code-default"}') || R=""
+PM_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || true)
+[ -n "$PM_ID" ] || { echo "ERROR: PM create failed: $R" >&2; exit 1; }
+emit "pm_provisioned" "{\"workspace_id\":\"$PM_ID\"}"
+
+emit "provisioning_child" null
+R=$(api -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \
+  -d '{"name":"Researcher","role":"Returns short research findings","tier":2,"template":"langgraph"}') || R=""
+CHILD_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || true)
+[ -n "$CHILD_ID" ] || { echo "ERROR: child create failed: $R" >&2; exit 1; }
+emit "child_provisioned" "{\"workspace_id\":\"$CHILD_ID\"}"
+
+# Attach the child to the coordinator, then hand the child its LLM key.
+api -X PATCH "$PLATFORM/workspaces/$CHILD_ID" -H 'Content-Type: application/json' \
+  -d "{\"parent_id\":\"$PM_ID\"}" > /dev/null
+api -X POST "$PLATFORM/workspaces/$CHILD_ID/secrets" -H 'Content-Type: application/json' \
+  -d "{\"key\":\"OPENROUTER_API_KEY\",\"value\":\"$OR_KEY\"}" > /dev/null
+
+# ---- Wait for both online ----
+wait_online() {
+  local id="$1"; local label="$2"
+  for i in $(seq 1 30); do
+    s=$(api "$PLATFORM/workspaces/$id" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))" 2>/dev/null)
+    [ "$s" = "online" ] && { emit "online" "{\"workspace\":\"$label\",\"after_polls\":$i}"; return 0; }
+    sleep 3
+  done
+  emit "online_timeout" "{\"workspace\":\"$label\"}"
+  return 1
+}
+wait_online "$PM_ID"    "PM"    || exit 2
+wait_online "$CHILD_ID" "child" || exit 2
+
+# ---- Build a synthesis-heavy kickoff task ----
+# The task asks the coordinator to delegate N times, each time with a
+# different sub-question, then aggregate findings into a single report.
+# The synthesis phase happens entirely inside the coordinator's A2A
+# handler post-delegation, which is the exact code path Issue 4 named.
+TASK="You are coordinating a research analysis. Delegate $SYNTHESIS_DEPTH separate sub-questions to the Researcher (one at a time, sequentially — wait for each response before sending the next), then synthesize all findings into a single coherent report. Sub-questions: (a) historical context of distributed consensus, (b) modern Byzantine-fault-tolerant protocols, (c) practical trade-offs between Raft and Paxos. After all delegations complete, write a 600-word synthesis comparing the three responses and drawing one cross-cutting insight. Do not respond until the synthesis is complete."
+
+# ---- Time the A2A kickoff round-trip ----
+emit "a2a_kickoff_sent" "{\"to\":\"$PM_ID\",\"task_chars\":${#TASK}}"
+
+# Serialize the JSON-RPC payload before starting the clock so Python
+# start-up time is not counted as coordinator latency.
+PAYLOAD=$(python3 -c "
+import json,sys
+print(json.dumps({
+  'method':'message/send',
+  'params':{
+    'message':{
+      'role':'user',
+      'parts':[{'type':'text','text':sys.argv[1]}]
+    }
+  }
+}))
+" "$TASK")
+
+START_NS=$(python3 -c 'import time; print(int(time.time_ns()))')
+
+# Use --max-time to bound this measurement (else the script could itself
+# hang past sensible limits). The bound is a measurement-side timeout,
+# NOT a platform-side timeout — the latter is what we're trying to
+# detect.
+#
+# The `|| RESP=...` fallback must sit OUTSIDE the `$(...)`: inside it,
+# the assignment happens in the substitution's subshell and is lost, so
+# a curl failure/timeout would leave RESP empty instead of carrying the
+# sentinel the summary below tells the reader to look for.
+RESP=$(api --max-time "$A2A_TIMEOUT" -X POST "$PLATFORM/workspaces/$PM_ID/a2a" \
+  -H "Content-Type: application/json" \
+  -d "$PAYLOAD") || RESP="<curl_failed_or_timed_out>"
+
+END_NS=$(python3 -c 'import time; print(int(time.time_ns()))')
+ELAPSED_SECS=$(python3 -c "print(round(($END_NS - $START_NS) / 1e9, 2))")
+
+emit "a2a_response_observed" "{\"elapsed_secs\":$ELAPSED_SECS,\"response_chars\":${#RESP},\"response_head\":$(python3 -c "import json,sys; print(json.dumps(sys.argv[1][:200]))" "$RESP")}"
+
+# ---- Pull heartbeat trace from the platform ----
+# The heartbeat endpoint records workspace liveness pings. If the
+# platform implements per-task bounds, the trace will show a status
+# transition (e.g. terminated) within the run window. Absence of any
+# such transition over a 10min synthesis is the empirical evidence
+# that no platform ceiling fired.
+#
+# --fail makes curl exit non-zero on HTTP >= 400. Without it a 404 from
+# a missing endpoint exits 0, the `|| echo` never runs, and the error
+# body is recorded as if it were a trace instead of the documented
+# "<endpoint_unavailable>" sentinel.
+emit "fetching_heartbeat_trace" null
+HB=$(api --fail "$PLATFORM/workspaces/$PM_ID/heartbeat-history?since_secs=$A2A_TIMEOUT" 2>&1 || echo "<endpoint_unavailable>")
+emit "heartbeat_trace" "{\"raw\":$(python3 -c "import json,sys; print(json.dumps(sys.argv[1]))" "$HB")}"
+
+# ---- Summary ----
+emit "run_completed" "{\"elapsed_secs\":$ELAPSED_SECS,\"pm_id\":\"$PM_ID\",\"child_id\":\"$CHILD_ID\"}"
+
+cat <<EOF >&2
+
+=========================================
+  Measurement complete.
+  Coordinator response time: ${ELAPSED_SECS}s
+  PM workspace:    $PM_ID
+  Child workspace: $CHILD_ID
+=========================================
+
+Interpretation guide:
+
+  ELAPSED_SECS < 60   → Synthesis completed quickly; not informative
+                        about platform bounds (LLM was just fast).
+                        Re-run with SYNTHESIS_DEPTH=8 to force longer
+                        synthesis.
+
+  60 <= ELAPSED < 300 → Within DELEGATION_TIMEOUT. Doesn't prove or
+                        refute Issue 4 — the HTTP-level timeout would
+                        be sufficient if synthesis happened to fall
+                        under it.
+
+  ELAPSED >= 300      → BUG CONFIRMED IF heartbeat_trace shows no
+                        platform-side transition. Coordinator ran past
+                        DELEGATION_TIMEOUT without any platform ceiling
+                        kicking in — exactly the gap the RFC V1.0 plans
+                        to close with MAX_TASK_EXECUTION_SECS.
+
+  curl_failed_or_timed_out → \$A2A_TIMEOUT exceeded. Either the
+                        coordinator is genuinely hung (likely) or
+                        synthesis is just very slow. Pull workspace
+                        status separately to disambiguate.
+
+Heartbeat trace caveats:
+
+  If heartbeat_trace.raw is the literal string "<endpoint_unavailable>"
+  the platform's /heartbeat-history endpoint is missing or 404'd; the
+  measurement is INCONCLUSIVE on the bound question because we cannot
+  observe whether a platform-side transition fired. Either wire the
+  endpoint or replace this trace pull with an equivalent Datadog query
+  for the workspace's heartbeat metric and re-run.
+
+Workspaces (auto-deleted on exit unless KEEP_WORKSPACES=1):
+  PM:    $PM_ID
+  Child: $CHILD_ID
+
+EOF
@@ -9,45 +9,45 @@ require (
 	github.com/alicebob/miniredis/v2 v2.37.0
 	github.com/creack/pty v1.1.18
 	github.com/docker/docker v28.5.2+incompatible
-	github.com/docker/go-connections v0.6.0
-	github.com/gin-contrib/cors v1.7.2
-	github.com/gin-gonic/gin v1.10.0
+	github.com/docker/go-connections v0.7.0
+	github.com/gin-contrib/cors v1.7.7
+	github.com/gin-gonic/gin v1.12.0
 	github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1
-	github.com/golang-jwt/jwt/v5 v5.2.2
+	github.com/golang-jwt/jwt/v5 v5.3.1
 	github.com/google/uuid v1.6.0
 	github.com/gorilla/websocket v1.5.3
 	github.com/lib/pq v1.10.9
 	github.com/opencontainers/image-spec v1.1.1
-	github.com/redis/go-redis/v9 v9.7.3
+	github.com/redis/go-redis/v9 v9.19.0
 	github.com/robfig/cron/v3 v3.0.1
-	golang.org/x/crypto v0.49.0
+	golang.org/x/crypto v0.50.0
 	gopkg.in/yaml.v3 v3.0.1
 )

 require (
-	github.com/Microsoft/go-winio v0.4.21 // indirect
-	github.com/bytedance/sonic v1.11.6 // indirect
-	github.com/bytedance/sonic/loader v0.1.1 // indirect
+	github.com/Microsoft/go-winio v0.6.2 // indirect
+	github.com/bytedance/gopkg v0.1.3 // indirect
+	github.com/bytedance/sonic v1.15.0 // indirect
+	github.com/bytedance/sonic/loader v0.5.0 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
-	github.com/cloudwego/base64x v0.1.4 // indirect
-	github.com/cloudwego/iasm v0.2.0 // indirect
+	github.com/cloudwego/base64x v0.1.6 // indirect
 	github.com/containerd/errdefs v1.0.0 // indirect
 	github.com/containerd/errdefs/pkg v0.3.0 // indirect
 	github.com/containerd/log v0.1.0 // indirect
-	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
 	github.com/distribution/reference v0.6.0 // indirect
 	github.com/docker/go-units v0.5.0 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
-	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
-	github.com/gin-contrib/sse v0.1.0 // indirect
+	github.com/gabriel-vasile/mimetype v1.4.12 // indirect
+	github.com/gin-contrib/sse v1.1.0 // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-playground/locales v0.14.1 // indirect
 	github.com/go-playground/universal-translator v0.18.1 // indirect
-	github.com/go-playground/validator/v10 v10.20.0 // indirect
-	github.com/goccy/go-json v0.10.2 // indirect
+	github.com/go-playground/validator/v10 v10.30.1 // indirect
+	github.com/goccy/go-json v0.10.5 // indirect
+	github.com/goccy/go-yaml v1.19.2 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
-	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
+	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
 	github.com/leodido/go-urn v1.4.0 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/moby/docker-image-spec v1.3.1 // indirect
@@ -57,11 +57,14 @@ require (
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/morikuni/aec v1.1.0 // indirect
 	github.com/opencontainers/go-digest v1.0.0 // indirect
-	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
+	github.com/pelletier/go-toml/v2 v2.2.4 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
+	github.com/quic-go/qpack v0.6.0 // indirect
+	github.com/quic-go/quic-go v0.59.0 // indirect
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
-	github.com/ugorji/go/codec v1.2.12 // indirect
+	github.com/ugorji/go/codec v1.3.1 // indirect
 	github.com/yuin/gopher-lua v1.1.1 // indirect
+	go.mongodb.org/mongo-driver/v2 v2.5.0 // indirect
 	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
 	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 // indirect
 	go.opentelemetry.io/otel v1.43.0 // indirect
@@ -70,10 +73,11 @@ require (
 	go.opentelemetry.io/otel/sdk v1.43.0 // indirect
 	go.opentelemetry.io/otel/sdk/metric v1.43.0 // indirect
 	go.opentelemetry.io/otel/trace v1.43.0 // indirect
-	golang.org/x/arch v0.8.0 // indirect
+	go.uber.org/atomic v1.11.0 // indirect
+	golang.org/x/arch v0.23.0 // indirect
 	golang.org/x/net v0.52.0 // indirect
-	golang.org/x/sys v0.42.0 // indirect
-	golang.org/x/text v0.35.0 // indirect
+	golang.org/x/sys v0.43.0 // indirect
+	golang.org/x/text v0.36.0 // indirect
 	golang.org/x/time v0.15.0 // indirect
 	google.golang.org/protobuf v1.36.11 // indirect
 	gotest.tools/v3 v3.5.2 // indirect
@@ -2,8 +2,8 @@ github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEK
 github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
 github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
 github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
-github.com/Microsoft/go-winio v0.4.21 h1:+6mVbXh4wPzUrl1COX9A+ZCvEpYsOBZ6/+kwDnvLyro=
-github.com/Microsoft/go-winio v0.4.21/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
+github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
+github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
 github.com/Molecule-AI/molecule-ai-plugin-gh-identity v0.0.0-20260424033845-4fd5ac7be30f h1:YkLRhUg+9qr9OV9N8dG1Hj0Ml7TThHlRwh5F//oUJVs=
 github.com/Molecule-AI/molecule-ai-plugin-gh-identity v0.0.0-20260424033845-4fd5ac7be30f/go.mod h1:NqdtlWZDJvpXNJRHnMkPhTKHdA1LZTNH+63TB66JSOU=
 github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260421064811-7d98ae51e31d h1:GpYhP6FxaJZc1Ljy5/YJ9ZIVGvfOqZBmDolNr2S5x2g=
@@ -14,18 +14,18 @@ github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
 github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
 github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
 github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
-github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
-github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
-github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
-github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
+github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M=
+github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM=
+github.com/bytedance/sonic v1.15.0 h1:/PXeWFaR5ElNcVE84U0dOHjiMHQOwNIx3K4ymzh/uSE=
+github.com/bytedance/sonic v1.15.0/go.mod h1:tFkWrPz0/CUCLEF4ri4UkHekCIcdnkqXw9VduqpJh0k=
+github.com/bytedance/sonic/loader v0.5.0 h1:gXH3KVnatgY7loH5/TkeVyXPfESoqSBSBEiDd5VjlgE=
+github.com/bytedance/sonic/loader v0.5.0/go.mod h1:AR4NYCk5DdzZizZ5djGqQ92eEhCCcdf5x77udYiSJRo=
 github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
 github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
-github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
-github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
-github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
+github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
+github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
 github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
 github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
 github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
@@ -37,26 +37,24 @@ github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
-github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
 github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
 github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
 github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM=
 github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
-github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94=
-github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE=
+github.com/docker/go-connections v0.7.0 h1:6SsRfJddP22WMrCkj19x9WKjEDTB+ahsdiGYf0mN39c=
+github.com/docker/go-connections v0.7.0/go.mod h1:no1qkHdjq7kLMGUXYAduOhYPSJxxvgWBh7ogVvptn3Q=
 github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
 github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
-github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
-github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
-github.com/gin-contrib/cors v1.7.2 h1:oLDHxdg8W/XDoN/8zamqk/Drgt4oVZDvaV0YmvVICQw=
-github.com/gin-contrib/cors v1.7.2/go.mod h1:SUJVARKgQ40dmrzgXEVxj2m7Ig1v1qIboQkPDTQ9t2E=
-github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
-github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
-github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
-github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
+github.com/gabriel-vasile/mimetype v1.4.12 h1:e9hWvmLYvtp846tLHam2o++qitpguFiYCKbn0w9jyqw=
+github.com/gabriel-vasile/mimetype v1.4.12/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
+github.com/gin-contrib/cors v1.7.7 h1:Oh9joP463x7Mw72vhvJ61YQm8ODh9b04YR7vsOErD0Q=
+github.com/gin-contrib/cors v1.7.7/go.mod h1:K5tW0RkzJtWSiOdikXloy8VEZlgdVNpHNw8FpjUPNrE=
+github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w=
+github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM=
+github.com/gin-gonic/gin v1.12.0 h1:b3YAbrZtnf8N//yjKeU2+MQsh2mY5htkZidOM7O0wG8=
+github.com/gin-gonic/gin v1.12.0/go.mod h1:VxccKfsSllpKshkBWgVgRniFFAzFb9csfngsqANjnLc=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
 github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
@@ -68,14 +66,16 @@ github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/o
 github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
 github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
 github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
-github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
-github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
+github.com/go-playground/validator/v10 v10.30.1 h1:f3zDSN/zOma+w6+1Wswgd9fLkdwy06ntQJp0BBvFG0w=
+github.com/go-playground/validator/v10 v10.30.1/go.mod h1:oSuBIQzuJxL//3MelwSLD5hc2Tu889bF0Idm9Dg26cM=
 github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1 h1:wG8n/XJQ07TmjbITcGiUaOtXxdrINDz1b0J1w0SzqDc=
 github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1/go.mod h1:A2S0CWkNylc2phvKXWBBdD3K0iGnDBGbzRpISP2zBl8=
-github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
-github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
-github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8=
-github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
+github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
+github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
+github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM=
+github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
+github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY=
+github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
@@ -88,10 +88,8 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l8
 github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
 github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
 github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE=
-github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
-github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
-github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
-github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
+github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
+github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
@@ -121,41 +119,45 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
 github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
 github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
-github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
-github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
+github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
+github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/redis/go-redis/v9 v9.7.3 h1:YpPyAayJV+XErNsatSElgRZZVCwXX9QzkKYNvO7x0wM=
-github.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRltRTZHSvrA=
+github.com/quic-go/qpack v0.6.0 h1:g7W+BMYynC1LbYLSqRt8PBg5Tgwxn214ZZR34VIOjz8=
+github.com/quic-go/qpack v0.6.0/go.mod h1:lUpLKChi8njB4ty2bFLX2x4gzDqXwUpaO1DP9qMDZII=
+github.com/quic-go/quic-go v0.59.0 h1:OLJkp1Mlm/aS7dpKgTc6cnpynnD2Xg7C1pwL6vy/SAw=
+github.com/quic-go/quic-go v0.59.0/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU=
+github.com/redis/go-redis/v9 v9.19.0 h1:XPVaaPSnG6RhYf7p+rmSa9zZfeVAnWsH5h3lxthOm/k=
+github.com/redis/go-redis/v9 v9.19.0/go.mod h1:v/M13XI1PVCDcm01VtPFOADfZtHf8YW3baQf57KlIkA=
 github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
 github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
 github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
-github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
 github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
-github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
-github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
-github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
 github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
 github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
-github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
-github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/ugorji/go/codec v1.3.1 h1:waO7eEiFDwidsBN6agj1vJQ4AG7lh2yqXyOXqhgQuyY=
+github.com/ugorji/go/codec v1.3.1/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4=
 github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
 github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
+github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs=
+github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s=
+go.mongodb.org/mongo-driver/v2 v2.5.0 h1:yXUhImUjjAInNcpTcAlPHiT7bIXhshCTL3jVBkF3xaE=
+go.mongodb.org/mongo-driver/v2 v2.5.0/go.mod h1:yOI9kBsufol30iFsl1slpdq1I0eHPzybRWdyYUs8K/0=
 go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
 go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 h1:OyrsyzuttWTSur2qN/Lm0m2a8yqyIjUVBZcxFPuXq2o=
@@ -176,21 +178,21 @@ go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09
 go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
 go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g=
 go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk=
-golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
-golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
-golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
-golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
-golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
+go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
+go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
+go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y=
+go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU=
+golang.org/x/arch v0.23.0 h1:lKF64A2jF6Zd8L0knGltUnegD62JMFBiCPBmQpToHhg=
+golang.org/x/arch v0.23.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A=
+golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
+golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
 golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
 golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
-golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
-golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
-golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
-golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
+golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
+golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
+golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
 golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
 golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
 google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA=
@@ -209,5 +211,3 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
 gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA=
-nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
-rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
@@ -27,6 +27,7 @@ from a2a_tools import (
    tool_recall_memory,
    tool_send_message_to_user,
 )
+from platform_tools.registry import TOOLS as _PLATFORM_TOOL_SPECS

 logger = logging.getLogger(__name__)

@@ -45,158 +46,27 @@ from a2a_client import (  # noqa: F401, E402
 from a2a_tools import report_activity  # noqa: F401, E402

 # --- Tool definitions (schemas) ---
+#
+# Built once at import time from the platform_tools registry. The MCP
+# `description` field is the spec's `short` line — that's the unified
+# tool description used by both the MCP tool listing AND the bullet
+# rendering in the agent-facing system-prompt section. The deeper
+# `when_to_use` guidance is appended to the system prompt only (it's
+# too long to live in MCP `description` without bloating every
+# tool-list response the model sees).

 TOOLS = [
    {
-        "name": "delegate_task",
-        "description": "Delegate a task to another workspace via A2A protocol and WAIT for the response. Use for quick tasks. The target must be a peer (sibling or parent/child). Use list_peers to find available targets.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "workspace_id": {
-                    "type": "string",
-                    "description": "Target workspace ID (from list_peers)",
-                },
-                "task": {
-                    "type": "string",
-                    "description": "The task description to send to the target workspace",
-                },
-            },
-            "required": ["workspace_id", "task"],
-        },
-    },
-    {
-        "name": "delegate_task_async",
-        "description": "Send a task to another workspace with a short timeout (fire-and-forget). Returns immediately — the target continues processing. Best when you don't need the result right away. Note: check_task_status may not work with all workspace implementations.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "workspace_id": {
-                    "type": "string",
-                    "description": "Target workspace ID (from list_peers)",
-                },
-                "task": {
-                    "type": "string",
-                    "description": "The task description to send to the target workspace",
-                },
-            },
-            "required": ["workspace_id", "task"],
-        },
-    },
-    {
-        "name": "check_task_status",
-        "description": "Check the status of a previously submitted async task via tasks/get. Note: only works if the target workspace's A2A implementation supports task persistence. May return 'not found' for completed tasks.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "workspace_id": {
-                    "type": "string",
-                    "description": "The workspace ID the task was sent to",
-                },
-                "task_id": {
-                    "type": "string",
-                    "description": "The task_id returned by delegate_task_async",
-                },
-            },
-            "required": ["workspace_id", "task_id"],
-        },
-    },
-    {
-        "name": "list_peers",
-        "description": "List all workspaces this agent can communicate with (siblings and parent/children). Returns name, ID, status, and role for each peer.",
-        "inputSchema": {"type": "object", "properties": {}},
-    },
-    {
-        "name": "get_workspace_info",
-        "description": "Get this workspace's own info — ID, name, role, tier, parent, status.",
-        "inputSchema": {"type": "object", "properties": {}},
-    },
-    {
-        "name": "send_message_to_user",
-        "description": "Send a message directly to the user's canvas chat — pushed instantly via WebSocket. Use this to: (1) acknowledge a task immediately ('Got it, I'll start working on this'), (2) send interim progress updates while doing long work, (3) deliver follow-up results after delegation completes, (4) attach files (zip, pdf, csv, image) for the user to download via the `attachments` field (NEVER paste file URLs in `message`). The message appears in the user's chat as if you're proactively reaching out.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "message": {
-                    "type": "string",
-                    # The "no URLs in message text" rule is the single biggest
-                    # cause of bad chat UX: agents drop catbox.moe / file://
-                    # / temporary upload-host links into the prose, the
-                    # canvas renders them as plain markdown links the user
-                    # can't preview, and SaaS deployments often can't even
-                    # reach those external hosts. Every download MUST go
-                    # through the structured `attachments` field below.
-                    "description": (
-                        "Caption text for the chat bubble. Required even when sending "
-                        "attachments — set to a short label like 'Here's the build:' "
-                        "or 'Done — see attached.'\n\n"
-                        "DO NOT paste file URLs, download links, or container paths in "
-                        "this string. Files MUST go through the `attachments` field, "
-                        "which renders as a clickable download chip and works on SaaS "
-                        "deployments where external file-host URLs (catbox.moe, file://, "
-                        "etc.) are unreachable from the user's browser."
-                    ),
-                },
-                "attachments": {
-                    "type": "array",
-                    "description": (
-                        "REQUIRED for any file delivery. Pass absolute file paths inside "
-                        "THIS container (e.g. ['/tmp/build.zip', '/workspace/report.pdf']) "
-                        "— the platform uploads each file and returns a download chip "
-                        "with the file's icon + name + size in the user's chat. The chip "
-                        "works in SaaS deployments because the URL is platform-served, "
-                        "not an external host.\n\n"
-                        "USE THIS instead of: pasting URLs in `message`, base64-encoding "
-                        "in the body, or telling the user to look at a path on disk. "
-                        "If the file isn't already on disk, write it first (Bash, Write "
-                        "tool, etc.) then pass its path here. 25 MB per file cap."
-                    ),
-                    "items": {"type": "string"},
-                },
-            },
-            "required": ["message"],
-        },
-    },
-    {
-        "name": "commit_memory",
-        "description": "Append a new memory row to persistent storage. Each call CREATES a row — does not overwrite existing memories with the same content. Use to remember decisions, task results, and context that should survive a restart. Scope: LOCAL (this workspace only), TEAM (parent + siblings), GLOBAL (entire org). GLOBAL writes require tier-0 (root) workspace; lower-tier callers get an RBAC error.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "content": {
-                    "type": "string",
-                    "description": "The information to remember — be detailed and specific",
-                },
-                "scope": {
-                    "type": "string",
-                    "enum": ["LOCAL", "TEAM", "GLOBAL"],
-                    "description": "Memory scope (default: LOCAL)",
-                },
-            },
-            "required": ["content"],
-        },
-    },
-    {
-        "name": "recall_memory",
-        "description": "Substring-search persistent memory and return ALL matching rows (no pagination). Empty query returns every memory accessible at the given scope. Server-side filter is case-insensitive substring match on `content`. Use at the start of conversations to recall prior context — calling once with empty query is cheap and avoids missing relevant memories that don't match a narrow keyword.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "query": {
-                    "type": "string",
-                    "description": "Search query (empty returns all memories)",
-                },
-                "scope": {
-                    "type": "string",
-                    "enum": ["LOCAL", "TEAM", "GLOBAL", ""],
-                    "description": "Filter by scope (empty returns all accessible)",
-                },
-            },
-        },
-    },
+        "name": _spec.name,
+        "description": _spec.short,
+        "inputSchema": _spec.input_schema,
+    }
+    for _spec in _PLATFORM_TOOL_SPECS
 ]


+
+
 # --- Tool dispatch ---

 async def handle_tool_call(name: str, arguments: dict) -> str:
@@ -421,8 +421,8 @@ class BaseAdapter(ABC):
        from coordinator import get_children, get_parent_context, build_children_description
        from prompt import build_system_prompt, get_peer_capabilities, get_platform_instructions
        from builtin_tools.approval import request_approval
-        from builtin_tools.delegation import delegate_to_workspace, check_delegation_status
-        from builtin_tools.memory import commit_memory, search_memory
+        from builtin_tools.delegation import delegate_task, delegate_task_async, check_task_status
+        from builtin_tools.memory import commit_memory, recall_memory
        from builtin_tools.sandbox import run_code

        platform_url = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
@@ -455,8 +455,14 @@ class BaseAdapter(ABC):
                    seen_skill_ids.add(skill.metadata.id)
        logger.info(f"Loaded {len(loaded_skills)} skills: {[s.metadata.id for s in loaded_skills]}")

-        # Assemble tools: 6 core + skill tools
-        all_tools = [delegate_to_workspace, check_delegation_status, request_approval, commit_memory, search_memory, run_code]
+        # Core platform tools — names mirror the platform_tools registry,
+        # so the names referenced in get_a2a_instructions/get_hma_instructions
+        # are guaranteed to exist as @tool symbols here. The structural
+        # alignment test in tests/test_platform_tools.py pins this.
+        all_tools = [
+            delegate_task, delegate_task_async, check_task_status,
+            request_approval, commit_memory, recall_memory, run_code,
+        ]
        for skill in loaded_skills:
            all_tools.extend(skill.tools)

@@ -2,7 +2,7 @@

 Delegations are non-blocking: the tool fires the A2A request in the background
 and returns immediately with a task_id. The agent can check status anytime via
-check_delegation_status, or just continue working and check later.
+check_task_status, or just continue working and check later.

 When the delegate responds, the result is stored and the agent is notified
 via a status update.
@@ -44,7 +44,7 @@ class DelegationStatus(str, Enum):
    # The reply will arrive via the platform's stitch path when the
    # peer finishes its current work. The LLM should WAIT, not retry,
    # and definitely not fall back to doing the work itself — see the
-    # check_delegation_status docstring for the prompt-side guidance.
+    # check_task_status docstring for the prompt-side guidance.
    QUEUED = "queued"
    COMPLETED = "completed"
    FAILED = "failed"
@@ -110,7 +110,7 @@ async def _record_delegation_on_platform(task_id: str, target_workspace_id: str,
    Best-effort POST to /workspaces/<self>/delegations/record. The agent still
    fires A2A directly for speed + OTEL propagation, but the platform's
    GET /delegations endpoint now mirrors the same set an agent's local
-    check_delegation_status sees.
+    check_task_status sees.
    """
    try:
        async with httpx.AsyncClient(timeout=10) as client:
@@ -129,11 +129,11 @@ async def _record_delegation_on_platform(task_id: str, target_workspace_id: str,
 async def _refresh_queued_from_platform(task_id: str) -> bool:
    """Lazy-refresh a QUEUED delegation's local state from the platform.

-    Called by check_delegation_status when local status is QUEUED. The
+    Called by check_task_status when local status is QUEUED. The
    platform's drain stitch (a2a_queue.go) updates the delegate_result
    activity_logs row when a queued delegation eventually completes,
    but it has no callback to this runtime — without this lazy refresh,
-    the LLM polling check_delegation_status would see "queued" forever
+    the LLM polling check_task_status would see "queued" forever
    even after the platform has the result.

    Returns True if the local delegation was updated to a terminal state
@@ -215,7 +215,7 @@ async def _execute_delegation(task_id: str, workspace_id: str, task: str):
    delegation.status = DelegationStatus.IN_PROGRESS

    # #64: register on the platform so GET /workspaces/<self>/delegations
-    # sees the same set as check_delegation_status. Best-effort — platform
+    # sees the same set as check_task_status. Best-effort — platform
    # unreachability must not block the actual A2A delegation.
    await _record_delegation_on_platform(task_id, workspace_id, task)

@@ -286,7 +286,7 @@ async def _execute_delegation(task_id: str, workspace_id: str, task: str):
                    # accepted the request but the peer's runtime is
                    # mid-task. Platform-side drain will deliver the
                    # reply asynchronously. Mark QUEUED locally so
-                    # check_delegation_status can surface that state
+                    # check_task_status can surface that state
                    # to the LLM with explicit "wait, don't bypass"
                    # guidance. Do NOT mark FAILED — the request is
                    # alive in the platform's queue, not lost.
@@ -371,14 +371,36 @@ async def _execute_delegation(task_id: str, workspace_id: str, task: str):


@tool
-async def delegate_to_workspace(
+async def delegate_task(
+    workspace_id: str,
+    task: str,
+) -> str:
+    """Delegate a task to a peer workspace via A2A and WAIT for the response.
+
+    Synchronous variant — blocks until the peer replies (or the platform's
+    A2A round-trip times out). Use this for QUICK questions and small
+    sub-tasks where you can afford to wait inline.
+
+    For longer-running work (research, multi-minute jobs) use
+    delegate_task_async + check_task_status instead so you don't hold
+    this workspace busy waiting.
+
+    Tool name + description are sourced from the platform_tools registry —
+    a single ToolSpec drives MCP, LangChain, and system-prompt docs.
+    """
+    from a2a_tools import tool_delegate_task
+    return await tool_delegate_task(workspace_id, task)
+
+
+@tool
+async def delegate_task_async(
    workspace_id: str,
    task: str,
 ) -> dict:
    """Delegate a task to a peer workspace via A2A protocol (non-blocking).

    Sends the task in the background and returns immediately with a task_id.
-    Use check_delegation_status to poll for the result, or continue working
+    Use check_task_status to poll for the result, or continue working
    and check later. The delegate works independently.

    Args:
@@ -386,7 +408,7 @@ async def delegate_to_workspace(
        task: The task description to send to the peer.

    Returns:
-        A dict with task_id and status="delegated". Use check_delegation_status(task_id) to get results.
+        A dict with task_id and status="delegated". Use check_task_status(task_id) to get results.
    """
    task_id = str(uuid.uuid4())

@@ -417,12 +439,12 @@ async def delegate_to_workspace(
        "success": True,
        "task_id": task_id,
        "status": "delegated",
-        "message": f"Task delegated to {workspace_id}. Use check_delegation_status('{task_id}') to get the result when ready.",
+        "message": f"Task delegated to {workspace_id}. Use check_task_status('{task_id}') to get the result when ready.",
    }


@tool
-async def check_delegation_status(
+async def check_task_status(
    task_id: str = "",
 ) -> dict:
    """Check the status of a delegated task, or list all active delegations.
@@ -434,7 +456,7 @@ async def check_delegation_status(
      processing a prior task. The reply WILL arrive — the platform's
      drain re-dispatches when the peer is free. This tool transparently
      polls the platform for the eventual outcome on each call, so
-      keep polling check_delegation_status periodically and you'll see
+      keep polling check_task_status periodically and you'll see
      the status flip to "completed" / "failed" automatically.
      Do NOT retry the delegation. Do NOT do the work yourself.
      Acknowledge to the user that the peer is busy and will reply,
@@ -445,7 +467,7 @@ async def check_delegation_status(
      yourself if status is "failed", never if status is "queued".

    Args:
-        task_id: The task_id returned by delegate_to_workspace. If empty, lists all delegations.
+        task_id: The task_id returned by delegate_task_async. If empty, lists all delegations.

    Returns:
        Status and result (if completed) of the delegation.
@@ -493,4 +515,14 @@ async def check_delegation_status(
    elif delegation.status == DelegationStatus.FAILED:
        result["error"] = delegation.error

+    # RFC #2251 V1.0 reproduction-harness instrumentation. Every poll of
+    # check_task_status emits a phase=check_status line so the harness
+    # operator can tell whether a coordinator stuck for 8 minutes was
+    # polling-children-the-whole-time vs synthesizing-after-children-done.
+    # `grep rfc2251_phase=check_status` in the workspace's container log
+    # gives the polling pattern. Strip when V1.0 ships.
+    logger.info(
+        "rfc2251_phase=check_status task_id=%s peer=%s status=%s",
+        task_id, delegation.workspace_id, delegation.status.value,
+    )
    return result
@@ -8,7 +8,7 @@ Hierarchical Memory Architecture:
 RBAC enforcement
 ----------------
 ``commit_memory`` requires the ``"memory.write"`` action.
-``search_memory`` requires the ``"memory.read"`` action.
+``recall_memory`` requires the ``"memory.read"`` action.
 Roles are read from ``config.yaml`` under ``rbac.roles`` (default: operator).

 Audit trail
@@ -188,7 +188,7 @@ async def commit_memory(content: str, scope: str = "LOCAL") -> dict:


@tool
-async def search_memory(query: str = "", scope: str = "") -> dict:
+async def recall_memory(query: str = "", scope: str = "") -> dict:
    """Search stored memories.

    Args:
@@ -81,7 +81,7 @@ def build_children_description(children: list[dict]) -> str:
        children,
        heading="## Your Team (sub-workspaces you coordinate)",
        instruction=(
-            "Use the `delegate_to_workspace` tool to send tasks to the chosen member. "
+            "Use the `delegate_task_async` tool to send tasks to the chosen member. "
            "Only delegate to members listed above."
        ),
    )
@@ -92,7 +92,7 @@ def build_children_description(children: list[dict]) -> str:
            "",
            "### Coordination Rules — MANDATORY",
            "1. You are a COORDINATOR. Your ONLY job is to delegate and synthesize. NEVER do the work yourself.",
-            "2. For EVERY task, use `delegate_to_workspace` to send it to the appropriate team member(s). "
+            "2. For EVERY task, use `delegate_task_async` to send it to the appropriate team member(s). "
            "Do this BEFORE writing any analysis, code, or research yourself.",
            "3. If a task spans multiple members, delegate to ALL of them in parallel and aggregate results.",
            "4. If ALL members are offline/paused, tell the caller which members are unavailable. "
@@ -120,23 +120,56 @@ async def route_task_to_team(
        task: The task description to route.
        preferred_member_id: Optional — directly delegate to this member.
    """
-    from builtin_tools.delegation import delegate_to_workspace as delegate
+    import time
+    from builtin_tools.delegation import delegate_task_async as delegate
+
+    # RFC #2251 V1.0 reproduction-harness instrumentation. Phase-tagged log
+    # lines correlate with scripts/measure-coordinator-task-bounds.sh's
+    # external timing trace, so an operator running the harness against
+    # staging can answer "what phase was the coordinator in at minute 7?".
+    # `grep rfc2251_phase` on the workspace's container logs is the query.
+    # Strip when V1.0 ships and the phase data lands in the structured
+    # heartbeat payload instead.
+    _phase_t0 = time.monotonic()
+    logger.info(
+        "rfc2251_phase=route_start task_chars=%d preferred_member_id=%s",
+        len(task), preferred_member_id or "none",
+    )

    children = await get_children()
+    logger.info(
+        "rfc2251_phase=children_fetched count=%d elapsed_ms=%d",
+        len(children), int((time.monotonic() - _phase_t0) * 1000),
+    )
+
    decision = build_team_routing_payload(
        children,
        task=task,
        preferred_member_id=preferred_member_id,
    )
+    logger.info(
+        "rfc2251_phase=routing_decided action=%s elapsed_ms=%d",
+        decision.get("action", "unknown"), int((time.monotonic() - _phase_t0) * 1000),
+    )

    if decision.get("action") == "delegate_to_preferred_member":
        # Async delegation — returns immediately with task_id
+        target = decision["preferred_member_id"]
+        logger.info(
+            "rfc2251_phase=delegate_invoked target=%s elapsed_ms=%d",
+            target, int((time.monotonic() - _phase_t0) * 1000),
+        )
        result = await delegate.ainvoke(
-            {
-                "workspace_id": decision["preferred_member_id"],
-                "task": task,
-            }
+            {"workspace_id": target, "task": task}
+        )
+        logger.info(
+            "rfc2251_phase=delegate_returned target=%s task_id=%s elapsed_ms=%d",
+            target, result.get("task_id", "n/a"), int((time.monotonic() - _phase_t0) * 1000),
        )
        return result

+    logger.info(
+        "rfc2251_phase=route_returning_decision_only elapsed_ms=%d",
+        int((time.monotonic() - _phase_t0) * 1000),
+    )
    return decision
@@ -273,29 +273,19 @@ def get_system_prompt(config_path: str, fallback: str | None = None) -> str | No
    return fallback


-_A2A_INSTRUCTIONS_MCP = """## Inter-Agent Communication
-You have MCP tools for communicating with other workspaces:
- list_peers: discover available peer workspaces (name, ID, status, role)
- delegate_task: send a task and WAIT for the response (for quick tasks)
- delegate_task_async: send a task and return immediately with a task_id (for long tasks)
- check_task_status: poll an async task's status and get results when done
- get_workspace_info: get your own workspace info
-
-For quick questions, use delegate_task (synchronous).
-For long-running work (building pages, running audits), use delegate_task_async + check_task_status.
-Always use list_peers first to discover available workspace IDs.
-Access control is enforced — you can only reach siblings and parent/children.
-
-PROACTIVE MESSAGING: Use send_message_to_user to push messages to the user's chat at ANY time:
- Acknowledge tasks immediately: "Got it, delegating to the team now..."
- Send progress updates during long work: "Research Lead finished, waiting on Dev Lead..."
- Deliver follow-up results: "All teams reported back. Here's the synthesis: ..."
-This lets you respond quickly ("I'll work on this") and come back later with results.
-
-If delegate_task returns a DELEGATION FAILED message, do NOT forward the raw error to the user.
-Instead: (1) try delegating to a different peer, (2) handle the task yourself, or
-(3) tell the user which peer is unavailable and provide your own best answer."""
+# Tool-usage instructions for system-prompt injection. Generated from
+# the platform_tools registry — every tool name, description, and usage
+# guidance comes from the canonical ToolSpec. Adding/renaming a tool in
+# registry.py automatically flows through here.

+_A2A_FOOTER = (
+    "Always use list_peers first to discover available workspace IDs. "
+    "Access control is enforced — you can only reach siblings and parent/children. "
+    "If a delegation returns a DELEGATION FAILED message, do NOT forward "
+    "the raw error to the user. Instead: (1) try a different peer, "
+    "(2) handle the task yourself, or (3) tell the user which peer is "
+    "unavailable and provide your own best answer."
+)

 _A2A_INSTRUCTIONS_CLI = """## Inter-Agent Communication
 You can delegate tasks to other workspaces using the a2a command:
@@ -308,40 +298,111 @@ You can delegate tasks to other workspaces using the a2a command:
 For quick questions, use sync delegate. For long tasks, use --async + status.
 Only delegate to peers listed by the peers command (access control enforced)."""

+# Maps every a2a-section registry tool to the substring that MUST appear
+# in `_A2A_INSTRUCTIONS_CLI` for CLI-runtime agents to discover it. The
+# CLI subprocess interface uses different command-shape names than the
+# MCP tool names (e.g. `peers` vs `list_peers`), so this is NOT a
+# generated mapping — it's a hand-maintained alignment table.
+#
+# `None` declares "this MCP tool is intentionally NOT exposed via the
+# CLI subprocess interface" — make the decision explicit so adding a
+# new registry tool fails the alignment test until the mapping is
+# updated. test_platform_tools.py asserts both directions:
+#
+#   1. every a2a tool in the registry is keyed here (no silent omission)
+#   2. every non-None substring actually appears in `_A2A_INSTRUCTIONS_CLI`
+#
+# Why hand-maintained: the registry is the source of truth for
+# MCP-capable runtimes, but the CLI subprocess interface in
+# `molecule_runtime.a2a_cli` is a separate surface with its own command
+# vocabulary. Auto-generating CLI command lines from JSON-schema specs
+# would lose the human-readable invocation syntax (`delegate <ws> <task>`
+# vs. `--workspace_id=... --task=...`). The mapping + test gives us
+# alignment without forcing a uniform shape.
+_CLI_A2A_COMMAND_KEYWORDS: dict[str, str | None] = {
+    "list_peers": "peers",
+    "delegate_task": "delegate ",          # NOTE(review): also a substring of "delegate --async", so it cannot tell the sync line from the async one
+    "delegate_task_async": "delegate --async",
+    "check_task_status": "status",
+    "get_workspace_info": "info",
+    # `send_message_to_user` is not exposed via the CLI subprocess
+    # interface today — its `attachments` parameter is a structured list
+    # that wouldn't survive a positional-arg shell invocation cleanly.
+    # CLI-runtime agents fall back to printing results to stdout (which
+    # the runtime forwards to the user) instead. If the a2a_cli ever
+    # grows a `say` or `message` subcommand, change `None` to that
+    # keyword so the substring check starts covering this tool too.
+    "send_message_to_user": None,
+}
+
+
+def _validate_cli_a2a_command_keywords() -> None:
+    """Raise ValueError if a mapped CLI keyword is absent from the CLI text.
+
+    Entries mapped to ``None`` (tools not exposed via the CLI) are skipped.
+    """
+    absent: list[str] = []
+    for name, kw in _CLI_A2A_COMMAND_KEYWORDS.items():
+        if kw is None or kw in _A2A_INSTRUCTIONS_CLI:
+            continue
+        absent.append(f"{name}={kw!r}")
+    if absent:
+        prefix = "CLI A2A command mapping is out of sync with _A2A_INSTRUCTIONS_CLI: "
+        raise ValueError(prefix + ", ".join(absent))
+
+
+_validate_cli_a2a_command_keywords()
+
+
+def _render_section(heading: str, specs, footer: str = "") -> str:
+    """Render heading, one bullet per tool, one usage block per tool, footer.
+
+    ``specs`` may be any iterable of objects exposing ``name``, ``short``
+    and ``when_to_use``; the result always ends with exactly one newline.
+    """
+    specs = list(specs)  # walked twice; a one-shot iterator would lose the usage blocks
+    parts = [heading, "", *(f"- **{s.name}**: {s.short}" for s in specs), ""]
+    for s in specs:
+        parts += [f"### {s.name}", s.when_to_use, ""]
+    if footer:
+        parts.append(footer)
+    return "\n".join(parts).rstrip() + "\n"
+

 def get_a2a_instructions(mcp: bool = True) -> str:
    """Return inter-agent communication instructions for system-prompt injection.

-    Pass `mcp=True` (default) for MCP-capable runtimes (Claude Code via SDK,
-    Codex). Pass `mcp=False` for CLI-only runtimes (Ollama, custom) that have
-    to call a2a_cli.py as a subprocess.
+    Generated from the platform_tools registry. Pass `mcp=True` (default)
+    for MCP-capable runtimes (claude-code, hermes, langchain, crewai).
+    Pass `mcp=False` for CLI-only runtimes (ollama, custom subprocess
+    runtimes that don't speak MCP) — those get a static block describing
+    the molecule_runtime.a2a_cli subprocess interface instead.
    """
-    return _A2A_INSTRUCTIONS_MCP if mcp else _A2A_INSTRUCTIONS_CLI
-
-
-_HMA_INSTRUCTIONS = """## Hierarchical Memory (HMA)
-You have persistent memory tools that survive across sessions and restarts:
-
- **commit_memory(content, scope)**: Save important information.
-  - LOCAL: private to you only (default)
-  - TEAM: shared with your parent workspace and siblings (same team)
-  - GLOBAL: shared with the entire org (only root workspaces can write)
-
- **recall_memory(query)**: Search your accessible memories. Returns LOCAL + TEAM + GLOBAL matches.
-
-**When to use memory:**
- After making a decision or learning something non-obvious → commit_memory("decision X because Y", scope="TEAM")
- Before starting work → recall_memory("what did the team decide about X")
- When you discover org-wide knowledge (repo locations, API patterns, conventions) → commit_memory(fact, scope="GLOBAL") if you are a root workspace, or scope="TEAM" to share with your team
- After completing a task → commit_memory("completed task X, PR #N opened", scope="TEAM") so your lead and teammates know
-
-**Memory is automatically recalled** at the start of each new session. Use it proactively during work to share context.
-"""
+    if not mcp:
+        return _A2A_INSTRUCTIONS_CLI
+    from platform_tools.registry import a2a_tools
+    return _render_section(
+        "## Inter-Agent Communication",
+        a2a_tools(),
+        footer=_A2A_FOOTER,
+    )


 def get_hma_instructions() -> str:
-    """Return HMA memory instructions for system-prompt injection."""
-    return _HMA_INSTRUCTIONS
+    """Return HMA persistent-memory instructions for system-prompt injection.
+
+    Generated from the platform_tools registry.
+    """
+    from platform_tools.registry import memory_tools
+    return _render_section(
+        "## Hierarchical Memory (HMA)",
+        memory_tools(),
+        footer=(
+            "Memory is automatically recalled at the start of each new "
+            "session. Use commit_memory proactively during work so future "
+            "sessions and teammates can recall what you learned."
+        ),
+    )


 # ========================================================================
@@ -337,11 +337,16 @@ async def main():  # pragma: no cover
                # Rebuild the agent's tool list from updated skills
                if hasattr(adapter, "all_tools") and hasattr(adapter, "system_prompt"):
                    from builtin_tools.approval import request_approval
-                    from builtin_tools.delegation import delegate_to_workspace
-                    from builtin_tools.memory import commit_memory, search_memory
+                    from builtin_tools.delegation import delegate_task, delegate_task_async, check_task_status
+                    from builtin_tools.memory import commit_memory, recall_memory
                    from builtin_tools.sandbox import run_code
-                    base_tools = [delegate_to_workspace, request_approval,
-                                  commit_memory, search_memory, run_code]
+                    # Core platform tools mirror adapter_base.all_tools — must
+                    # match the platform_tools registry names so docs and tools
+                    # never drift.
+                    base_tools = [
+                        delegate_task, delegate_task_async, check_task_status,
+                        request_approval, commit_memory, recall_memory, run_code,
+                    ]
                    skill_tools = []
                    for sk in adapter.loaded_skills:
                        skill_tools.extend(sk.tools)
@@ -0,0 +1,13 @@
+"""Platform tools — single source of truth for tool naming and docs.
+
+The platform owns A2A and persistent-memory tooling (cross-cutting
+runtime concerns per project memory project_runtime_native_pluggable.md).
+Tools are defined ONCE in `registry.py`. Every adapter — MCP server,
+LangChain wrapper, any future SDK integration — consumes the specs to
+register the tool in its native format. Doc generators (system-prompt
+injection, canvas help, future doc sites) read from the same place.
+
+Adding a tool: append a ToolSpec to TOOLS in registry.py. Every
+adapter picks it up automatically; structural tests fail if any side
+drifts from the registry.
+"""
@@ -0,0 +1,403 @@
+"""Canonical registry of platform tool specs.
+
+Every tool the platform offers to agents (A2A delegation, persistent
+memory, broadcast, introspection) is defined ONCE in TOOLS below.
+Adapters consume these specs to register the tool in their native
+runtime format:
+
+  - a2a_mcp_server.py iterates `TOOLS` to build the MCP TOOLS list +
+    dispatches calls to spec.impl. No tool name or description is
+    hardcoded there.
+
+  - builtin_tools/{delegation,memory}.py define LangChain `@tool`
+    wrappers using `name=` from the spec; the wrapper body just
+    calls spec.impl.
+
+  - executor_helpers.get_a2a_instructions(mcp=True) /
+    get_hma_instructions() GENERATE the system-prompt doc string from
+    `TOOLS` — no hand-maintained instruction text for MCP-capable
+    runtimes.
+
+  - executor_helpers._A2A_INSTRUCTIONS_CLI is a SEPARATE hand-maintained
+    block for CLI subprocess runtimes (ollama and any other adapter
+    that drives a2a via `python3 -m molecule_runtime.a2a_cli ...`). It
+    uses different command-shape names than the registry tool names
+    (e.g. `peers` vs `list_peers`), so it cannot be auto-generated
+    from JSON-schema specs without losing the readable invocation
+    syntax. Its tool-coverage alignment with the registry is enforced
+    by the `_CLI_A2A_COMMAND_KEYWORDS` mapping in executor_helpers.py
+    and the alignment tests in test_platform_tools.py — adding a new
+    a2a tool here will fail those tests until the mapping is updated.
+
+Adding a new tool: append a ToolSpec to `TOOLS` below, then update
+`_CLI_A2A_COMMAND_KEYWORDS` in executor_helpers.py (set the value to
+the CLI subcommand keyword, or to `None` if the tool isn't exposed via
+the CLI subprocess interface). The structural alignment tests in
+workspace/tests/test_platform_tools.py fail otherwise.
+
+Renaming a tool: change `name` here. Search workspace/ for the old
+literal in case any non-adapter consumer (tests, plugin code) hard-coded
+it; update those manually. The grep is the audit, the test is the gate.
+
+Removing a tool: delete the entry AND its `_CLI_A2A_COMMAND_KEYWORDS`
+key. Adapters stop registering it automatically; doc generators stop
+mentioning it.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Awaitable, Callable
+from dataclasses import dataclass
+from typing import Any, Literal
+
+from a2a_tools import (
+    tool_check_task_status,
+    tool_commit_memory,
+    tool_delegate_task,
+    tool_delegate_task_async,
+    tool_get_workspace_info,
+    tool_list_peers,
+    tool_recall_memory,
+    tool_send_message_to_user,
+)
+
+# Section name maps to the heading in the agent-facing system prompt.
+# Adding a new section: add a constant + create a corresponding
+# generator in executor_helpers (or generalize get_*_instructions).
+A2A_SECTION = "a2a"
+MEMORY_SECTION = "memory"
+
+Section = Literal["a2a", "memory"]
+
+
+@dataclass(frozen=True)
+class ToolSpec:
+    """Runtime-agnostic definition of one platform tool.
+
+    Each adapter (MCP, LangChain, future SDK) consumes the same spec.
+    Doc generators consume the same spec. There is no other source
+    of truth for tool naming or description.
+    """
+
+    name: str
+    """The exact name agents see. MUST match every adapter's
+    registered name and the literal that appears in agent-facing
+    instruction docs. Structural test enforces this."""
+
+    short: str
+    """One-line description. Used as the MCP `description` field
+    AND as the bullet line in agent-facing instruction docs."""
+
+    when_to_use: str
+    """Two-to-three-sentence agent-facing usage guidance — when
+    to call this tool, what it returns, what NOT to confuse it
+    with. Concatenated into the system prompt below the tool list."""
+
+    input_schema: dict[str, Any]
+    """JSON Schema for the tool's input parameters. Consumed
+    directly by the MCP server. LangChain derives its schema from
+    Python type annotations on the @tool function — alignment is
+    pinned by the structural test."""
+
+    impl: Callable[..., Awaitable[str]]
+    """The actual coroutine. Both adapters call this; only the
+    wrapping differs."""
+
+    section: Section
+    """Which agent-prompt section this tool belongs to (controls
+    which instruction generator emits it)."""
+
+
+# ---------------------------------------------------------------------------
+# A2A — inter-agent communication & broadcast
+# ---------------------------------------------------------------------------
+
+_DELEGATE_TASK = ToolSpec(
+    name="delegate_task",
+    short=(
+        "Delegate a task to a peer workspace via A2A and WAIT for the "
+        "response (synchronous)."
+    ),
+    when_to_use=(
+        "Use for QUICK questions and small sub-tasks where you can "
+        "afford to wait inline. Returns the peer's response text "
+        "directly. For longer-running work (research, multi-minute "
+        "jobs) use delegate_task_async + check_task_status instead "
+        "so you don't hold this workspace busy waiting."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "workspace_id": {
+                "type": "string",
+                "description": "Target workspace ID (from list_peers).",
+            },
+            "task": {
+                "type": "string",
+                "description": "Task description to send to the peer.",
+            },
+        },
+        "required": ["workspace_id", "task"],
+    },
+    impl=tool_delegate_task,
+    section=A2A_SECTION,
+)
+
+_DELEGATE_TASK_ASYNC = ToolSpec(
+    name="delegate_task_async",
+    short=(
+        "Send a task to a peer and return immediately with a task_id "
+        "(non-blocking)."
+    ),
+    when_to_use=(
+        "Use for long-running work where you want to keep doing other "
+        "things while the peer processes. Poll with check_task_status "
+        "to retrieve the result. The platform's A2A queue handles "
+        "delivery + retries; the peer works independently."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "workspace_id": {
+                "type": "string",
+                "description": "Target workspace ID (from list_peers).",
+            },
+            "task": {
+                "type": "string",
+                "description": "Task description to send to the peer.",
+            },
+        },
+        "required": ["workspace_id", "task"],
+    },
+    impl=tool_delegate_task_async,
+    section=A2A_SECTION,
+)
+
+_CHECK_TASK_STATUS = ToolSpec(
+    name="check_task_status",
+    short=(
+        "Poll the status of a task started with delegate_task_async; "
+        "returns result when done."
+    ),
+    when_to_use=(
+        "Statuses: pending/in_progress (peer still working — wait), "
+        "queued (peer is busy with a prior task — DO NOT retry, the "
+        "platform stitches the response when it finishes), completed "
+        "(result available), failed (real error — fall back to a "
+        "different peer or handle it yourself)."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "workspace_id": {
+                "type": "string",
+                "description": "Workspace ID the task was sent to.",
+            },
+            "task_id": {
+                "type": "string",
+                "description": "task_id returned by delegate_task_async.",
+            },
+        },
+        "required": ["workspace_id", "task_id"],
+    },
+    impl=tool_check_task_status,
+    section=A2A_SECTION,
+)
+
+_LIST_PEERS = ToolSpec(
+    name="list_peers",
+    short=(
+        "List the workspaces this agent can communicate with — name, "
+        "ID, status, role for each."
+    ),
+    when_to_use=(
+        "Call this first when you need to delegate but don't know the "
+        "target's ID. Access control is enforced — you only see "
+        "siblings, parent, and direct children."
+    ),
+    input_schema={"type": "object", "properties": {}},
+    impl=tool_list_peers,
+    section=A2A_SECTION,
+)
+
+_GET_WORKSPACE_INFO = ToolSpec(
+    name="get_workspace_info",
+    short="Get this workspace's own info — ID, name, role, tier, parent, status.",
+    when_to_use=(
+        "Use to introspect your own identity (e.g. before reporting "
+        "back to the user, or to determine whether you're a tier-0 "
+        "root that can write GLOBAL memory)."
+    ),
+    input_schema={"type": "object", "properties": {}},
+    impl=tool_get_workspace_info,
+    section=A2A_SECTION,
+)
+
+_SEND_MESSAGE_TO_USER = ToolSpec(
+    name="send_message_to_user",
+    short=(
+        "Send a message directly to the user's canvas chat — pushed instantly "
+        "via WebSocket. Use this to: (1) acknowledge a task immediately ('Got "
+        "it, I'll start working on this'), (2) send interim progress updates "
+        "while doing long work, (3) deliver follow-up results after delegation "
+        "completes, (4) attach files (zip, pdf, csv, image) for the user to "
+        "download via the `attachments` field (NEVER paste file URLs in "
+        "`message`). The message appears in the user's chat as if you're "
+        "proactively reaching out."
+    ),
+    when_to_use=(
+        "Use proactively across the lifecycle of a task — early to "
+        "acknowledge, mid-flight to update, late to deliver. Never paste "
+        "file URLs in the message body — always pass absolute paths in "
+        "`attachments` so the platform serves them as download chips "
+        "(works on SaaS where external file hosts are unreachable)."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "message": {
+                "type": "string",
+                # The "no URLs in message text" rule is the single biggest
+                # cause of bad chat UX: agents drop catbox.moe / file://
+                # / temporary upload-host links into the prose, the
+                # canvas renders them as plain markdown links the user
+                # can't preview, and SaaS deployments often can't even
+                # reach those external hosts. Every download MUST go
+                # through the structured `attachments` field below.
+                "description": (
+                    "Caption text for the chat bubble. Required even when sending "
+                    "attachments — set to a short label like 'Here's the build:' "
+                    "or 'Done — see attached.'\n\n"
+                    "DO NOT paste file URLs, download links, or container paths in "
+                    "this string. Files MUST go through the `attachments` field, "
+                    "which renders as a clickable download chip and works on SaaS "
+                    "deployments where external file-host URLs (catbox.moe, file://, "
+                    "etc.) are unreachable from the user's browser."
+                ),
+            },
+            "attachments": {
+                "type": "array",
+                "description": (
+                    "REQUIRED for any file delivery. Pass absolute file paths inside "
+                    "THIS container (e.g. ['/tmp/build.zip', '/workspace/report.pdf']) "
+                    "— the platform uploads each file and returns a download chip "
+                    "with the file's icon + name + size in the user's chat. The chip "
+                    "works in SaaS deployments because the URL is platform-served, "
+                    "not an external host.\n\n"
+                    "USE THIS instead of: pasting URLs in `message`, base64-encoding "
+                    "in the body, or telling the user to look at a path on disk. "
+                    "If the file isn't already on disk, write it first (Bash, Write "
+                    "tool, etc.) then pass its path here. 25 MB per file cap."
+                ),
+                "items": {"type": "string"},
+            },
+        },
+        "required": ["message"],
+    },
+    impl=tool_send_message_to_user,
+    section=A2A_SECTION,
+)
+
+
+# ---------------------------------------------------------------------------
+# HMA — hierarchical persistent memory
+# ---------------------------------------------------------------------------
+
+_COMMIT_MEMORY = ToolSpec(
+    name="commit_memory",
+    short="Save a fact to persistent memory; survives across sessions and restarts.",
+    when_to_use=(
+        "Scopes: LOCAL (private to you, default), TEAM (shared with "
+        "parent + siblings), GLOBAL (entire org — only tier-0 root "
+        "workspaces can write). Commit decisions, learned facts, and "
+        "completed-task summaries so future sessions and teammates "
+        "can recall them."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "content": {
+                "type": "string",
+                "description": "What to remember — be specific.",
+            },
+            "scope": {
+                "type": "string",
+                "enum": ["LOCAL", "TEAM", "GLOBAL"],
+                "description": "Memory scope (default LOCAL).",
+            },
+        },
+        "required": ["content"],
+    },
+    impl=tool_commit_memory,
+    section=MEMORY_SECTION,
+)
+
+_RECALL_MEMORY = ToolSpec(
+    name="recall_memory",
+    short="Search persistent memory; returns matching LOCAL + TEAM + GLOBAL rows.",
+    when_to_use=(
+        "Call at the start of new work and when picking up something "
+        "you may have done before. Empty query returns ALL accessible "
+        "memories — cheap and avoids missing rows that don't match a "
+        "narrow keyword. Memory is automatically recalled at session "
+        "start; use this to refresh mid-session."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "Search query (empty returns all).",
+            },
+            "scope": {
+                "type": "string",
+                "enum": ["LOCAL", "TEAM", "GLOBAL", ""],
+                "description": "Filter by scope (empty = all accessible).",
+            },
+        },
+    },
+    impl=tool_recall_memory,
+    section=MEMORY_SECTION,
+)
+
+
+# ---------------------------------------------------------------------------
+# Public registry. Keep entries grouped by section; list order is the
+# order adapters iterate TOOLS and the order generated docs show them.
+# ---------------------------------------------------------------------------
+
+TOOLS: list[ToolSpec] = [
+    # A2A
+    _DELEGATE_TASK,
+    _DELEGATE_TASK_ASYNC,
+    _CHECK_TASK_STATUS,
+    _LIST_PEERS,
+    _GET_WORKSPACE_INFO,
+    _SEND_MESSAGE_TO_USER,
+    # HMA
+    _COMMIT_MEMORY,
+    _RECALL_MEMORY,
+]
+
+
+def a2a_tools() -> list[ToolSpec]:
+    """Return the specs whose section is A2A, preserving TOOLS order."""
+    return [spec for spec in TOOLS if spec.section == A2A_SECTION]
+
+
+def memory_tools() -> list[ToolSpec]:
+    """Return the specs whose section is memory, preserving TOOLS order."""
+    return [spec for spec in TOOLS if spec.section == MEMORY_SECTION]
+
+
+def by_name(name: str) -> ToolSpec:
+    """Return the registered spec called ``name``; KeyError when unknown."""
+    found = next((spec for spec in TOOLS if spec.name == name), None)
+    if found is None:
+        raise KeyError(f"no platform tool named {name!r}")
+    return found
+
+
+def tool_names() -> list[str]:
+    """Return every registered tool's canonical name, in TOOLS order."""
+    return [spec.name for spec in TOOLS]
@@ -64,7 +64,7 @@ def build_team_routing_payload(
        "action": "choose_member",
        "message": (
            f"You have {len(members)} team members. "
-            "Choose the best one for this task and call delegate_to_workspace with their ID."
+            "Choose the best one for this task and call delegate_task_async with their ID."
        ),
        "task": task,
        "members": members,
@@ -4,6 +4,7 @@ import logging
 import os
 from pathlib import Path

+from executor_helpers import get_a2a_instructions, get_hma_instructions
 from skill_loader.loader import LoadedSkill
 from shared_runtime import build_peer_section

@@ -68,6 +69,7 @@ def build_system_prompt(
    plugin_prompts: list[str] | None = None,
    parent_context: list[dict] | None = None,
    platform_instructions: str = "",
+    a2a_mcp: bool = True,
 ) -> str:
    """Build the complete system prompt.

@@ -154,6 +156,20 @@ def build_system_prompt(
            parts.append(skill.instructions)
            parts.append("")

+    # Platform tool instructions: A2A (inter-agent communication) and HMA
+    # (persistent memory). These document how to call delegate_task,
+    # commit_memory, etc — without them, agents see the tools registered
+    # but have no instructions on when/how to use them. Placed between
+    # Skills and Peers so the A2A docs precede the peer list (which is
+    # the data shape the A2A tools operate over).
+    #
+    # a2a_mcp=True: MCP tool variant (claude-code, hermes, langchain,
+    # crewai). a2a_mcp=False: CLI subprocess variant (ollama, custom
+    # runtimes that don't speak MCP). Default True matches the
+    # MCP-capable majority; CLI-only adapters override at the call site.
+    parts.append(get_a2a_instructions(mcp=a2a_mcp))
+    parts.append(get_hma_instructions())
+
    # Add peer capabilities with a single shared renderer.
    peer_section = build_peer_section(peers)
    if peer_section:
@@ -9,10 +9,10 @@
 a2a-sdk[http-server]>=1.0.0,<2.0

 # HTTP / server
-httpx>=0.27.0
-uvicorn>=0.30.0
+httpx>=0.28.1
+uvicorn>=0.46.0
 starlette>=0.38.0
-websockets>=12.0
+websockets>=16.0

 # Config parsing
 pyyaml>=6.0
@@ -24,7 +24,7 @@ langchain-core>=0.3.0
 # tools/telemetry.py gracefully degrades (noop) when these are absent,
 # but they are required for actual trace export.
 opentelemetry-api>=1.24.0
-opentelemetry-sdk>=1.24.0
+opentelemetry-sdk>=1.41.1
 # OTLP/HTTP exporter: sends spans to any OTEL collector and to Langfuse ≥4
 opentelemetry-exporter-otlp-proto-http>=1.24.0

@@ -36,4 +36,4 @@ sqlalchemy>=2.0.0
 # tasks survive crashes and can resume.  The module and TemporalWorkflowWrapper
 # load cleanly without this package — all paths fall back to direct execution.
 # Requires a running Temporal server; set TEMPORAL_HOST=<host>:7233 to enable.
-temporalio>=1.7.0
+temporalio>=1.26.0
@@ -2,6 +2,7 @@

 from __future__ import annotations

+import json
 from typing import Any

 from a2a.server.agent_execution import RequestContext
@@ -89,33 +90,46 @@ def append_peer_guidance(


 def summarize_peer_cards(peers: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    """Return compact peer metadata for prompt rendering."""
+    """Return compact peer metadata for prompt rendering.
+
+    Falls back to the registry row's `name` and `role` when `agent_card` is
+    null or unparseable so peers stay visible to delegators even before
+    their A2A discovery roundtrip has populated a card. Without this
+    fallback a coordinator-tier workspace with N freshly-created worker
+    peers would render an empty `## Your Peers` section and refuse to
+    delegate (the regression behind the 2026-04-27 Design Director
+    discovery bug).
+    """
    summaries: list[dict[str, Any]] = []
    for peer in peers:
        agent_card = peer.get("agent_card")
-        if not agent_card:
-            continue
        if isinstance(agent_card, str):
            try:
-                import json
-
                agent_card = json.loads(agent_card)
            except Exception:
-                continue
+                agent_card = None
        if not isinstance(agent_card, dict):
-            continue
+            agent_card = None
+
+        if agent_card:
+            skills_raw = agent_card.get("skills") or []
+            skills = [
+                s.get("name", s.get("id", ""))
+                for s in skills_raw
+                if isinstance(s, dict)
+            ]
+            name = agent_card.get("name") or peer.get("name") or "Unknown"
+        else:
+            skills = []
+            name = peer.get("name") or "Unknown"

-        skills = agent_card.get("skills", [])
        summaries.append(
            {
                "id": peer.get("id", "unknown"),
-                "name": agent_card.get("name", peer.get("name", "Unknown")),
+                "name": name,
+                "role": peer.get("role") or "",
                "status": peer.get("status", "unknown"),
-                "skills": [
-                    s.get("name", s.get("id", ""))
-                    for s in skills
-                    if isinstance(s, dict)
-                ],
+                "skills": skills,
            }
        )
    return summaries
@@ -126,7 +140,7 @@ def build_peer_section(
    *,
    heading: str = "## Your Peers (workspaces you can delegate to)",
    instruction: str = (
-        "Use the `delegate_to_workspace` tool to send tasks to peers. "
+        "Use the `delegate_task_async` tool to send tasks to peers. "
        "Only delegate to peers listed above."
    ),
 ) -> str:
@@ -140,6 +154,8 @@ def build_peer_section(
        parts.append(f"- **{peer['name']}** (id: `{peer['id']}`, status: {peer['status']})")
        if peer["skills"]:
            parts.append(f"  Skills: {', '.join(peer['skills'])}")
+        elif peer.get("role"):
+            parts.append(f"  Role: {peer['role']}")
        parts.append("")
    parts.append(instruction)
    return "\n".join(parts)
@@ -113,10 +113,12 @@ def _make_tools_mocks():
    tools_mod.__path__ = []  # Make it a proper package

    tools_delegation_mod = ModuleType("builtin_tools.delegation")
-    tools_delegation_mod.delegate_to_workspace = MagicMock()
-    tools_delegation_mod.delegate_to_workspace.name = "delegate_to_workspace"
-    tools_delegation_mod.check_delegation_status = MagicMock()
-    tools_delegation_mod.check_delegation_status.name = "check_delegation_status"
+    tools_delegation_mod.delegate_task = MagicMock()
+    tools_delegation_mod.delegate_task.name = "delegate_task"
+    tools_delegation_mod.delegate_task_async = MagicMock()
+    tools_delegation_mod.delegate_task_async.name = "delegate_task_async"
+    tools_delegation_mod.check_task_status = MagicMock()
+    tools_delegation_mod.check_task_status.name = "check_task_status"

    tools_approval_mod = ModuleType("builtin_tools.approval")
    tools_approval_mod.request_approval = MagicMock()
@@ -125,8 +127,8 @@ def _make_tools_mocks():
    tools_memory_mod = ModuleType("builtin_tools.memory")
    tools_memory_mod.commit_memory = MagicMock()
    tools_memory_mod.commit_memory.name = "commit_memory"
-    tools_memory_mod.search_memory = MagicMock()
-    tools_memory_mod.search_memory.name = "search_memory"
+    tools_memory_mod.recall_memory = MagicMock()
+    tools_memory_mod.recall_memory.name = "recall_memory"

    tools_sandbox_mod = ModuleType("builtin_tools.sandbox")
    tools_sandbox_mod.run_code = MagicMock()
@@ -0,0 +1,10 @@
+## Inter-Agent Communication
+You can delegate tasks to other workspaces using the a2a command:
+  python3 -m molecule_runtime.a2a_cli peers                                  # List available peers
+  python3 -m molecule_runtime.a2a_cli delegate <workspace_id> <task>          # Sync: wait for response
+  python3 -m molecule_runtime.a2a_cli delegate --async <workspace_id> <task>  # Async: return task_id
+  python3 -m molecule_runtime.a2a_cli status <workspace_id> <task_id>         # Check async task
+  python3 -m molecule_runtime.a2a_cli info                                    # Your workspace info
+
+For quick questions, use sync delegate. For long tasks, use --async + status.
+Only delegate to peers listed by the peers command (access control enforced).
@@ -0,0 +1,28 @@
+## Inter-Agent Communication
+
+- **delegate_task**: Delegate a task to a peer workspace via A2A and WAIT for the response (synchronous).
+- **delegate_task_async**: Send a task to a peer and return immediately with a task_id (non-blocking).
+- **check_task_status**: Poll the status of a task started with delegate_task_async; returns result when done.
+- **list_peers**: List the workspaces this agent can communicate with — name, ID, status, role for each.
+- **get_workspace_info**: Get this workspace's own info — ID, name, role, tier, parent, status.
+- **send_message_to_user**: Send a message directly to the user's canvas chat — pushed instantly via WebSocket. Use this to: (1) acknowledge a task immediately ('Got it, I'll start working on this'), (2) send interim progress updates while doing long work, (3) deliver follow-up results after delegation completes, (4) attach files (zip, pdf, csv, image) for the user to download via the `attachments` field (NEVER paste file URLs in `message`). The message appears in the user's chat as if you're proactively reaching out.
+
+### delegate_task
+Use for QUICK questions and small sub-tasks where you can afford to wait inline. Returns the peer's response text directly. For longer-running work (research, multi-minute jobs) use delegate_task_async + check_task_status instead so you don't hold this workspace busy waiting.
+
+### delegate_task_async
+Use for long-running work where you want to keep doing other things while the peer processes. Poll with check_task_status to retrieve the result. The platform's A2A queue handles delivery + retries; the peer works independently.
+
+### check_task_status
+Statuses: pending/in_progress (peer still working — wait), queued (peer is busy with a prior task — DO NOT retry, the platform stitches the response when it finishes), completed (result available), failed (real error — fall back to a different peer or handle it yourself).
+
+### list_peers
+Call this first when you need to delegate but don't know the target's ID. Access control is enforced — you only see siblings, parent, and direct children.
+
+### get_workspace_info
+Use to introspect your own identity (e.g. before reporting back to the user, or to determine whether you're a tier-0 root that can write GLOBAL memory).
+
+### send_message_to_user
+Use proactively across the lifecycle of a task — early to acknowledge, mid-flight to update, late to deliver. Never paste file URLs in the message body — always pass absolute paths in `attachments` so the platform serves them as download chips (works on SaaS where external file hosts are unreachable).
+
+Always use list_peers first to discover available workspace IDs. Access control is enforced — you can only reach siblings and parent/children. If a delegation returns a DELEGATION FAILED message, do NOT forward the raw error to the user. Instead: (1) try a different peer, (2) handle the task yourself, or (3) tell the user which peer is unavailable and provide your own best answer.
@@ -0,0 +1,12 @@
+## Hierarchical Memory (HMA)
+
+- **commit_memory**: Save a fact to persistent memory; survives across sessions and restarts.
+- **recall_memory**: Search persistent memory; returns matching LOCAL + TEAM + GLOBAL rows.
+
+### commit_memory
+Scopes: LOCAL (private to you, default), TEAM (shared with parent + siblings), GLOBAL (entire org — only tier-0 root workspaces can write). Commit decisions, learned facts, and completed-task summaries so future sessions and teammates can recall them.
+
+### recall_memory
+Call at the start of new work and when picking up something you may have done before. Empty query returns ALL accessible memories — cheap and avoids missing rows that don't match a narrow keyword. Memory is automatically recalled at session start; use this to refresh mid-session.
+
+Memory is automatically recalled at the start of each new session. Use commit_memory proactively during work so future sessions and teammates can recall what you learned.
@@ -28,7 +28,7 @@ async def test_route_task_to_team_delegates_preferred_member(monkeypatch):

    delegate = MagicMock()
    delegate.ainvoke = AsyncMock(return_value={"ok": True})
-    monkeypatch.setattr(sys.modules["builtin_tools.delegation"], "delegate_to_workspace", delegate)
+    monkeypatch.setattr(sys.modules["builtin_tools.delegation"], "delegate_task_async", delegate)

    result = await coordinator.route_task_to_team(
        "Do the thing",
@@ -58,4 +58,4 @@ def test_build_children_description_reuses_shared_renderer():
    assert "## Your Team (sub-workspaces you coordinate)" in description
    assert "**Alpha** (id: `child-1`, status: online)" in description
    assert "Skills: research" in description
-    assert "delegate_to_workspace" in description
+    assert "delegate_task_async" in description
@@ -4,7 +4,7 @@ The delegation tool now returns immediately with a task_id and runs the
 A2A request in the background. Tests verify:
 1. Immediate return with task_id
 2. Background task completion
-3. check_delegation_status retrieval
+3. check_task_status retrieval
 4. Error handling (RBAC, discovery, network)
 """

@@ -109,22 +109,22 @@ def delegation_mocks(monkeypatch):


 async def _invoke(mod, workspace_id="target", task="do stuff"):
-    """Call delegate_to_workspace and return the immediate result."""
-    fn = mod.delegate_to_workspace
+    """Call delegate_task_async and return the immediate result."""
+    fn = mod.delegate_task_async
    if hasattr(fn, "ainvoke"):
        return await fn.ainvoke({"workspace_id": workspace_id, "task": task})
    return await fn(workspace_id=workspace_id, task=task)


 async def _invoke_and_wait(mod, workspace_id="target", task="do stuff"):
-    """Call delegate_to_workspace, wait for background task, return status."""
+    """Call delegate_task_async, wait for background task, return status."""
    result = await _invoke(mod, workspace_id, task)
    # Wait for all background tasks to complete
    if mod._background_tasks:
        await asyncio.gather(*mod._background_tasks, return_exceptions=True)
    # Get final status
    if "task_id" in result:
-        fn = mod.check_delegation_status
+        fn = mod.check_task_status
        if hasattr(fn, "ainvoke"):
            return await fn.ainvoke({"task_id": result["task_id"]})
        return await fn(task_id=result["task_id"])
@@ -182,7 +182,7 @@ class TestAsyncDelegation:
            await _invoke(mod, workspace_id="ws-a", task="task A")
            await _invoke(mod, workspace_id="ws-b", task="task B")

-        fn = mod.check_delegation_status
+        fn = mod.check_task_status
        if hasattr(fn, "ainvoke"):
            result = await fn.ainvoke({"task_id": ""})
        else:
@@ -194,7 +194,7 @@ class TestAsyncDelegation:
    async def test_check_delegation_not_found(self, delegation_mocks):
        mod, *_ = delegation_mocks

-        fn = mod.check_delegation_status
+        fn = mod.check_task_status
        if hasattr(fn, "ainvoke"):
            result = await fn.ainvoke({"task_id": "nonexistent"})
        else:
@@ -354,7 +354,7 @@ class TestA2AQueued:


 class TestQueuedLazyRefresh:
-    """When a delegation is QUEUED, check_delegation_status must lazily
+    """When a delegation is QUEUED, check_task_status must lazily
    refresh from the platform's GET /delegations to pick up drain-stitch
    completions. Without this refresh, the LLM sees "queued" forever
    because the platform never pushes back to the runtime.
@@ -401,7 +401,7 @@ class TestQueuedLazyRefresh:
        refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)

        with patch("httpx.AsyncClient", refresh_cls):
-            fn = mod.check_delegation_status
+            fn = mod.check_task_status
            if hasattr(fn, "ainvoke"):
                refreshed = await fn.ainvoke({"task_id": task_id})
            else:
@@ -443,7 +443,7 @@ class TestQueuedLazyRefresh:
        refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)

        with patch("httpx.AsyncClient", refresh_cls):
-            fn = mod.check_delegation_status
+            fn = mod.check_task_status
            if hasattr(fn, "ainvoke"):
                refreshed = await fn.ainvoke({"task_id": task_id})
            else:
@@ -486,7 +486,7 @@ class TestQueuedLazyRefresh:
        refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)

        with patch("httpx.AsyncClient", refresh_cls):
-            fn = mod.check_delegation_status
+            fn = mod.check_task_status
            if hasattr(fn, "ainvoke"):
                refreshed = await fn.ainvoke({"task_id": task_id})
            else:
@@ -515,7 +515,7 @@ class TestQueuedLazyRefresh:
        refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)

        with patch("httpx.AsyncClient", refresh_cls):
-            fn = mod.check_delegation_status
+            fn = mod.check_task_status
            if hasattr(fn, "ainvoke"):
                refreshed = await fn.ainvoke({"task_id": task_id})
            else:
@@ -438,9 +438,12 @@ def test_get_system_prompt_handles_non_utf8(tmp_path):

 def test_get_a2a_instructions_mcp_default():
    out = get_a2a_instructions()
-    assert "MCP tools" in out
+    # Section heading is the canonical agent-facing label.
+    assert "## Inter-Agent Communication" in out
+    # Spot-check that representative A2A tool names appear in the output.
    assert "list_peers" in out
    assert "send_message_to_user" in out
+    assert "delegate_task" in out


 def test_get_a2a_instructions_cli_variant():
@@ -468,32 +471,27 @@ def test_a2a_cli_instructions_use_module_invocation_not_legacy_app_path():


 def test_a2a_mcp_instructions_reference_existing_tools():
-    """The MCP instructions text must only reference tools that are actually
-    registered in a2a_mcp_server.py. If someone renames a server tool, the
-    prompt text must be updated in lockstep — this test catches the drift.
+    """Pin the registry-driven alignment: every tool name appearing in the
+    agent-facing A2A instructions must be a tool the MCP server actually
+    registers. Both sides now derive from platform_tools.registry, so the
+    real test is that the registry's a2a_tools() set drives both surfaces
+    consistently.
    """
-    import re
-    import pathlib
-    mcp_server = pathlib.Path(__file__).parent.parent / "a2a_mcp_server.py"
-    registered = set(re.findall(r'"name":\s*"([a-z_]+)"', mcp_server.read_text()))
-    # The server advertises itself by name; strip that false positive.
-    registered.discard("a2a-delegation")
+    from a2a_mcp_server import TOOLS as MCP_TOOLS
+    from platform_tools.registry import a2a_tools

+    registered = {t["name"] for t in MCP_TOOLS}
    instructions = get_a2a_instructions(mcp=True)

-    # Every tool called out by name in the instructions must exist on the
-    # server. (We allow the server to have extras the prompt doesn't mention.)
-    referenced = {
-        "list_peers",
-        "delegate_task",
-        "delegate_task_async",
-        "check_task_status",
-        "get_workspace_info",
-        "send_message_to_user",
-    }
-    for name in referenced:
-        assert name in instructions, f"prompt missing {name}"
-        assert name in registered, f"MCP server no longer registers {name}"
+    for spec in a2a_tools():
+        assert spec.name in instructions, (
+            f"A2A instructions are missing the tool {spec.name!r} that "
+            f"the registry declares — the doc generator drifted."
+        )
+        assert spec.name in registered, (
+            f"MCP server no longer registers {spec.name!r} that the registry "
+            f"declares — the MCP TOOLS list drifted from the registry."
+        )


 # ======================================================================
@@ -98,7 +98,7 @@ def test_commit_memory_uses_awareness_client_when_configured(monkeypatch, memory
    assert captured["json"] == {"content": "remember this", "scope": "TEAM"}


-def test_search_memory_uses_platform_fallback_without_awareness(monkeypatch, memory_modules):
+def test_recall_memory_uses_platform_fallback_without_awareness(monkeypatch, memory_modules):
    memory, _awareness_client = memory_modules
    captured = {}

@@ -119,7 +119,7 @@ def test_search_memory_uses_platform_fallback_without_awareness(monkeypatch, mem

    monkeypatch.setattr(memory.httpx, "AsyncClient", FakeAsyncClient)

-    result = asyncio.run(memory.search_memory("status", "local"))
+    result = asyncio.run(memory.recall_memory("status", "local"))

    assert result == {
        "success": True,
@@ -236,10 +236,10 @@ def test_commit_memory_promoted_packet_logs_skill_promotion(monkeypatch, tmp_pat
    assert not (tmp_path / "skills").exists()


-def test_search_memory_rejects_invalid_scope(memory_modules):
+def test_recall_memory_rejects_invalid_scope(memory_modules):
    memory, _awareness_client = memory_modules

-    result = asyncio.run(memory.search_memory("status", "bad"))
+    result = asyncio.run(memory.recall_memory("status", "bad"))

    assert result == {"error": "scope must be LOCAL, TEAM, GLOBAL, or empty"}

@@ -457,15 +457,15 @@ def test_commit_memory_result_failure(memory_modules_with_mocks):


 # ---------------------------------------------------------------------------
-# search_memory — RBAC deny
+# recall_memory — RBAC deny
 # ---------------------------------------------------------------------------

-def test_search_memory_rbac_deny(memory_modules_with_mocks):
+def test_recall_memory_rbac_deny(memory_modules_with_mocks):
    memory, mock_audit, _ = memory_modules_with_mocks
    mock_audit.check_permission.return_value = False
    mock_audit.get_workspace_roles.return_value = (["read-only-special"], {})

-    result = asyncio.run(memory.search_memory("find something", "local"))
+    result = asyncio.run(memory.recall_memory("find something", "local"))

    assert result["success"] is False
    assert "RBAC" in result["error"]
@@ -473,22 +473,22 @@ def test_search_memory_rbac_deny(memory_modules_with_mocks):


 # ---------------------------------------------------------------------------
-# search_memory — invalid scope
+# recall_memory — invalid scope
 # ---------------------------------------------------------------------------

-def test_search_memory_invalid_scope(memory_modules_with_mocks):
+def test_recall_memory_invalid_scope(memory_modules_with_mocks):
    memory, _mock_audit, _ = memory_modules_with_mocks

-    result = asyncio.run(memory.search_memory("q", "BAD"))
+    result = asyncio.run(memory.recall_memory("q", "BAD"))

    assert result == {"error": "scope must be LOCAL, TEAM, GLOBAL, or empty"}


 # ---------------------------------------------------------------------------
-# search_memory — awareness_client success
+# recall_memory — awareness_client success
 # ---------------------------------------------------------------------------

-def test_search_memory_awareness_client_success(memory_modules_with_mocks):
+def test_recall_memory_awareness_client_success(memory_modules_with_mocks):
    from unittest.mock import AsyncMock, MagicMock
    memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks

@@ -501,7 +501,7 @@ def test_search_memory_awareness_client_success(memory_modules_with_mocks):
    # Patch directly on the loaded module since it imported the name at load time
    memory.build_awareness_client = MagicMock(return_value=mock_ac)

-    result = asyncio.run(memory.search_memory("find", "team"))
+    result = asyncio.run(memory.recall_memory("find", "team"))

    assert result["success"] is True
    assert result["count"] == 2
@@ -509,10 +509,10 @@ def test_search_memory_awareness_client_success(memory_modules_with_mocks):


 # ---------------------------------------------------------------------------
-# search_memory — awareness_client raises
+# recall_memory — awareness_client raises
 # ---------------------------------------------------------------------------

-def test_search_memory_awareness_client_exception(memory_modules_with_mocks):
+def test_recall_memory_awareness_client_exception(memory_modules_with_mocks):
    from unittest.mock import AsyncMock, MagicMock
    memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks

@@ -521,7 +521,7 @@ def test_search_memory_awareness_client_exception(memory_modules_with_mocks):
    # Patch directly on the loaded module since it imported the name at load time
    memory.build_awareness_client = MagicMock(return_value=mock_ac)

-    result = asyncio.run(memory.search_memory("query", "local"))
+    result = asyncio.run(memory.recall_memory("query", "local"))

    assert result["success"] is False
    assert "awareness search failed" in result["error"]
@@ -530,10 +530,10 @@ def test_search_memory_awareness_client_exception(memory_modules_with_mocks):


 # ---------------------------------------------------------------------------
-# search_memory — httpx 200 success (no awareness_client)
+# recall_memory — httpx 200 success (no awareness_client)
 # ---------------------------------------------------------------------------

-def test_search_memory_httpx_200_success(memory_modules_with_mocks):
+def test_recall_memory_httpx_200_success(memory_modules_with_mocks):
    memory, _mock_audit, _ = memory_modules_with_mocks

    class FakeAsyncClient:
@@ -545,7 +545,7 @@ def test_search_memory_httpx_200_success(memory_modules_with_mocks):

    memory.httpx.AsyncClient = FakeAsyncClient

-    result = asyncio.run(memory.search_memory("find", "global"))
+    result = asyncio.run(memory.recall_memory("find", "global"))

    assert result["success"] is True
    assert result["count"] == 2
@@ -553,10 +553,10 @@ def test_search_memory_httpx_200_success(memory_modules_with_mocks):


 # ---------------------------------------------------------------------------
-# search_memory — httpx non-200
+# recall_memory — httpx non-200
 # ---------------------------------------------------------------------------

-def test_search_memory_httpx_non_200(memory_modules_with_mocks):
+def test_recall_memory_httpx_non_200(memory_modules_with_mocks):
    memory, mock_audit, _ = memory_modules_with_mocks

    class FakeAsyncClient:
@@ -568,17 +568,17 @@ def test_search_memory_httpx_non_200(memory_modules_with_mocks):

    memory.httpx.AsyncClient = FakeAsyncClient

-    result = asyncio.run(memory.search_memory("q", ""))
+    result = asyncio.run(memory.recall_memory("q", ""))

    assert result["success"] is False
    assert "server error" in result["error"]


 # ---------------------------------------------------------------------------
-# search_memory — httpx raises
+# recall_memory — httpx raises
 # ---------------------------------------------------------------------------

-def test_search_memory_httpx_exception(memory_modules_with_mocks):
+def test_recall_memory_httpx_exception(memory_modules_with_mocks):
    memory, mock_audit, _ = memory_modules_with_mocks

    class FakeAsyncClient:
@@ -590,7 +590,7 @@ def test_search_memory_httpx_exception(memory_modules_with_mocks):

    memory.httpx.AsyncClient = FakeAsyncClient

-    result = asyncio.run(memory.search_memory("query", "local"))
+    result = asyncio.run(memory.recall_memory("query", "local"))

    assert result["success"] is False
    assert "request timed out" in result["error"]
@@ -672,7 +672,7 @@ def test_commit_memory_awareness_exception_span_record_fails(memory_modules_with
    assert result["success"] is False  # error propagated despite span failure


-def test_search_memory_awareness_exception_span_record_fails(memory_modules_with_mocks):
+def test_recall_memory_awareness_exception_span_record_fails(memory_modules_with_mocks):
    """awareness_client.search raises + span.record_exception also raises: error still returned."""
    from unittest.mock import AsyncMock, MagicMock
    memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks
@@ -685,7 +685,7 @@ def test_search_memory_awareness_exception_span_record_fails(memory_modules_with
    mock_ac.search = AsyncMock(side_effect=RuntimeError("awareness down"))
    memory.build_awareness_client = MagicMock(return_value=mock_ac)

-    result = asyncio.run(memory.search_memory("test", "local"))
+    result = asyncio.run(memory.recall_memory("test", "local"))
    assert result["success"] is False


@@ -711,8 +711,8 @@ def test_commit_memory_httpx_exception_span_record_fails(memory_modules_with_moc
    assert result["success"] is False


-def test_search_memory_httpx_exception_span_record_fails(memory_modules_with_mocks):
-    """httpx raises in search_memory + span.record_exception also raises: error still returned."""
+def test_recall_memory_httpx_exception_span_record_fails(memory_modules_with_mocks):
+    """httpx raises in recall_memory + span.record_exception also raises: error still returned."""
    from unittest.mock import MagicMock
    memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks

@@ -729,7 +729,7 @@ def test_search_memory_httpx_exception_span_record_fails(memory_modules_with_moc

    memory.httpx.AsyncClient = FakeAsyncClient

-    result = asyncio.run(memory.search_memory("query", "local"))
+    result = asyncio.run(memory.recall_memory("query", "local"))
    assert result["success"] is False


@@ -0,0 +1,242 @@
+"""Structural alignment tests — every adapter must agree with the registry.
+
+The registry in workspace/platform_tools/registry.py is the single source
+of truth for tool naming + docs. These tests fail if any consumer
+(MCP server, LangChain @tool wrappers, doc generators) drifts.
+
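+If you add a tool: append a ToolSpec to registry.TOOLS, then add the
+matching @tool wrapper in builtin_tools/. The tests below compare the
+registry against the MCP server's TOOLS list, the generated instruction
+text, and the CLI keyword mapping. NOTE: nothing in this file imports the
+LangChain @tool wrappers, so a registry name that has no @tool
+counterpart (or vice versa) is not caught here.
+
+If you rename a tool: edit registry.TOOLS only. These tests fail loudly
+if the MCP TOOLS["name"] or the agent-facing docs still carry the old
+name; the LangChain @tool name is not checked in this file.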
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from platform_tools.registry import TOOLS, a2a_tools, by_name, memory_tools, tool_names
+
+
+def test_registry_names_are_unique():
+    """No two ToolSpecs may share a name — a repeat signals a typo."""
+    registered = tool_names()
+    distinct = set(registered)
+    assert len(registered) == len(distinct), f"duplicate tool names: {registered}"
+
+
+def test_registry_a2a_and_memory_partition_is_complete():
+    """The a2a and memory sections together cover every tool, with no overlap."""
+    a2a_names = {spec.name for spec in a2a_tools()}
+    memory_names = {spec.name for spec in memory_tools()}
+    every_name = set(tool_names())
+    overlap = a2a_names & memory_names
+    assert a2a_names | memory_names == every_name
+    assert not overlap, f"tool in both sections: {overlap}"
+
+
+def test_by_name_lookup_works():
+    """by_name returns the matching spec and raises KeyError for an unknown name."""
+    found = by_name("delegate_task")
+    assert found.name == "delegate_task"
+    assert found.section == "a2a"
+
+    # An unregistered name must raise rather than return a placeholder.
+    with pytest.raises(KeyError):
+        by_name("nonexistent_tool")
+
+
+def test_mcp_server_registers_every_registry_tool():
+    """The MCP server must expose exactly the registry's tool names.
+
+    A mismatch in either direction fails; the message lists the names
+    that were found on only one side.
+    """
+    from a2a_mcp_server import TOOLS as MCP_TOOLS
+
+    mcp_names = {entry["name"] for entry in MCP_TOOLS}
+    registry_names = set(tool_names())
+    mcp_only = mcp_names - registry_names
+    registry_only = registry_names - mcp_names
+    assert mcp_names == registry_names, (
+        f"MCP and registry diverged. MCP-only: {mcp_only}; "
+        f"registry-only: {registry_only}"
+    )
+
+
+def test_mcp_tool_descriptions_match_registry_short():
+    """Every MCP tool description must equal its registry spec's `short` text.
+
+    The MCP entries are indexed by name first, so a registry name with no
+    MCP entry surfaces as a KeyError rather than an assertion failure.
+    """
+    from a2a_mcp_server import TOOLS as MCP_TOOLS
+
+    mcp_entries = {entry["name"]: entry for entry in MCP_TOOLS}
+    for spec in TOOLS:
+        served_description = mcp_entries[spec.name]["description"]
+        assert served_description == spec.short, (
+            f"MCP description for {spec.name!r} drifted from registry.short. "
+            f"Edit registry.py, not the MCP server's TOOLS list."
+        )
+
+
+def test_mcp_tool_input_schemas_match_registry():
+    """Each MCP tool's `inputSchema` must equal its registry spec's `input_schema`."""
+    from a2a_mcp_server import TOOLS as MCP_TOOLS
+
+    mcp_entries = {entry["name"]: entry for entry in MCP_TOOLS}
+    for spec in TOOLS:
+        served_schema = mcp_entries[spec.name]["inputSchema"]
+        assert served_schema == spec.input_schema, (
+            f"MCP inputSchema for {spec.name!r} drifted from registry."
+        )
+
+
+def test_a2a_instructions_text_includes_every_a2a_tool():
+    """get_a2a_instructions must mention every a2a-section tool by name.
+
+    A plain substring check is not enough: a tool name can be a prefix of
+    a longer identifier in the text (e.g. `delegate_task` inside
+    `delegate_task_async`), so the shorter name would count as present
+    even when only the longer one is documented. Each name must therefore
+    be followed by a non-identifier character or the end of the text.
+    """
+    import re
+
+    from executor_helpers import get_a2a_instructions
+
+    instructions = get_a2a_instructions(mcp=True)
+    for spec in a2a_tools():
+        # Only the trailing edge is anchored, so a rendering that puts
+        # some prefix in front of the tool name still counts as a mention.
+        mentioned = re.search(
+            re.escape(spec.name) + r"(?![A-Za-z0-9_])", instructions
+        )
+        assert mentioned is not None, (
+            f"agent-facing A2A docs missing tool {spec.name!r} from registry"
+        )
+
+
+def test_hma_instructions_text_includes_every_memory_tool():
+    """The text from get_hma_instructions must contain each memory tool's name."""
+    from executor_helpers import get_hma_instructions
+
+    hma_text = get_hma_instructions()
+    for memory_spec in memory_tools():
+        tool = memory_spec.name
+        assert tool in hma_text, (
+            f"agent-facing HMA docs missing tool {tool!r} from registry"
+        )
+
+
+def test_old_pre_rename_names_not_present_in_docs():
+    """The retired tool names must not appear anywhere in the generated docs.
+
+    Searches the concatenation of the MCP-variant A2A text and the HMA
+    text for `delegate_to_workspace`, `search_memory` and
+    `check_delegation_status`.
+    """
+    from executor_helpers import get_a2a_instructions, get_hma_instructions
+
+    retired_names = ("delegate_to_workspace", "search_memory", "check_delegation_status")
+    combined_docs = get_a2a_instructions(mcp=True) + get_hma_instructions()
+    for stale in retired_names:
+        assert stale not in combined_docs, (
+            f"pre-rename name {stale!r} leaked into docs — registry "
+            f"is the source of truth, not the doc generator."
+        )
+
+
+# ---------------------------------------------------------------------------
+# Snapshot / golden-file tests
+#
+# `_render_section` produces the LLM-visible system-prompt block. The
+# structural tests above guarantee tool NAMES are present; these tests
+# pin the SHAPE — bullet ordering, heading style, footer placement —
+# so a future contributor who reorders fields in `_render_section` or
+# rewrites a `when_to_use` paragraph sees the diff in CI.
+#
+# To regenerate after an intentional registry edit (the encoding is pinned
+# to UTF-8 because `_read_snapshot` reads the files back as UTF-8):
+#   cd workspace && WORKSPACE_ID=test-snapshot PLATFORM_URL=http://localhost \
+#     python3 -c "from executor_helpers import get_a2a_instructions, get_hma_instructions; \
+#                 open('tests/snapshots/a2a_instructions_mcp.txt','w',encoding='utf-8').write(get_a2a_instructions(mcp=True)); \
+#                 open('tests/snapshots/a2a_instructions_cli.txt','w',encoding='utf-8').write(get_a2a_instructions(mcp=False)); \
+#                 open('tests/snapshots/hma_instructions.txt','w',encoding='utf-8').write(get_hma_instructions())"
+# ---------------------------------------------------------------------------
+
+from pathlib import Path
+
+_SNAPSHOTS = Path(__file__).parent / "snapshots"
+
+
+def _read_snapshot(name: str) -> str:
+    """Return the UTF-8 decoded text of the golden file `name` under `_SNAPSHOTS`."""
+    snapshot_path = _SNAPSHOTS / name
+    return snapshot_path.read_text(encoding="utf-8")
+
+
+def test_a2a_mcp_instructions_match_snapshot():
+    """The MCP-variant A2A text must equal `a2a_instructions_mcp.txt` exactly."""
+    from executor_helpers import get_a2a_instructions
+
+    rendered = get_a2a_instructions(mcp=True)
+    golden = _read_snapshot("a2a_instructions_mcp.txt")
+    drift_message = (
+        "get_a2a_instructions(mcp=True) drifted from snapshot. If the change "
+        "is intentional, regenerate with the command in the test-file header."
+    )
+    assert rendered == golden, drift_message
+
+
+def test_a2a_cli_instructions_match_snapshot():
+    """The CLI-variant A2A text must equal `a2a_instructions_cli.txt` exactly."""
+    from executor_helpers import get_a2a_instructions
+
+    rendered = get_a2a_instructions(mcp=False)
+    golden = _read_snapshot("a2a_instructions_cli.txt")
+    drift_message = (
+        "get_a2a_instructions(mcp=False) drifted from snapshot. If the change "
+        "is intentional, regenerate with the command in the test-file header."
+    )
+    assert rendered == golden, drift_message
+
+
+def test_hma_instructions_match_snapshot():
+    """The HMA instruction text must equal `hma_instructions.txt` exactly."""
+    from executor_helpers import get_hma_instructions
+
+    rendered = get_hma_instructions()
+    golden = _read_snapshot("hma_instructions.txt")
+    drift_message = (
+        "get_hma_instructions() drifted from snapshot. If the change is "
+        "intentional, regenerate with the command in the test-file header."
+    )
+    assert rendered == golden, drift_message
+
+
+# ---------------------------------------------------------------------------
+# CLI-block alignment tests
+#
+# Registry is the source of truth for MCP-capable runtimes; the CLI
+# subprocess block (`_A2A_INSTRUCTIONS_CLI`) is a SEPARATE hand-maintained
+# surface for ollama and other non-MCP adapters. The two diverged
+# silently in the past — `send_message_to_user` was added to the
+# registry but the CLI block was never updated. These tests close that
+# gap by requiring a deliberate decision (subcommand keyword OR
+# explicit `None`) for every a2a tool.
+# ---------------------------------------------------------------------------
+
+
+def test_cli_keyword_mapping_covers_every_a2a_tool():
+    """`_CLI_A2A_COMMAND_KEYWORDS` must have exactly one key per a2a tool.
+
+    Fails when an a2a-section registry name has no key in the mapping,
+    and also when the mapping holds a key that is not an a2a-section
+    registry name. The mapped value (a keyword or `None`) is not
+    inspected here.
+    """
+    from executor_helpers import _CLI_A2A_COMMAND_KEYWORDS
+
+    registry_a2a = {spec.name for spec in a2a_tools()}
+    mapped = set(_CLI_A2A_COMMAND_KEYWORDS)
+
+    missing = registry_a2a - mapped
+    extra = mapped - registry_a2a
+    assert not missing, (
+        f"a2a tools missing from _CLI_A2A_COMMAND_KEYWORDS: {missing}. "
+        f"Add a key for each — set value to the CLI subcommand keyword "
+        f"or None if the tool isn't exposed via the subprocess interface."
+    )
+    assert not extra, (
+        f"_CLI_A2A_COMMAND_KEYWORDS has keys for tools no longer in the "
+        f"registry: {extra}. Remove them."
+    )
+
+
+def test_cli_keyword_substrings_appear_in_cli_block():
+    """Each non-None keyword in the mapping must occur in `_A2A_INSTRUCTIONS_CLI`.
+
+    Entries mapped to `None` are skipped; every other value is checked as
+    a literal substring of the CLI doc block.
+    """
+    from executor_helpers import _A2A_INSTRUCTIONS_CLI, _CLI_A2A_COMMAND_KEYWORDS
+
+    exposed = (
+        (tool_name, keyword)
+        for tool_name, keyword in _CLI_A2A_COMMAND_KEYWORDS.items()
+        if keyword is not None
+    )
+    for tool_name, keyword in exposed:
+        assert keyword in _A2A_INSTRUCTIONS_CLI, (
+            f"_CLI_A2A_COMMAND_KEYWORDS[{tool_name!r}] = {keyword!r} but "
+            f"that substring is missing from _A2A_INSTRUCTIONS_CLI. Either "
+            f"add the subcommand to the CLI doc block or change the "
+            f"mapping value to None."
+        )
@@ -202,9 +202,12 @@ def test_peer_capabilities_format(tmp_path):
    assert "## Your Peers" in result
    assert "**Echo Agent** (id: `peer-1`, status: online)" in result
    assert "Skills: echo, repeat" in result
-    assert "delegate_to_workspace" in result
-    # peer-2 has no agent_card so it's skipped
-    assert "Silent Agent" not in result
+    assert "delegate_task_async" in result
+    # peer-2 has no agent_card but DOES have a DB name + status — must
+    # still render so coordinators can delegate to freshly-created peers
+    # whose A2A discovery hasn't populated a card yet (regression test for
+    # the 2026-04-27 Design Director discovery bug).
+    assert "**Silent Agent** (id: `peer-2`, status: offline)" in result


 def test_peer_with_json_string_agent_card(tmp_path):
@@ -392,3 +395,77 @@ async def test_get_peer_capabilities_exception():
        result = await get_peer_capabilities("http://platform:8080", "ws-abc")

    assert result == []
+
+
+# Regression tests for the A2A + HMA tool-instruction injection. Pre-fix,
+# get_a2a_instructions() and get_hma_instructions() were defined in
+# executor_helpers.py but never called from build_system_prompt — workers
+# saw the platform's delegate_task / commit_memory tools registered but
+# had no documentation telling them how to use them.
+
+def test_a2a_instructions_injected_default_mcp(tmp_path):
+    """With no a2a_mcp argument, the prompt carries the A2A heading and tool names."""
+    prompt_file = tmp_path / "system-prompt.md"
+    prompt_file.write_text("Base.")
+
+    prompt = build_system_prompt(
+        config_path=str(tmp_path),
+        workspace_id="ws-1",
+        loaded_skills=[],
+        peers=[],
+    )
+
+    expected_fragments = (
+        "## Inter-Agent Communication",
+        "delegate_task",
+        "list_peers",
+        "send_message_to_user",
+    )
+    for fragment in expected_fragments:
+        assert fragment in prompt
+
+
+def test_a2a_instructions_cli_variant_when_disabled(tmp_path):
+    """With a2a_mcp=False the prompt names the a2a_cli module and omits send_message_to_user."""
+    prompt_file = tmp_path / "system-prompt.md"
+    prompt_file.write_text("Base.")
+
+    prompt = build_system_prompt(
+        config_path=str(tmp_path),
+        workspace_id="ws-1",
+        loaded_skills=[],
+        peers=[],
+        a2a_mcp=False,
+    )
+
+    for fragment in ("## Inter-Agent Communication", "molecule_runtime.a2a_cli"):
+        assert fragment in prompt
+    # The MCP-only tool name must stay out of the CLI variant.
+    assert "send_message_to_user" not in prompt
+
+
+def test_hma_instructions_injected(tmp_path):
+    """The built prompt contains the HMA heading and both memory tool names."""
+    prompt_file = tmp_path / "system-prompt.md"
+    prompt_file.write_text("Base.")
+
+    prompt = build_system_prompt(
+        config_path=str(tmp_path),
+        workspace_id="ws-1",
+        loaded_skills=[],
+        peers=[],
+    )
+
+    for fragment in ("## Hierarchical Memory (HMA)", "commit_memory", "recall_memory"):
+        assert fragment in prompt
+
+
+def test_tool_instructions_precede_peer_section(tmp_path):
+    """The A2A heading must appear earlier in the prompt than the peer heading."""
+    prompt_file = tmp_path / "system-prompt.md"
+    prompt_file.write_text("Base.")
+
+    worker = {"id": "p1", "name": "Worker", "status": "active", "agent_card": None}
+    prompt = build_system_prompt(
+        config_path=str(tmp_path),
+        workspace_id="ws-1",
+        loaded_skills=[],
+        peers=[worker],
+    )
+
+    # str.index raises ValueError if either heading is absent, which also
+    # fails the test.
+    a2a_at = prompt.index("## Inter-Agent Communication")
+    peers_at = prompt.index("## Your Peers")
+    assert a2a_at < peers_at, "A2A instructions must come before the peer list"
@@ -0,0 +1,111 @@
+"""Pin peer-summary fallback when agent_card is missing.
+
+Regression test for the 2026-04-27 Design Director discovery bug:
+`summarize_peer_cards()` previously skipped any peer whose `agent_card`
+was null or unparseable, so a coordinator with freshly-created workers
+saw an empty `## Your Peers` section in its system prompt and refused
+to delegate. The registry endpoint already returns DB `name` + `role`
+on every row regardless of agent_card state — falling back to those
+keeps peers visible while A2A discovery catches up.
+"""
+
+from __future__ import annotations
+
+from shared_runtime import build_peer_section, summarize_peer_cards
+
+
+def _peer(**overrides):
+    """Build a peer row with default DB fields and a null agent_card.
+
+    Keyword arguments replace the matching default or add a new key.
+    """
+    return {
+        "id": "ws-1",
+        "name": "DB Name",
+        "role": "DB Role",
+        "status": "active",
+        "agent_card": None,
+        **overrides,
+    }
+
+
+def test_summarize_includes_peer_with_null_agent_card_using_db_fields():
+    """A peer with a null agent_card is still summarized from its DB fields."""
+    summaries = summarize_peer_cards([_peer()])
+    assert len(summaries) == 1
+    only = summaries[0]
+    assert only["id"] == "ws-1"
+    assert only["name"] == "DB Name"
+    assert only["role"] == "DB Role"
+    assert only["status"] == "active"
+    assert only["skills"] == []
+
+
+def test_summarize_prefers_agent_card_name_over_db_name():
+    """The card's name and skills are used when present; role still comes from the DB row."""
+    card = {"name": "Card Name", "skills": [{"name": "draft-spec"}]}
+    first = summarize_peer_cards([_peer(agent_card=card)])[0]
+    assert first["name"] == "Card Name"
+    assert first["skills"] == ["draft-spec"]
+    assert first["role"] == "DB Role"
+
+
+def test_summarize_handles_string_agent_card_json():
+    """An agent_card supplied as a JSON string yields the name from that JSON."""
+    json_card = '{"name": "JSON Name", "skills": []}'
+    first = summarize_peer_cards([_peer(agent_card=json_card)])[0]
+    assert first["name"] == "JSON Name"
+
+
+def test_summarize_falls_back_when_agent_card_string_is_malformed():
+    """An unparseable agent_card string falls back to the DB name and role, with no skills."""
+    summaries = summarize_peer_cards([_peer(agent_card="not-valid-json")])
+    assert len(summaries) == 1
+    only = summaries[0]
+    assert only["name"] == "DB Name"
+    assert only["role"] == "DB Role"
+    assert only["skills"] == []
+
+
+def test_summarize_falls_back_when_agent_card_is_wrong_type():
+    """An agent_card that is an int still produces one summary, named from the DB row."""
+    summaries = summarize_peer_cards([_peer(agent_card=42)])
+    assert len(summaries) == 1
+    only = summaries[0]
+    assert only["name"] == "DB Name"
+
+
+def test_summarize_handles_missing_role_and_name_with_unknown_default():
+    """A row lacking both `name` and `role` is summarized as "Unknown" with an empty role."""
+    bare_row = {"id": "ws-2", "status": "active", "agent_card": None}
+    first = summarize_peer_cards([bare_row])[0]
+    assert first["name"] == "Unknown"
+    assert first["role"] == ""
+
+
+def test_build_peer_section_renders_role_when_skills_empty():
+    """A card-less peer renders with its name and a Role line, and no Skills line."""
+    rendered = build_peer_section([_peer()])
+    for fragment in ("## Your Peers", "**DB Name**", "Role: DB Role"):
+        assert fragment in rendered
+    assert "Skills:" not in rendered
+
+
+def test_build_peer_section_prefers_skills_over_role_when_card_present():
+    """When the card lists skills, the section shows them and leaves out the Role line."""
+    card = {"name": "Worker", "skills": [{"name": "design"}, {"name": "review"}]}
+    rendered = build_peer_section([_peer(agent_card=card)])
+    assert "Skills: design, review" in rendered
+    assert "Role: DB Role" not in rendered
+
+
+def test_build_peer_section_mixed_peers():
+    """A card-less peer and a peer with a card both render in the same section."""
+    without_card = _peer(id="ws-a")
+    with_card = _peer(
+        id="ws-b",
+        agent_card={"name": "Card B", "skills": [{"name": "build"}]},
+    )
+    rendered = build_peer_section([without_card, with_card])
+    expected_fragments = (
+        "id: `ws-a`",
+        "id: `ws-b`",
+        "Role: DB Role",
+        "Skills: build",
+    )
+    for fragment in expected_fragments:
+        assert fragment in rendered
+
+
+def test_build_peer_section_empty_when_no_peers():
+    """An empty peer list renders as an empty string."""
+    rendered = build_peer_section([])
+    assert rendered == ""