Merge pull request #2415 from Molecule-AI/feat/molecule-mcp-inbox-polling

feat(workspace-runtime): inbox polling for standalone molecule-mcp
This commit is contained in:
Hongming Wang 2026-04-30 23:41:47 +00:00 committed by GitHub
commit cc58e87393
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 1186 additions and 0 deletions

View File

@ -64,6 +64,7 @@ TOP_LEVEL_MODULES = {
"events",
"executor_helpers",
"heartbeat",
"inbox",
"initial_prompt",
"internal_chat_uploads",
"internal_file_read",

View File

@ -43,6 +43,21 @@ def smoke_imports_and_invariants() -> None:
assert callable(cli_main), "a2a_mcp_server.cli_main must be callable"
assert callable(mcp_cli_main), "mcp_cli.main must be callable"
# inbox.activate / get_state / start_poller_thread form the inbound
# delivery path for the standalone molecule-mcp wrapper. mcp_cli.main
# imports + activates these at startup; if a wheel ships without
# them, the standalone agent silently loses the wait_for_message /
# inbox_peek / inbox_pop tools and reverts to outbound-only.
from molecule_runtime.inbox import ( # noqa: F401
InboxState,
activate as inbox_activate,
get_state as inbox_get_state,
start_poller_thread as inbox_start_poller_thread,
)
assert callable(inbox_activate), "inbox.activate must be callable"
assert callable(inbox_get_state), "inbox.get_state must be callable"
assert callable(inbox_start_poller_thread), "inbox.start_poller_thread must be callable"
assert a2a_client._A2A_ERROR_PREFIX, "a2a_client missing error sentinel"
assert callable(get_adapter), "adapters.get_adapter must be callable"
assert hasattr(BaseAdapter, "name"), "BaseAdapter interface broken"

View File

@ -23,9 +23,12 @@ from a2a_tools import (
tool_delegate_task,
tool_delegate_task_async,
tool_get_workspace_info,
tool_inbox_peek,
tool_inbox_pop,
tool_list_peers,
tool_recall_memory,
tool_send_message_to_user,
tool_wait_for_message,
)
from platform_tools.registry import TOOLS as _PLATFORM_TOOL_SPECS
@ -112,6 +115,18 @@ async def handle_tool_call(name: str, arguments: dict) -> str:
arguments.get("query", ""),
arguments.get("scope", ""),
)
elif name == "wait_for_message":
return await tool_wait_for_message(
arguments.get("timeout_secs", 60.0),
)
elif name == "inbox_peek":
return await tool_inbox_peek(
arguments.get("limit", 10),
)
elif name == "inbox_pop":
return await tool_inbox_pop(
arguments.get("activity_id", ""),
)
return f"Unknown tool: {name}"

View File

@ -526,3 +526,84 @@ async def tool_recall_memory(query: str = "", scope: str = "") -> str:
return json.dumps(data)
except Exception as e:
return f"Error recalling memory: {e}"
# ---------------------------------------------------------------------------
# Inbox tools — inbound delivery for the standalone molecule-mcp path.
# ---------------------------------------------------------------------------
#
# The InboxState singleton is set by mcp_cli before the MCP server starts
# (see workspace/inbox.py for the rationale). In-container runtimes never
# call ``inbox.activate(...)``, so ``inbox.get_state()`` returns None and
# these tools surface an informational error rather than raising.
#
# When-to-use guidance (mirrored in platform_tools/registry.py): agents
# in standalone-runtime mode should call ``wait_for_message`` to block
# on the next inbound message after they've emitted a reply, forming
# the loop ``wait → respond → wait``. ``inbox_peek`` is for inspecting
# the queue without consuming; ``inbox_pop`` removes a handled message.
# Shared sentinel returned by all three inbox tools when the runtime
# never called ``inbox.activate`` (i.e. any non-standalone runtime).
_INBOX_NOT_ENABLED_MSG = (
    "Error: inbox polling is not enabled in this runtime. The standalone "
    "molecule-mcp wrapper activates it; in-container runtimes receive "
    "messages via push delivery and do not need these tools."
)
async def tool_inbox_peek(limit: int = 10) -> str:
    """Return up to ``limit`` pending inbound messages without removing them.

    Args:
        limit: Maximum number of messages to return. Non-numeric,
            boolean, or non-positive values fall back to the default 10.

    Returns:
        JSON array of message dicts, or an informational error string
        when inbox polling is not active in this runtime.
    """
    import inbox  # local import — avoids a circular dep at module load

    state = inbox.get_state()
    if state is None:
        return _INBOX_NOT_ENABLED_MSG
    # MCP arguments arrive as parsed JSON, so a numeric ``limit`` may be
    # a float (e.g. 5.0). The old isinstance(int) check silently dropped
    # such values back to 10; coerce instead. Bools are excluded because
    # ``True`` would otherwise coerce to limit=1.
    try:
        effective_limit = 10 if isinstance(limit, bool) else int(limit)
    except (TypeError, ValueError):
        effective_limit = 10
    messages = state.peek(limit=effective_limit)
    return json.dumps([m.to_dict() for m in messages])
async def tool_inbox_pop(activity_id: str) -> str:
    """Remove a message from the inbox queue by activity_id.

    Idempotent: popping an id that is not queued reports
    ``removed=false`` in the JSON result rather than raising.
    """
    import inbox  # local import — avoids a circular dep at module load

    state = inbox.get_state()
    if state is None:
        return _INBOX_NOT_ENABLED_MSG
    if not (isinstance(activity_id, str) and activity_id):
        return "Error: activity_id is required."
    was_removed = state.pop(activity_id) is not None
    return json.dumps({"removed": was_removed, "activity_id": activity_id})
async def tool_wait_for_message(timeout_secs: float = 60.0) -> str:
    """Block until a new message arrives or ``timeout_secs`` elapses.

    Returns the head message non-destructively; the agent decides
    whether to ``inbox_pop`` it after acting.
    """
    import asyncio
    import inbox

    state = inbox.get_state()
    if state is None:
        return _INBOX_NOT_ENABLED_MSG
    try:
        requested = float(timeout_secs)
    except (TypeError, ValueError):
        requested = 60.0
    # Clamp to [0, 300]. Claude Code's default tool timeout is ~10min,
    # and blocking longer than 5min wastes the prompt cache window for
    # nothing useful. Operators who want longer can call repeatedly.
    timeout = min(max(requested, 0.0), 300.0)
    # state.wait blocks on a threading.Event, which would stall the
    # asyncio loop. Run it on the default executor so the MCP server
    # can keep processing other JSON-RPC requests while we sleep.
    message = await asyncio.get_running_loop().run_in_executor(
        None, state.wait, timeout
    )
    if message is None:
        return json.dumps({"timeout": True, "timeout_secs": timeout})
    return json.dumps(message.to_dict())

View File

@ -334,6 +334,14 @@ _CLI_A2A_COMMAND_KEYWORDS: dict[str, str | None] = {
# grows a `say` or `message` subcommand, change `None` to that
# keyword and the alignment test will start passing.
"send_message_to_user": None,
# Inbox tools live in the standalone molecule-mcp wrapper only;
# CLI-subprocess runtimes have their own delivery loop and never
# invoke these. The alignment test allows None entries — they
# appear in registry.TOOLS for adapter consistency without
# forcing a CLI subcommand.
"wait_for_message": None,
"inbox_peek": None,
"inbox_pop": None,
}

480
workspace/inbox.py Normal file
View File

@ -0,0 +1,480 @@
"""In-memory inbox + background poller for the standalone molecule-mcp path.
Purpose
-------
The universal MCP server (a2a_mcp_server.py) is OUTBOUND-ONLY by default —
it gives an MCP-aware agent the same A2A delegation, peer-discovery, and
memory tools that container-bound runtimes already have. There is no
inbound delivery path: when the canvas user types a message or a peer
sends an A2A request, the activity lands on the platform but the
standalone agent never sees it.
This module closes that gap WITHOUT requiring a tunnel or a public agent
URL. A daemon thread polls ``/workspaces/:id/activity?type=a2a_receive``
on the platform and stages new rows in an in-memory deque. Three new MCP
tools (``inbox_peek``, ``inbox_pop``, ``wait_for_message``) let the
agent observe the queue.
Why a poller (not push)
-----------------------
runtime=external workspaces have ``delivery_mode="poll"`` — the platform
records inbound A2A in ``activity_logs`` but does not call back to the
agent. A poller is the only inbound surface that works without the
operator exposing a public URL through a tunnel. 5s cadence matches
the molecule-mcp-claude-channel plugin's POLL_INTERVAL — it's already
proven on staging for the channel-based delivery path.
Cursor model
------------
``activity_logs.id`` is the cursor (server-assigned, monotonic). We
persist it to ``${CONFIGS_DIR}/.mcp_inbox_cursor`` so an agent restart
doesn't replay the last 10 minutes of inbound traffic and re-act on
already-handled messages. On 410 (cursor pruned) we drop back to
``since_secs=600`` for a bounded backlog and let the cursor advance
naturally from there.
Scope
-----
Standalone molecule-mcp ONLY. The in-container runtime has its own
push delivery (main.py + canvas WebSocket); we never want both
running at once or a single message would be delivered twice. The
caller (mcp_cli.main) gates activation explicitly via
``activate(state)``; in-container code that imports this module by
accident gets a no-op until activate is called.
"""
from __future__ import annotations
import json
import logging
import os
import threading
import time
from collections import deque
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)

# Poll cadence (seconds). 5s mirrors the molecule-mcp-claude-channel
# plugin's proven default — fast enough that a canvas user typing "are
# you there?" gets picked up before they refresh, slow enough that 12
# requests/min won't trip rate limits or wake mobile devices.
POLL_INTERVAL_SECONDS = 5.0

# Initial backlog window (seconds) for the first poll AND the recovery
# path after a stale-cursor 410. 10 minutes is enough to cover a brief
# crash/restart without flooding a long-idle workspace with hours of
# stale chat.
INITIAL_BACKLOG_SECONDS = 600

# Hard cap on the in-memory deque (used as deque maxlen, so the oldest
# entry is dropped silently once exceeded). The poller is bounded by
# the server's per-page limit (default 100) and the agent typically
# pops faster than the operator types, so an idle workspace shouldn't
# exceed a handful. The cap protects against runaway growth if the
# agent process stops calling pop.
MAX_QUEUED_MESSAGES = 200
@dataclass
class InboxMessage:
    """One inbound A2A message staged for the agent.

    Mirrors the shape the agent sees via inbox_peek / wait_for_message.
    Fields are derived from the activity_logs row by ``_from_activity``.
    """

    activity_id: str
    text: str
    peer_id: str  # empty string = canvas user; non-empty = peer workspace_id
    method: str  # JSON-RPC method ("message/send", "tasks/send", etc.)
    created_at: str  # RFC3339 timestamp from the activity row

    def to_dict(self) -> dict[str, Any]:
        """Serialize for the JSON payloads returned by the inbox tools."""
        sender_kind = "peer_agent" if self.peer_id else "canvas_user"
        return {
            "activity_id": self.activity_id,
            "text": self.text,
            "peer_id": self.peer_id,
            "kind": sender_kind,
            "method": self.method,
            "created_at": self.created_at,
        }
@dataclass
class InboxState:
    """Thread-safe queue of pending inbound messages.

    Producer: the poller thread, calling ``record(message)``.
    Consumers: the MCP tool handlers, calling ``peek``, ``pop``,
    or ``wait``. Synchronization is via a single ``threading.Lock``
    (cheap — every operation is O(n) over a small deque) plus an
    ``Event`` that wakes ``wait`` callers when a new message lands.
    """

    cursor_path: Path
    """File path that persists ``activity_logs.id`` of the most
    recently observed row, so a restart doesn't replay backlog."""

    # maxlen makes the deque self-bounding: the oldest entry is dropped
    # silently once MAX_QUEUED_MESSAGES is exceeded.
    _queue: deque[InboxMessage] = field(default_factory=lambda: deque(maxlen=MAX_QUEUED_MESSAGES))
    _lock: threading.Lock = field(default_factory=threading.Lock)
    _arrival: threading.Event = field(default_factory=threading.Event)
    _cursor: str | None = None
    _cursor_loaded: bool = False

    def load_cursor(self) -> str | None:
        """Read the persisted cursor from disk. Cached after first call.

        Missing/unreadable file → None (poller will fall back to the
        initial-backlog window). We never raise: a corrupt cursor is
        less bad than the inbox refusing to start.
        """
        with self._lock:
            if self._cursor_loaded:
                return self._cursor
            try:
                if self.cursor_path.is_file():
                    # Empty file normalizes to None, same as missing.
                    self._cursor = self.cursor_path.read_text().strip() or None
            except OSError as exc:
                logger.warning("inbox: failed to read cursor %s: %s", self.cursor_path, exc)
                self._cursor = None
            self._cursor_loaded = True
            return self._cursor

    def save_cursor(self, activity_id: str) -> None:
        """Persist the cursor. Best-effort — log + continue on failure.

        Loss of the cursor on a write failure means an extra page of
        backlog after restart, never a stuck poller. Silent-fail
        would mask a permission misconfiguration on the operator's
        configs dir; warn loudly so they can fix it.
        """
        with self._lock:
            self._cursor = activity_id
            self._cursor_loaded = True
            try:
                self.cursor_path.parent.mkdir(parents=True, exist_ok=True)
                # Write-to-tmp + rename so a crash mid-write can't leave
                # a truncated cursor file behind.
                tmp = self.cursor_path.with_suffix(self.cursor_path.suffix + ".tmp")
                tmp.write_text(activity_id)
                tmp.replace(self.cursor_path)
            except OSError as exc:
                logger.warning("inbox: failed to persist cursor to %s: %s", self.cursor_path, exc)

    def reset_cursor(self) -> None:
        """Forget the cursor. Used after a 410 from the activity API."""
        with self._lock:
            self._cursor = None
            self._cursor_loaded = True
            try:
                if self.cursor_path.is_file():
                    self.cursor_path.unlink()
            except OSError as exc:
                logger.warning("inbox: failed to delete cursor %s: %s", self.cursor_path, exc)

    def record(self, message: InboxMessage) -> None:
        """Append a message and wake any waiter.

        Skips a row whose activity_id we've already queued — defensive
        against the poller racing with the consumer + cursor save.
        """
        with self._lock:
            # Linear dedup scan is fine: the deque is capped small.
            for existing in self._queue:
                if existing.activity_id == message.activity_id:
                    return
            self._queue.append(message)
            self._arrival.set()

    def peek(self, limit: int = 10) -> list[InboxMessage]:
        """Return up to ``limit`` pending messages without removing them."""
        # Non-positive limit falls back to the default rather than
        # returning nothing.
        if limit <= 0:
            limit = 10
        with self._lock:
            return list(self._queue)[:limit]

    def pop(self, activity_id: str) -> InboxMessage | None:
        """Remove a specific message. Idempotent; returns None if absent.

        We require the caller to specify which message it handled —
        rather than auto-popping the head — preserves observability
        when the agent reads several but only handles one.
        """
        with self._lock:
            for existing in list(self._queue):
                if existing.activity_id == activity_id:
                    self._queue.remove(existing)
                    # Emptied the queue → clear the arrival flag so the
                    # next wait() blocks instead of returning instantly.
                    if not self._queue:
                        self._arrival.clear()
                    return existing
            return None

    def wait(self, timeout_secs: float) -> InboxMessage | None:
        """Block until a message is available or timeout elapses.

        Returns the head message WITHOUT popping; the caller decides
        whether to pop after acting on it. Same shape as Python's
        Queue.get with timeout, but non-destructive so a peek-style
        agent can still inspect with peek/pop.
        """
        # Fast path: queue already has something.
        with self._lock:
            if self._queue:
                return self._queue[0]
            # Clearing under the same lock record() takes means a
            # message can't land between this check and the wait below
            # without re-setting the event — no lost wakeups.
            self._arrival.clear()
        triggered = self._arrival.wait(timeout=max(0.0, timeout_secs))
        if not triggered:
            return None
        # Re-check under the lock: a concurrent pop() may have emptied
        # the queue between the wakeup and here.
        with self._lock:
            return self._queue[0] if self._queue else None
# ---------------------------------------------------------------------------
# Module singleton — set by mcp_cli before MCP server starts.
# ---------------------------------------------------------------------------
#
# In-container callers don't activate; the inbox tools detect the
# unset singleton and return an informational error rather than
# breaking the dispatch path.
_STATE: InboxState | None = None


def activate(state: InboxState) -> None:
    """Register an InboxState as the singleton this module exposes.

    Idempotent within a process: re-activating with the same state is
    a no-op; activating with a DIFFERENT state replaces the singleton
    and logs at WARNING (the only legitimate caller is mcp_cli at
    startup; double-activate usually means a test/runtime mix-up).
    """
    global _STATE
    if state is _STATE:
        return  # same state — nothing to do
    if _STATE is not None:
        logger.warning("inbox: replacing existing singleton state")
    _STATE = state


def get_state() -> InboxState | None:
    """Return the active InboxState, or None if the runtime never activated.

    Tool implementations call this and surface a clear "(inbox not
    enabled)" message to the agent when None — keeps the in-container
    path's tool dispatch from raising on an inbox-tool call that the
    agent shouldn't have made anyway.
    """
    return _STATE
# ---------------------------------------------------------------------------
# Activity → InboxMessage adapter
# ---------------------------------------------------------------------------
#
# The platform's a2a_proxy logs request_body as the JSON-RPC envelope
# it forwarded to the workspace. Three shapes have been observed in
# the wild (verified against workspace-server's logA2ASuccess in
# a2a_proxy_helpers.go on 2026-04-29) — handle all three before
# falling back to summary so a peer message at least surfaces SOMETHING.
def _extract_text(request_body: Any, summary: str | None) -> str:
"""Pull the human-readable text out of an A2A activity row.
Mirrors molecule-mcp-claude-channel/server.ts:445 (extractText) so
canvas-user messages and peer-agent messages render identically
across both inbound channels.
"""
if not isinstance(request_body, dict):
return summary or "(empty A2A message)"
candidates: list[Any] = []
params = request_body.get("params") if isinstance(request_body.get("params"), dict) else None
if params:
message = params.get("message") if isinstance(params.get("message"), dict) else None
if message:
candidates.append(message.get("parts"))
candidates.append(params.get("parts"))
candidates.append(request_body.get("parts"))
# The A2A protocol's part discriminator field varies between SDK
# versions: a2a-sdk v0 uses ``type``, v1 uses ``kind``. The platform's
# activity_logs preserves whichever the original sender used, so we
# accept either. Verified live against a hosted SaaS workspace on
# 2026-04-30 — every canvas-user message arrived with ``kind`` and
# the type-only filter was silently falling through to summary.
for parts in candidates:
if isinstance(parts, list):
text = "".join(
p.get("text", "")
for p in parts
if isinstance(p, dict)
and (p.get("kind") == "text" or p.get("type") == "text")
)
if text:
return text
return summary or "(empty A2A message)"
def message_from_activity(row: dict[str, Any]) -> InboxMessage:
    """Convert one /activity row into an InboxMessage."""
    body = row.get("request_body")
    if isinstance(body, str):
        # The Go handler returns request_body as json.RawMessage; httpx
        # deserializes that to a dict already. But some legacy paths or
        # mocked servers may return it as a string — handle defensively.
        try:
            body = json.loads(body)
        except (TypeError, ValueError):
            body = None
    return InboxMessage(
        activity_id=str(row.get("id", "")),
        text=_extract_text(body, row.get("summary")),
        peer_id=row.get("source_id") or "",
        method=row.get("method") or "",
        created_at=str(row.get("created_at", "")),
    )
# ---------------------------------------------------------------------------
# Poller — daemon thread that fills the queue from the activity API
# ---------------------------------------------------------------------------
def _poll_once(
    state: InboxState,
    platform_url: str,
    workspace_id: str,
    headers: dict[str, str],
    timeout_secs: float = 10.0,
) -> int:
    """One poll iteration. Returns number of new messages enqueued.

    Idempotent and stateless apart from the InboxState passed in —
    safe to call from tests with a stub state + a real httpx mock.
    """
    import httpx  # deferred so importing this module stays cheap

    url = f"{platform_url}/workspaces/{workspace_id}/activity"
    params: dict[str, str] = {"type": "a2a_receive"}
    cursor = state.load_cursor()
    if cursor:
        params["since_id"] = cursor
    else:
        # No cursor yet (first run, or post-410 reset): ask for a
        # bounded backlog window instead of the full history.
        params["since_secs"] = str(INITIAL_BACKLOG_SECONDS)
    try:
        with httpx.Client(timeout=timeout_secs) as client:
            resp = client.get(url, params=params, headers=headers)
    except Exception as exc:  # noqa: BLE001
        logger.warning("inbox poller: GET /activity failed: %s", exc)
        return 0
    if resp.status_code == 410:
        # Cursor pruned — drop back to the backlog window. The next
        # poll picks up wherever the activity API has rows now.
        logger.info(
            "inbox poller: cursor %s expired (410); resetting to since_secs=%d",
            cursor,
            INITIAL_BACKLOG_SECONDS,
        )
        state.reset_cursor()
        return 0
    if resp.status_code >= 400:
        logger.warning(
            "inbox poller: HTTP %d from /activity: %s",
            resp.status_code,
            (resp.text or "")[:200],
        )
        return 0
    try:
        rows = resp.json()
    except ValueError as exc:
        logger.warning("inbox poller: non-JSON response: %s", exc)
        return 0
    if not isinstance(rows, list):
        return 0
    # since_id mode returns ASC (oldest first). since_secs mode returns
    # DESC; reverse so we record in chronological order and the cursor
    # we save is the freshest row.
    if cursor is None:
        rows = list(reversed(rows))
    new_count = 0
    last_id: str | None = None
    for row in rows:
        if not isinstance(row, dict):
            continue
        message = message_from_activity(row)
        if not message.activity_id:
            # A row without a server-assigned id can't advance the
            # cursor; skip it.
            continue
        state.record(message)
        last_id = message.activity_id
        new_count += 1
    if last_id is not None:
        state.save_cursor(last_id)
    return new_count
def _poll_loop(
    state: InboxState,
    platform_url: str,
    workspace_id: str,
    interval: float = POLL_INTERVAL_SECONDS,
    stop_event: threading.Event | None = None,
) -> None:
    """Daemon-thread body: poll forever until stop_event fires.

    auth_headers() is rebuilt every iteration so a token rotation via
    env var or .auth_token file is picked up without a restart. Cheap
    (a dict + an env read).
    """
    from platform_auth import auth_headers

    while True:
        try:
            _poll_once(state, platform_url, workspace_id, auth_headers())
        except Exception as exc:  # noqa: BLE001
            # Never let one bad iteration kill the thread.
            logger.warning("inbox poller: iteration crashed: %s", exc)
        if stop_event is None:
            time.sleep(interval)
        elif stop_event.wait(interval):
            return
def start_poller_thread(
    state: InboxState,
    platform_url: str,
    workspace_id: str,
    interval: float = POLL_INTERVAL_SECONDS,
) -> threading.Thread:
    """Spawn the poller as a daemon thread. Returns the Thread handle.

    daemon=True so the poller dies with the main process — same
    rationale as mcp_cli's heartbeat thread (no leaks, no stale
    workspace writes after the operator hits Ctrl-C).
    """
    poller = threading.Thread(
        target=_poll_loop,
        name="molecule-mcp-inbox-poller",
        args=(state, platform_url, workspace_id, interval),
        daemon=True,
    )
    poller.start()
    return poller
def default_cursor_path() -> Path:
    """Standard cursor location: ``${CONFIGS_DIR}/.mcp_inbox_cursor``.

    Mirrors mcp_cli's CONFIGS_DIR resolution so a single
    operator-facing env var controls every persisted state file
    (.auth_token + .mcp_inbox_cursor).
    """
    base = os.environ.get("CONFIGS_DIR", "/configs")
    return Path(base) / ".mcp_inbox_cursor"

View File

@ -273,6 +273,19 @@ def main() -> None:
_platform_register(platform_url, workspace_id, token)
_start_heartbeat_thread(platform_url, workspace_id, token)
# Inbox poller — the inbound side of the standalone path. Without
# this thread, the universal MCP server is OUTBOUND-ONLY: an agent
# can call delegate_task / send_message_to_user but never observe
# canvas-user or peer-agent messages. The poller fills an in-memory
# queue from the platform's /activity?type=a2a_receive endpoint;
# the agent reads via wait_for_message / inbox_peek / inbox_pop.
#
# Same disable pattern as heartbeat: in-container callers (with
# push delivery via canvas WebSocket) skip this to avoid duplicate
# delivery; tests use the env to keep imports cheap.
if not os.environ.get("MOLECULE_MCP_DISABLE_INBOX", "").strip():
_start_inbox_poller(platform_url, workspace_id)
# Env is valid — safe to import the heavy module now. Importing
# earlier would trigger a2a_client.py:22's module-level RuntimeError
# before our friendly help reaches the user.
@ -280,6 +293,28 @@ def main() -> None:
cli_main()
def _start_inbox_poller(platform_url: str, workspace_id: str) -> None:
    """Activate the inbox singleton + spawn the poller daemon thread.

    Done lazily here (not at module import) because importing inbox
    pulls in platform_auth, which only resolves cleanly AFTER env
    validation succeeds. Activation is idempotent within a process,
    so a stray double-call (e.g. test harness re-entering main) is
    harmless.

    The poller thread is daemon=True — dies with the main process.
    """
    try:
        import inbox
    except ImportError as exc:
        # Best-effort: a wheel shipped without inbox just stays
        # outbound-only instead of crashing startup.
        logger.warning("molecule-mcp: inbox module unavailable: %s", exc)
        return

    inbox_state = inbox.InboxState(cursor_path=inbox.default_cursor_path())
    inbox.activate(inbox_state)
    inbox.start_poller_thread(inbox_state, platform_url, workspace_id)
def _read_token_file() -> str:
"""Read the token from ${CONFIGS_DIR}/.auth_token if present.

View File

@ -56,9 +56,12 @@ from a2a_tools import (
tool_delegate_task,
tool_delegate_task_async,
tool_get_workspace_info,
tool_inbox_peek,
tool_inbox_pop,
tool_list_peers,
tool_recall_memory,
tool_send_message_to_user,
tool_wait_for_message,
)
# Section name maps to the heading in the agent-facing system prompt.
@ -299,6 +302,94 @@ _SEND_MESSAGE_TO_USER = ToolSpec(
)
# ---------------------------------------------------------------------------
# Inbox — inbound delivery for the standalone molecule-mcp path.
#
# These tools observe a poller-fed in-memory queue (see workspace/inbox.py).
# They are universally registered so docs + adapters stay aligned, but
# they only return real data in the standalone molecule-mcp runtime;
# in-container runtimes return an informational "not enabled" message
# because their delivery loop is push-based via the canvas WebSocket.
# ---------------------------------------------------------------------------
# Blocking receive: the core of the standalone wait → respond → wait loop.
_WAIT_FOR_MESSAGE = ToolSpec(
    name="wait_for_message",
    short=(
        "Block until the next inbound message (canvas user OR peer "
        "agent) arrives, or until ``timeout_secs`` elapses."
    ),
    when_to_use=(
        "Standalone-runtime ONLY (molecule-mcp wrapper). After "
        "you reply, call this to wait for the next message — forms "
        "the loop ``wait_for_message → respond → wait_for_message``. "
        "Returns the head message non-destructively; call inbox_pop "
        "with the activity_id once you've handled it. In-container "
        "runtimes receive messages via push and should not call this."
    ),
    input_schema={
        "type": "object",
        "properties": {
            "timeout_secs": {
                "type": "number",
                "description": (
                    "Max seconds to block. Capped at 300. "
                    "Default 60."
                ),
            },
        },
    },
    impl=tool_wait_for_message,
    section=A2A_SECTION,
)

# Non-destructive queue inspection; pairs with inbox_pop to consume.
_INBOX_PEEK = ToolSpec(
    name="inbox_peek",
    short="List pending inbound messages without removing them.",
    when_to_use=(
        "Standalone-runtime ONLY. Use to inspect what's queued "
        "before deciding which to handle. Non-destructive — pair "
        "with inbox_pop to consume after replying."
    ),
    input_schema={
        "type": "object",
        "properties": {
            "limit": {
                "type": "integer",
                "description": "Max messages to return. Default 10.",
            },
        },
    },
    impl=tool_inbox_peek,
    section=A2A_SECTION,
)

# Explicit consume step — the only inbox tool that mutates the queue.
_INBOX_POP = ToolSpec(
    name="inbox_pop",
    short="Remove a handled message from the inbox queue by activity_id.",
    when_to_use=(
        "Standalone-runtime ONLY. Call after you've replied to a "
        "message returned from wait_for_message or inbox_peek to "
        "drop it from the queue. Idempotent — popping a missing "
        "id reports removed=false without erroring."
    ),
    input_schema={
        "type": "object",
        "properties": {
            "activity_id": {
                "type": "string",
                "description": (
                    "activity_id of the message to remove (from "
                    "inbox_peek / wait_for_message output)."
                ),
            },
        },
        "required": ["activity_id"],
    },
    impl=tool_inbox_pop,
    section=A2A_SECTION,
)
# ---------------------------------------------------------------------------
# HMA — hierarchical persistent memory
# ---------------------------------------------------------------------------
@ -374,6 +465,10 @@ TOOLS: list[ToolSpec] = [
_LIST_PEERS,
_GET_WORKSPACE_INFO,
_SEND_MESSAGE_TO_USER,
# Inbox (standalone-only; in-container returns informational error)
_WAIT_FOR_MESSAGE,
_INBOX_PEEK,
_INBOX_POP,
# HMA
_COMMIT_MEMORY,
_RECALL_MEMORY,

View File

@ -6,6 +6,9 @@
- **list_peers**: List the workspaces this agent can communicate with — name, ID, status, role for each.
- **get_workspace_info**: Get this workspace's own info — ID, name, role, tier, parent, status.
- **send_message_to_user**: Send a message directly to the user's canvas chat — pushed instantly via WebSocket. Use this to: (1) acknowledge a task immediately ('Got it, I'll start working on this'), (2) send interim progress updates while doing long work, (3) deliver follow-up results after delegation completes, (4) attach files (zip, pdf, csv, image) for the user to download via the `attachments` field (NEVER paste file URLs in `message`). The message appears in the user's chat as if you're proactively reaching out.
- **wait_for_message**: Block until the next inbound message (canvas user OR peer agent) arrives, or until `timeout_secs` elapses.
- **inbox_peek**: List pending inbound messages without removing them.
- **inbox_pop**: Remove a handled message from the inbox queue by activity_id.
### delegate_task
Use for QUICK questions and small sub-tasks where you can afford to wait inline. Returns the peer's response text directly. For longer-running work (research, multi-minute jobs) use delegate_task_async + check_task_status instead so you don't hold this workspace busy waiting.
@ -25,4 +28,13 @@ Use to introspect your own identity (e.g. before reporting back to the user, or
### send_message_to_user
Use proactively across the lifecycle of a task — early to acknowledge, mid-flight to update, late to deliver. Never paste file URLs in the message body — always pass absolute paths in `attachments` so the platform serves them as download chips (works on SaaS where external file hosts are unreachable).
### wait_for_message
Standalone-runtime ONLY (molecule-mcp wrapper). After you reply, call this to wait for the next message — forms the loop `wait_for_message → respond → wait_for_message`. Returns the head message non-destructively; call inbox_pop with the activity_id once you've handled it. In-container runtimes receive messages via push and should not call this.
### inbox_peek
Standalone-runtime ONLY. Use to inspect what's queued before deciding which to handle. Non-destructive — pair with inbox_pop to consume after replying.
### inbox_pop
Standalone-runtime ONLY. Call after you've replied to a message returned from wait_for_message or inbox_peek to drop it from the queue. Idempotent — popping a missing id reports removed=false without erroring.
Always use list_peers first to discover available workspace IDs. Access control is enforced — you can only reach siblings and parent/children. If a delegation returns a DELEGATION FAILED message, do NOT forward the raw error to the user. Instead: (1) try a different peer, (2) handle the task yourself, or (3) tell the user which peer is unavailable and provide your own best answer.

View File

@ -0,0 +1,444 @@
"""Tests for workspace/inbox.py — InboxState + activity API poller.
Covers the round-trip from a /activity row to an InboxMessage that the
agent observes via the three new MCP tools, plus the cursor-persistence
+ 410-recovery behavior that keeps the standalone molecule-mcp from
re-delivering already-handled messages after a restart.
"""
from __future__ import annotations
import threading
import time
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock, patch
import pytest
import inbox
# autouse: applied to every test in this module without opt-in.
@pytest.fixture(autouse=True)
def _reset_singleton():
    """Each test starts with a clean module singleton + a fresh
    InboxState. Activation in one test must not leak into the next."""
    inbox._STATE = None
    yield
    inbox._STATE = None
@pytest.fixture()
def state(tmp_path: Path) -> inbox.InboxState:
    """Fresh InboxState whose cursor file lives under pytest's tmp dir."""
    return inbox.InboxState(cursor_path=tmp_path / ".mcp_inbox_cursor")
# ---------------------------------------------------------------------------
# _extract_text — envelope shape coverage
# ---------------------------------------------------------------------------
def test_extract_text_jsonrpc_message_wrapper():
    """Full JSON-RPC envelope: text lives at params.message.parts."""
    body = {
        "jsonrpc": "2.0",
        "method": "message/send",
        "params": {"message": {"parts": [{"type": "text", "text": "hello"}]}},
    }
    assert inbox._extract_text(body, None) == "hello"
def test_extract_text_a2a_v1_kind_field():
    """A2A SDK v1 uses ``kind`` instead of ``type`` as the part
    discriminator. Hosted SaaS workspaces send the v1 shape today —
    this case is what live canvas-user messages look like in
    activity_logs.request_body."""
    body = {
        "params": {
            "message": {
                "role": "user",
                "parts": [{"kind": "text", "text": "hello from canvas"}],
            }
        }
    }
    assert inbox._extract_text(body, None) == "hello from canvas"
def test_extract_text_jsonrpc_params_parts():
    """parts directly under params (no message wrapper)."""
    body = {"params": {"parts": [{"type": "text", "text": "from peer"}]}}
    assert inbox._extract_text(body, None) == "from peer"
def test_extract_text_shorthand_parts():
    """Bare top-level ``parts`` shorthand (no JSON-RPC wrapper)."""
    body = {"parts": [{"type": "text", "text": "shorthand"}]}
    assert inbox._extract_text(body, None) == "shorthand"
def test_extract_text_concatenates_multiple_parts():
    """Text parts are joined in order; non-text parts are skipped."""
    mixed_parts = {
        "parts": [
            {"type": "text", "text": "hello "},
            {"type": "text", "text": "world"},
            {"type": "image", "url": "https://example.invalid/x.png"},
        ]
    }
    assert inbox._extract_text(mixed_parts, None) == "hello world"
def test_extract_text_falls_back_to_summary():
    """Missing or unrecognized bodies fall back to the summary string."""
    for body in (None, {"unrelated": True}):
        assert inbox._extract_text(body, "fallback") == "fallback"
def test_extract_text_returns_placeholder_when_nothing_usable():
    """No body and no summary yields the fixed placeholder text."""
    assert inbox._extract_text(None, None) == "(empty A2A message)"
# ---------------------------------------------------------------------------
# message_from_activity
# ---------------------------------------------------------------------------
def test_message_from_activity_canvas_user():
    """A row without source_id becomes a canvas_user message whose text
    is extracted from request_body (the summary field is ignored)."""
    activity_row = {
        "id": "act-1",
        "source_id": None,
        "method": "message/send",
        "summary": "ignored",
        "request_body": {
            "params": {"message": {"parts": [{"type": "text", "text": "hi"}]}}
        },
        "created_at": "2026-04-30T22:00:00Z",
    }
    message = inbox.message_from_activity(activity_row)
    assert message.activity_id == "act-1"
    assert message.text == "hi"
    assert message.peer_id == ""
    assert message.method == "message/send"
    assert message.to_dict()["kind"] == "canvas_user"
def test_message_from_activity_peer_agent():
    """A row carrying a source_id maps to a peer_agent message."""
    activity_row = {
        "id": "act-2",
        "source_id": "ws-peer-uuid",
        "method": "tasks/send",
        "summary": "delegate",
        "request_body": {"parts": [{"type": "text", "text": "do task"}]},
        "created_at": "2026-04-30T22:01:00Z",
    }
    message = inbox.message_from_activity(activity_row)
    assert message.peer_id == "ws-peer-uuid"
    assert message.to_dict()["kind"] == "peer_agent"
def test_message_from_activity_handles_string_request_body():
    """request_body may arrive JSON-encoded as a string; it gets parsed."""
    activity_row = {
        "id": "act-3",
        "source_id": None,
        "method": "message/send",
        "summary": None,
        "request_body": '{"parts": [{"type": "text", "text": "json string"}]}',
        "created_at": "2026-04-30T22:02:00Z",
    }
    assert inbox.message_from_activity(activity_row).text == "json string"
# ---------------------------------------------------------------------------
# InboxState — queue + wait/peek/pop semantics
# ---------------------------------------------------------------------------
def _msg(activity_id: str, text: str = "", peer_id: str = "") -> inbox.InboxMessage:
    """Build a minimal InboxMessage; text defaults to the activity id."""
    return inbox.InboxMessage(
        activity_id=activity_id,
        text=text if text else activity_id,
        peer_id=peer_id,
        method="message/send",
        created_at="2026-04-30T22:00:00Z",
    )
def test_record_then_peek(state: inbox.InboxState):
    """peek returns recorded messages in arrival order."""
    for activity_id in ("a", "b"):
        state.record(_msg(activity_id))
    assert [m.activity_id for m in state.peek(limit=10)] == ["a", "b"]
def test_record_dedupes_by_activity_id(state: inbox.InboxState):
    """Recording the same activity id twice keeps only the first copy."""
    state.record(_msg("a"))
    state.record(_msg("a"))
    assert len(state.peek(10)) == 1
def test_pop_removes_specific_message(state: inbox.InboxState):
    """pop returns the targeted message and leaves the rest queued."""
    state.record(_msg("a"))
    state.record(_msg("b"))
    popped = state.pop("a")
    assert popped is not None
    assert popped.activity_id == "a"
    assert [m.activity_id for m in state.peek(10)] == ["b"]
def test_pop_missing_id_returns_none(state: inbox.InboxState):
    """Popping an unknown id is a no-op that yields None."""
    state.record(_msg("a"))
    # Bind before asserting so the pop() call still executes under
    # ``python -O`` (bare assert statements are stripped there).
    outcome = state.pop("does-not-exist")
    assert outcome is None
    # The original message must remain queued.
    assert len(state.peek(10)) == 1
def test_wait_returns_existing_head_immediately(state: inbox.InboxState):
    """wait must not block when the queue already has a message."""
    state.record(_msg("a"))
    started_at = time.monotonic()
    head = state.wait(timeout_secs=5.0)
    elapsed = time.monotonic() - started_at
    assert head is not None
    assert head.activity_id == "a"
    assert elapsed < 0.5, f"wait should not block when queue non-empty (took {elapsed:.2f}s)"
def test_wait_blocks_until_message_arrives(state: inbox.InboxState):
    """wait parks the caller until a background producer records one."""
    def feed_queue():
        time.sleep(0.05)
        state.record(_msg("late"))

    threading.Thread(target=feed_queue, daemon=True).start()
    received = state.wait(timeout_secs=2.0)
    assert received is not None
    assert received.activity_id == "late"
def test_wait_returns_none_on_timeout(state: inbox.InboxState):
    """An empty queue times out to None rather than raising."""
    assert state.wait(timeout_secs=0.05) is None
def test_wait_does_not_pop(state: inbox.InboxState):
    """wait() only observes — removal is the caller's job via inbox_pop."""
    state.record(_msg("a"))
    for _ in range(2):
        state.wait(timeout_secs=1.0)
    assert len(state.peek(10)) == 1
# ---------------------------------------------------------------------------
# Cursor persistence
# ---------------------------------------------------------------------------
def test_load_cursor_returns_none_when_file_absent(state: inbox.InboxState):
    """No cursor file on disk means no cursor."""
    assert state.load_cursor() is None
def test_save_then_load_cursor_round_trip(state: inbox.InboxState):
    """A saved cursor survives a forced re-read from disk."""
    state.save_cursor("act-cursor-1")
    # Drop the in-memory cache so load_cursor must hit the file again.
    state._cursor_loaded = False
    state._cursor = None
    assert state.load_cursor() == "act-cursor-1"
def test_save_cursor_creates_parent_directory(tmp_path: Path):
    """save_cursor creates missing parent directories instead of failing."""
    deep_path = tmp_path / "nested" / "configs" / ".mcp_inbox_cursor"
    inbox.InboxState(cursor_path=deep_path).save_cursor("act-x")
    assert deep_path.read_text() == "act-x"
def test_reset_cursor_deletes_file(state: inbox.InboxState):
    """reset_cursor removes the file and forgets the stored value."""
    state.save_cursor("act-y")
    assert state.cursor_path.is_file()
    state.reset_cursor()
    assert not state.cursor_path.is_file()
    assert state.load_cursor() is None
# ---------------------------------------------------------------------------
# Module singleton
# ---------------------------------------------------------------------------
def test_get_state_returns_none_before_activate():
    """Without activation the module exposes no inbox state."""
    assert inbox.get_state() is None
def test_activate_then_get_state(state: inbox.InboxState):
    """activate installs the exact instance that get_state hands back."""
    inbox.activate(state)
    assert inbox.get_state() is state
def test_activate_idempotent(state: inbox.InboxState):
    """Re-activating the same state is a silent no-op."""
    for _ in range(2):
        inbox.activate(state)
    assert inbox.get_state() is state
# ---------------------------------------------------------------------------
# _poll_once — HTTP behavior
# ---------------------------------------------------------------------------
def _make_response(status_code: int, json_body: Any = None, text: str = "") -> MagicMock:
resp = MagicMock()
resp.status_code = status_code
if json_body is not None:
resp.json.return_value = json_body
else:
resp.json.side_effect = ValueError("no json")
resp.text = text
return resp
def _patch_httpx(returning: MagicMock):
"""Replace httpx.Client with a context-manager mock that returns
``returning`` from .get(). Captures the GET call args for assertion."""
client = MagicMock()
client.__enter__ = MagicMock(return_value=client)
client.__exit__ = MagicMock(return_value=False)
client.get = MagicMock(return_value=returning)
return patch("httpx.Client", return_value=client), client
def test_poll_once_fresh_start_uses_since_secs(state: inbox.InboxState):
    """With no saved cursor, the poller queries a recent time window."""
    patcher, client = _patch_httpx(_make_response(200, []))
    with patcher:
        count = inbox._poll_once(state, "http://platform", "ws-1", {})
    assert count == 0
    _, kwargs = client.get.call_args
    query = kwargs["params"]
    assert query["type"] == "a2a_receive"
    assert "since_secs" in query
    assert "since_id" not in query
def test_poll_once_with_cursor_uses_since_id(state: inbox.InboxState):
    """A persisted cursor switches the query to since_id paging."""
    state.save_cursor("act-existing")
    patcher, client = _patch_httpx(_make_response(200, []))
    with patcher:
        inbox._poll_once(state, "http://platform", "ws-1", {})
    _, kwargs = client.get.call_args
    query = kwargs["params"]
    assert query["since_id"] == "act-existing"
    assert "since_secs" not in query
def test_poll_once_410_resets_cursor(state: inbox.InboxState):
    """HTTP 410 means the server pruned our cursor: drop it locally so
    the next poll falls back to the time-window query."""
    state.save_cursor("act-stale")
    patcher, _ = _patch_httpx(_make_response(410, text="cursor pruned"))
    with patcher:
        inbox._poll_once(state, "http://platform", "ws-1", {})
    assert state.load_cursor() is None
    assert not state.cursor_path.is_file()
def test_poll_once_records_messages_and_advances_cursor(state: inbox.InboxState):
    """Every returned row is queued and the cursor lands on the final
    row's id."""
    state.save_cursor("act-old")

    def activity_row(act_id, source, method, text, created):
        # One /activity row with a single text part.
        return {
            "id": act_id,
            "source_id": source,
            "method": method,
            "summary": None,
            "request_body": {"parts": [{"type": "text", "text": text}]},
            "created_at": created,
        }

    rows = [
        activity_row("act-1", None, "message/send", "first", "2026-04-30T22:00:00Z"),
        activity_row("act-2", "ws-peer", "tasks/send", "second", "2026-04-30T22:00:01Z"),
    ]
    patcher, _ = _patch_httpx(_make_response(200, rows))
    with patcher:
        count = inbox._poll_once(state, "http://platform", "ws-1", {})
    assert count == 2
    assert [m.activity_id for m in state.peek(10)] == ["act-1", "act-2"]
    assert state.load_cursor() == "act-2"
def test_poll_once_500_does_not_raise(state: inbox.InboxState):
    """Server errors are swallowed: zero messages, cursor untouched."""
    patcher, _ = _patch_httpx(_make_response(500, text="boom"))
    with patcher:
        count = inbox._poll_once(state, "http://platform", "ws-1", {})
    assert count == 0
    assert state.load_cursor() is None
def test_poll_once_handles_non_list_payload(state: inbox.InboxState):
    """A non-list JSON body is treated as zero rows, not an error."""
    patcher, _ = _patch_httpx(_make_response(200, {"error": "unexpected"}))
    with patcher:
        count = inbox._poll_once(state, "http://platform", "ws-1", {})
    assert count == 0
def test_poll_once_initial_backlog_reverses_to_chronological(state: inbox.InboxState):
    """With no cursor set, /activity returns newest-first; the poller
    must flip the rows so the queue is chronological and the saved
    cursor is the freshest id — restoring forward progression on the
    next poll."""

    def activity_row(act_id, text, created):
        # Minimal canvas-user row with a single text part.
        return {
            "id": act_id,
            "source_id": None,
            "method": "message/send",
            "summary": None,
            "request_body": {"parts": [{"type": "text", "text": text}]},
            "created_at": created,
        }

    newest_first = [
        activity_row("act-newest", "newest", "2026-04-30T22:00:02Z"),
        activity_row("act-oldest", "oldest", "2026-04-30T22:00:00Z"),
    ]
    patcher, _ = _patch_httpx(_make_response(200, newest_first))
    with patcher:
        inbox._poll_once(state, "http://platform", "ws-1", {})
    assert [m.activity_id for m in state.peek(10)] == ["act-oldest", "act-newest"]
    # Cursor points at the newest row, so the next poll fetches only
    # strictly newer activity.
    assert state.load_cursor() == "act-newest"
def test_start_poller_thread_is_daemon(state: inbox.InboxState):
    """The poller must run as a daemon so it dies with the parent
    process; a non-daemon poller would leak across `claude` restarts
    and write to a stale workspace."""
    patcher, _ = _patch_httpx(_make_response(200, []))
    with patcher, patch("platform_auth.auth_headers", return_value={}):
        # Tiny interval so the loop body executes at least once while
        # the patches are still in effect.
        poller = inbox.start_poller_thread(state, "http://platform", "ws-1", interval=0.01)
        time.sleep(0.05)
    assert poller.daemon is True
    assert poller.is_alive()
# ---------------------------------------------------------------------------
# default_cursor_path respects CONFIGS_DIR
# ---------------------------------------------------------------------------
def test_default_cursor_path_uses_configs_dir(monkeypatch, tmp_path: Path):
    """A set CONFIGS_DIR anchors the default cursor file location."""
    monkeypatch.setenv("CONFIGS_DIR", str(tmp_path))
    expected = tmp_path / ".mcp_inbox_cursor"
    assert inbox.default_cursor_path() == expected
def test_default_cursor_path_falls_back_to_default(monkeypatch):
    """Without CONFIGS_DIR the cursor lives under /configs."""
    monkeypatch.delenv("CONFIGS_DIR", raising=False)
    expected = Path("/configs") / ".mcp_inbox_cursor"
    assert inbox.default_cursor_path() == expected