Merge pull request #2437 from Molecule-AI/staging

staging → main: auto-promote c901d52
2026-05-01 03:42:34 +00:00 · 2026-05-01 03:42:34 +00:00 · 76c604fb4f
commit 76c604fb4f
parent ed29ad0d2a 5ad41f63ce
10 changed files with 522 additions and 4 deletions
--- a/scripts/build_runtime_package.py
+++ b/scripts/build_runtime_package.py
@ -150,6 +150,13 @@ def rewrite_imports(text: str, regex: re.Pattern) -> str:
    `import X`           → `import molecule_runtime.X as X`  (preserve binding)
    `from X import Y`    → `from molecule_runtime.X import Y`
    `from X.sub import Y` → `from molecule_runtime.X.sub import Y`
+
+    Rejects `import X as Y` because the rewrite would produce
+    `import molecule_runtime.X as X as Y`, a syntax error. The PR #2433
+    incident shipped this exact pattern past `Python Lint & Test` (which
+    runs against pre-rewrite source) but blew up the wheel-smoke gate.
+    Detecting it here turns that silently invalid rewrite into a build-time
+    error with a clear path: use `from X import …` or plain `import X`.
    """
    def repl(m: re.Match) -> str:
        indent, kw, mod, rest = m.group("indent"), m.group("kw"), m.group("mod"), m.group("rest")
@ -163,6 +170,26 @@ def rewrite_imports(text: str, regex: re.Pattern) -> str:
            # `import X.sub` — rewrite as `import molecule_runtime.X.sub` and
            # leave the trailing dot pattern intact for the rest of the line.
            return f"{indent}import molecule_runtime.{mod}{rest}"
+        # Detect `import X as Y` — the regex's `rest` group captures only
+        # the immediate following char (whitespace, comma, or EOL), so we
+        # have to peek at the surrounding line context. The match start is
+        # at the line's `import` keyword; everything after the matched
+        # name on the same line is what the source author wrote.
+        line_start = text.rfind("\n", 0, m.start()) + 1
+        line_end = text.find("\n", m.end())
+        if line_end == -1:
+            line_end = len(text)
+        line_after = text[m.end() - len(rest):line_end]
+        # Strip comments from consideration so `import X  # noqa` doesn't trip.
+        line_after_no_comment = line_after.split("#", 1)[0]
+        if re.search(r"^\s*as\s+\w+", line_after_no_comment):
+            raise ValueError(
+                f"rewrite_imports: cannot rewrite 'import {mod} as <alias>' on a "
+                f"workspace module — the regex would produce "
+                f"'import molecule_runtime.{mod} as {mod} as <alias>', invalid syntax. "
+                f"Use 'from {mod} import …' or plain 'import {mod}' instead. "
+                f"Offending line: {text[line_start:line_end]!r}"
+            )
        # Plain `import X` — alias preserves the local name.
        return f"{indent}import molecule_runtime.{mod} as {mod}{rest}"
    return regex.sub(repl, text)
--- a/scripts/wheel_smoke.py
+++ b/scripts/wheel_smoke.py
@ -52,11 +52,13 @@ def smoke_imports_and_invariants() -> None:
        InboxState,
        activate as inbox_activate,
        get_state as inbox_get_state,
+        set_notification_callback as inbox_set_notification_callback,
        start_poller_thread as inbox_start_poller_thread,
    )
    assert callable(inbox_activate), "inbox.activate must be callable"
    assert callable(inbox_get_state), "inbox.get_state must be callable"
    assert callable(inbox_start_poller_thread), "inbox.start_poller_thread must be callable"
+    assert callable(inbox_set_notification_callback), "inbox.set_notification_callback must be callable"

    assert a2a_client._A2A_ERROR_PREFIX, "a2a_client missing error sentinel"
    assert callable(get_adapter), "adapters.get_adapter must be callable"
--- a/workspace-server/internal/handlers/terminal.go
+++ b/workspace-server/internal/handlers/terminal.go
@ -277,12 +277,26 @@ var openTunnelCmd = func(o eicSSHOptions) *exec.Cmd {
 // to 22; with CP provisioning today the workspace runs as a native
 // process under the ubuntu user, so landing at ubuntu's shell IS the
 // terminal experience.
+//
+// ConnectTimeout=10 is the user-experience guard — without it, ssh waits
+// indefinitely for the remote sshd's banner. When the workspace EC2's
+// sshd is unresponsive (mid-restart, SG drop, AMI without ec2-instance-
+// connect installed) the canvas's xterm shows the user's typed bytes
+// echoed back by the workspace-server's *local* PTY (cooked + echo mode
+// before ssh finishes its handshake) and then closes silently when CF's
+// idle WebSocket timer fires, with no "Connection refused" or "Permission
+// denied" output ever reaching the user. Capping at 10s makes the failure
+// surface as a real ssh error message in the terminal — caught 2026-04-30
+// when hongmingwang's hermes shell hung after the heartbeat-fix redeploy
+// and a probe at /workspaces/<id>/terminal sat for 60s with the only
+// frame being the local-PTY echo of a single 'X' typed mid-handshake.
 var sshCommandCmd = func(o eicSSHOptions) *exec.Cmd {
 	return exec.Command(
 		"ssh",
 		"-i", o.PrivateKeyPath,
 		"-o", "StrictHostKeyChecking=no",
 		"-o", "UserKnownHostsFile=/dev/null",
+		"-o", "ConnectTimeout=10",
 		"-o", "ServerAliveInterval=30",
 		"-o", "ServerAliveCountMax=3",
 		"-p", fmt.Sprintf("%d", o.LocalPort),
--- a/workspace-server/internal/handlers/terminal_test.go
+++ b/workspace-server/internal/handlers/terminal_test.go
@ -320,6 +320,7 @@ func TestSSHCommandCmd_BuildsArgv(t *testing.T) {
 		"-i", "/tmp/k",
 		"-o", "StrictHostKeyChecking=no",
 		"-o", "UserKnownHostsFile=/dev/null",
+		"-o", "ConnectTimeout=10",
 		"-o", "ServerAliveInterval=30",
 		"-o", "ServerAliveCountMax=3",
 		"-p", "2222",
@ -490,3 +491,57 @@ func TestKI005_OrgToken_SkipsValidateToken(t *testing.T) {
 	}
 }

+// TestSSHCommandCmd_ConnectTimeoutPresent pins the user-experience guard
+// against ssh-handshake-hang. Without ConnectTimeout, ssh waits forever
+// for the remote sshd's banner — which masquerades as a "silently dead"
+// shell to the user, because the workspace-server's local PTY is in
+// cooked + echo mode before ssh finishes its handshake, so the canvas
+// echoes the user's keystrokes back without ever reaching remote bash,
+// and Cloudflare eventually closes the WebSocket on idle (~100s) with
+// no error frame to surface what went wrong.
+//
+// Repro 2026-04-30: a 60s probe at hongmingwang's hermes /terminal
+// endpoint after the heartbeat-fix redeploy showed only the local-PTY
+// echo of a single 'X' typed mid-handshake. Workspace EC2 was up and
+// heartbeating but its sshd was unresponsive; ssh hung indefinitely.
+//
+// Behavior-based: matches the literal `-o ConnectTimeout=N` arg pair so
+// this stays pinned even if the rest of the args reorder. Does not pin
+// the exact value — operators may tune it — but does pin presence.
+func TestSSHCommandCmd_ConnectTimeoutPresent(t *testing.T) {
+	t.Parallel()
+
+	cmd := sshCommandCmd(eicSSHOptions{
+		InstanceID:     "i-test",
+		OSUser:         "ubuntu",
+		Region:         "us-east-2",
+		LocalPort:      2222,
+		PrivateKeyPath: "/tmp/test-key",
+	})
+
+	args := cmd.Args
+	found := false
+	for i, a := range args {
+		if a != "-o" {
+			continue
+		}
+		if i+1 >= len(args) {
+			continue
+		}
+		val := args[i+1]
+		if len(val) >= len("ConnectTimeout=") &&
+			val[:len("ConnectTimeout=")] == "ConnectTimeout=" {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Errorf("sshCommandCmd is missing `-o ConnectTimeout=N` — without it, "+
+			"ssh hangs forever when the workspace EC2's sshd is unresponsive "+
+			"and the canvas terminal silently dies on Cloudflare's idle WS "+
+			"timeout with no error message reaching the user. See terminal.go "+
+			"sshCommandCmd comment (2026-04-30 hongmingwang hermes). args=%v",
+			args)
+	}
+}
+
--- a/workspace/a2a_mcp_server.py
+++ b/workspace/a2a_mcp_server.py
@ -17,6 +17,10 @@ import json
 import logging
 import sys

+import inbox  # noqa: F401  — bridge wiring lives in main(); the rewriter
+#                              produces `import molecule_runtime.inbox as inbox`
+#                              which preserves this binding for set_notification_callback.
+
 from a2a_tools import (
    tool_check_task_status,
    tool_commit_memory,
@ -130,6 +134,44 @@ async def handle_tool_call(name: str, arguments: dict) -> str:
    return f"Unknown tool: {name}"


+# --- MCP Notification bridge ---
+
+# `notifications/claude/channel` matches the contract used by the
+# molecule-mcp-claude-channel bun bridge (server.ts:509). Claude Code's
+# MCP runtime treats this method as a conversation interrupt — `content`
+# becomes the agent turn, `meta` is structured metadata. Notification-
+# capable hosts (Claude Code today; any compliant client tomorrow)
+# get push UX automatically; pollers (`wait_for_message` / `inbox_peek`)
+# still work unchanged. See task #46 + the callback contract documented
+# in workspace/inbox.py:set_notification_callback.
+_CHANNEL_NOTIFICATION_METHOD = "notifications/claude/channel"
+
+
+def _build_channel_notification(msg: dict) -> dict:
+    """Transform an ``InboxMessage.to_dict()`` into the MCP notification
+    envelope expected by Claude Code's channel-bridge contract.
+
+    Pure function so the wire shape is unit-testable without spinning
+    up an asyncio loop. The wire-up in ``main()`` just composes this
+    with ``asyncio.run_coroutine_threadsafe``.
+    """
+    return {
+        "jsonrpc": "2.0",
+        "method": _CHANNEL_NOTIFICATION_METHOD,
+        "params": {
+            "content": msg.get("text", ""),
+            "meta": {
+                "source": "molecule",
+                "kind": msg.get("kind", ""),
+                "peer_id": msg.get("peer_id", ""),
+                "method": msg.get("method", ""),
+                "activity_id": msg.get("activity_id", ""),
+                "ts": msg.get("created_at", ""),
+            },
+        },
+    }
+
+
 # --- MCP Server (JSON-RPC over stdio) ---

 async def main():  # pragma: no cover
@ -148,6 +190,34 @@ async def main():  # pragma: no cover
        writer.write(data.encode())
        await writer.drain()

+    # Wire the inbox → MCP notification bridge. Inbox poller (daemon
+    # thread) calls into here when a new activity row lands; we
+    # schedule the notification onto the asyncio loop and best-effort
+    # fire it on the same stdout the responses go to.
+    loop = asyncio.get_running_loop()
+
+    async def _emit_notification(payload: dict) -> None:
+        data = json.dumps(payload) + "\n"
+        writer.write(data.encode())
+        try:
+            await writer.drain()
+        except Exception:  # noqa: BLE001
+            # Closed pipe (host disconnected) shouldn't crash the
+            # inbox poller; let it sit until the host reconnects.
+            pass
+
+    def _on_inbox_message(msg: dict) -> None:
+        try:
+            asyncio.run_coroutine_threadsafe(
+                _emit_notification(_build_channel_notification(msg)),
+                loop,
+            )
+        except RuntimeError:
+            # Loop closed during shutdown — best-effort, swallow.
+            pass
+
+    inbox.set_notification_callback(_on_inbox_message)
+
    buffer = ""
    while True:
        try:
--- a/workspace/config.py
+++ b/workspace/config.py
@ -323,7 +323,19 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
            args=runtime_raw.get("args", []),
            required_env=runtime_raw.get("required_env", []),
            timeout=runtime_raw.get("timeout", 0),
-            model=runtime_raw.get("model", ""),
+            # Fall back to top-level resolved `model` (which already honors
+            # MODEL_PROVIDER env override, line 277) when YAML doesn't carry
+            # runtime_config.model.  Without this fallback, SaaS workspaces
+            # silently boot with the adapter's hard-coded default —
+            # claude-code-default reads `runtime_config.model or "sonnet"`,
+            # so a user who picks Opus in the canvas Config tab gets Sonnet
+            # on the next CP-driven restart. Root cause: the CP user-data
+            # script regenerates /configs/config.yaml at every boot with
+            # only `name`, `runtime`, `a2a` keys (intentionally minimal so
+            # it doesn't carry stale state), losing runtime_config.model.
+            # MODEL_PROVIDER is plumbed as an env var, so picking it up via
+            # the top-level resolved model keeps the selection sticky.
+            model=runtime_raw.get("model") or model,
            # Deprecated fields — kept for backward compat
            auth_token_env=runtime_raw.get("auth_token_env", ""),
            auth_token_file=runtime_raw.get("auth_token_file", ""),
--- a/workspace/inbox.py
+++ b/workspace/inbox.py
@ -53,7 +53,7 @@ import time
 from collections import deque
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any
+from typing import Any, Callable

 logger = logging.getLogger(__name__)

@ -173,10 +173,14 @@ class InboxState:
            logger.warning("inbox: failed to delete cursor %s: %s", self.cursor_path, exc)

    def record(self, message: InboxMessage) -> None:
-        """Append a message and wake any waiter.
+        """Append a message, wake any waiter, and fire the notification
+        callback (if registered) for push-UX-capable hosts.

        Skips a row whose activity_id we've already queued — defensive
-        against the poller racing with the consumer + cursor save.
+        against the poller racing with the consumer + cursor save. The
+        dedupe short-circuits BEFORE the notification fires, so a
+        notification-capable host doesn't see duplicate push events on
+        backlog overlap.
        """
        with self._lock:
            for existing in self._queue:
@ -184,6 +188,19 @@ class InboxState:
                    return
            self._queue.append(message)
            self._arrival.set()
+        # Fire notification AFTER releasing the lock so the callback
+        # is free to do anything (including calling back into inbox)
+        # without deadlock. Best-effort: a raising callback must not
+        # prevent the message from landing in the queue — observability
+        # is more important than push delivery.
+        cb = _NOTIFICATION_CALLBACK
+        if cb is not None:
+            try:
+                cb(message.to_dict())
+            except Exception:
+                logger.warning(
+                    "inbox: notification callback raised", exc_info=True
+                )

    def peek(self, limit: int = 10) -> list[InboxMessage]:
        """Return up to ``limit`` pending messages without removing them."""
@ -240,6 +257,35 @@ class InboxState:
 _STATE: InboxState | None = None


+# Notification bridge — set by the universal MCP server (a2a_mcp_server.py)
+# at startup so that new inbox arrivals can be pushed to notification-
+# capable hosts (Claude Code) as MCP `notifications/claude/channel`
+# events. Kept module-level (rather than a method on InboxState) so the
+# inbox doesn't need to know about MCP — a thin pluggable seam.
+#
+# Defaults to None: in-container runtimes that don't activate the inbox
+# also don't push notifications, and tests start clean. The wheel's
+# wiring is exercised by tests/test_a2a_mcp_server.py + the callback
+# bridge tests in tests/test_inbox.py.
+_NOTIFICATION_CALLBACK: Callable[[dict], None] | None = None
+
+
+def set_notification_callback(cb: Callable[[dict], None] | None) -> None:
+    """Register (or clear) the per-message notification callback.
+
+    The callback receives ``InboxMessage.to_dict()`` for each new
+    arrival — same shape ``inbox_peek`` returns to the agent, so a
+    bridge can build its MCP notification payload without re-deriving
+    fields.
+
+    Best-effort: a raising callback does NOT prevent the message from
+    landing in the queue (see ``InboxState.record``). Pass ``None`` to
+    clear (used by tests + the wheel's shutdown path).
+    """
+    global _NOTIFICATION_CALLBACK
+    _NOTIFICATION_CALLBACK = cb
+
+
 def activate(state: InboxState) -> None:
    """Register an InboxState as the singleton this module exposes.

--- a/workspace/tests/test_a2a_mcp_server.py
+++ b/workspace/tests/test_a2a_mcp_server.py
@ -138,3 +138,102 @@ def test_attachments_param_description_emphasizes_REQUIRED():
        assert forbidden in desc, (
            f"`attachments` description must call out {forbidden!r} as a wrong alternative"
        )
+
+
+# ============== Inbox → MCP notification bridge (2026-05-01) ==============
+# Notification-capable hosts (Claude Code) get push UX when a new inbound
+# message lands; pollers (wait_for_message/inbox_peek) keep working.
+# `_build_channel_notification` is the pure shape transformer — wire-up
+# in main() composes it with asyncio.run_coroutine_threadsafe.
+
+
+def test_build_channel_notification_method_matches_claude_contract():
+    """Method MUST be `notifications/claude/channel` exactly — that's
+    what Claude Code's MCP runtime listens for as a conversation
+    interrupt. Same string as the bun channel bridge sends
+    (server.ts:509) so this is a drop-in replacement."""
+    from a2a_mcp_server import _build_channel_notification
+
+    payload = _build_channel_notification({
+        "activity_id": "act-1",
+        "text": "hello",
+        "peer_id": "",
+        "kind": "canvas_user",
+        "method": "message/send",
+        "created_at": "2026-05-01T00:00:00Z",
+    })
+
+    assert payload["method"] == "notifications/claude/channel"
+    assert payload["jsonrpc"] == "2.0"
+
+
+def test_build_channel_notification_content_is_message_text():
+    """`content` is what becomes the agent conversation turn —
+    pulled directly from the inbox message text."""
+    from a2a_mcp_server import _build_channel_notification
+
+    payload = _build_channel_notification({
+        "activity_id": "act-1",
+        "text": "hello from canvas",
+        "peer_id": "",
+        "kind": "canvas_user",
+        "method": "message/send",
+        "created_at": "2026-05-01T00:00:00Z",
+    })
+
+    assert payload["params"]["content"] == "hello from canvas"
+
+
+def test_build_channel_notification_meta_carries_routing_fields():
+    """Meta must include kind, peer_id, method, activity_id, ts —
+    fields the agent or downstream tooling needs to route a reply
+    (canvas_user → /notify, peer_agent → /a2a) and to acknowledge
+    via inbox_pop."""
+    from a2a_mcp_server import _build_channel_notification
+
+    payload = _build_channel_notification({
+        "activity_id": "act-7",
+        "text": "ping",
+        "peer_id": "ws-peer-uuid",
+        "kind": "peer_agent",
+        "method": "message/send",
+        "created_at": "2026-05-01T01:23:45Z",
+    })
+    meta = payload["params"]["meta"]
+
+    assert meta["source"] == "molecule"
+    assert meta["kind"] == "peer_agent"
+    assert meta["peer_id"] == "ws-peer-uuid"
+    assert meta["method"] == "message/send"
+    assert meta["activity_id"] == "act-7"
+    assert meta["ts"] == "2026-05-01T01:23:45Z"
+
+
+def test_build_channel_notification_no_id_field():
+    """Notifications MUST NOT carry a JSON-RPC `id` field — that's
+    what distinguishes them from requests. A notification with `id`
+    would be mis-interpreted as a request and clients would wait
+    for a response that never comes."""
+    from a2a_mcp_server import _build_channel_notification
+
+    payload = _build_channel_notification({"text": "x"})
+
+    assert "id" not in payload, (
+        "notifications must omit `id` per JSON-RPC 2.0 spec — "
+        "presence would make MCP clients await a phantom response"
+    )
+
+
+def test_build_channel_notification_handles_missing_fields_gracefully():
+    """Some fields may be absent on edge-case messages (e.g. cursor
+    bootstrapping with no created_at yet). Default to empty strings
+    so the wire shape stays valid JSON instead of crashing."""
+    from a2a_mcp_server import _build_channel_notification
+
+    payload = _build_channel_notification({})
+
+    assert payload["params"]["content"] == ""
+    meta = payload["params"]["meta"]
+    assert meta["activity_id"] == ""
+    assert meta["peer_id"] == ""
+    assert meta["kind"] == ""
--- a/workspace/tests/test_config.py
+++ b/workspace/tests/test_config.py
@ -81,6 +81,89 @@ def test_load_config_model_no_env(tmp_path, monkeypatch):
    assert cfg.model == "openai:gpt-4o"


+def test_runtime_config_model_falls_back_to_top_level(tmp_path, monkeypatch):
+    """When YAML omits runtime_config.model, fall back to the top-level
+    resolved model.
+
+    Without this fallback, SaaS workspaces silently boot with the
+    adapter's hard-coded default — claude-code-default reads
+    ``runtime_config.model or "sonnet"``, so even a user who picks Opus
+    in the canvas Config tab gets Sonnet on the next restart. Root
+    cause: the CP user-data script regenerates /configs/config.yaml
+    at every boot with only ``name``, ``runtime``, ``a2a`` keys
+    (intentionally minimal so it doesn't carry stale state), losing
+    runtime_config.model. MODEL_PROVIDER is plumbed as an env var, so
+    picking it up via the top-level resolved ``model`` keeps the
+    selection sticky across restarts.
+    """
+    monkeypatch.delenv("MODEL_PROVIDER", raising=False)
+    config_yaml = tmp_path / "config.yaml"
+    # Top-level model set, runtime_config.model NOT set — exactly the
+    # shape the CP user-data writes after restart.
+    config_yaml.write_text(yaml.dump({"model": "anthropic:claude-opus-4-7"}))
+
+    cfg = load_config(str(tmp_path))
+    assert cfg.runtime_config.model == "anthropic:claude-opus-4-7"
+
+
+def test_runtime_config_model_yaml_wins_over_top_level(tmp_path, monkeypatch):
+    """When YAML explicitly sets runtime_config.model, it takes precedence
+    over the top-level model. Tests the fallback is only a fallback —
+    not a clobber that would break workspaces with intentionally
+    different runtime_config.model vs top-level model values.
+    """
+    monkeypatch.delenv("MODEL_PROVIDER", raising=False)
+    config_yaml = tmp_path / "config.yaml"
+    config_yaml.write_text(
+        yaml.dump(
+            {
+                "model": "anthropic:claude-opus-4-7",
+                "runtime_config": {"model": "openai:gpt-4o"},
+            }
+        )
+    )
+
+    cfg = load_config(str(tmp_path))
+    # Top-level still resolves to its own value.
+    assert cfg.model == "anthropic:claude-opus-4-7"
+    # runtime_config.model wins — fallback only fires when YAML is empty.
+    assert cfg.runtime_config.model == "openai:gpt-4o"
+
+
+def test_runtime_config_model_picks_up_env_via_top_level(tmp_path, monkeypatch):
+    """End-to-end path the canvas Save+Restart relies on: user picks
+    a model → workspace_secrets.MODEL_PROVIDER updated → CP user-data
+    re-renders /configs/config.yaml WITHOUT runtime_config.model →
+    workspace boots with MODEL_PROVIDER env var. The top-level model
+    resolves from MODEL_PROVIDER (line 277), then runtime_config.model
+    falls back to that. Adapter sees the user's selection.
+
+    This is the regression test for the canvas-side feedback
+    "Provisioner doesn't read model from config.yaml and doesn't set
+    MODEL env var. Without MODEL, the adapter defaults to sonnet and
+    bypasses the mimo routing." (2026-04-30).
+    """
+    monkeypatch.setenv("MODEL_PROVIDER", "minimax/abab7-chat-preview")
+    config_yaml = tmp_path / "config.yaml"
+    # CP-shaped minimal config.yaml: only name + runtime + a2a, NO
+    # top-level model, NO runtime_config.model.
+    config_yaml.write_text(
+        yaml.dump(
+            {
+                "name": "Test Agent",
+                "runtime": "claude-code",
+                "a2a": {"port": 8000, "streaming": True},
+            }
+        )
+    )
+
+    cfg = load_config(str(tmp_path))
+    assert cfg.model == "minimax/abab7-chat-preview"
+    # The adapter (claude-code-default reads runtime_config.model or "sonnet")
+    # now sees the user's selected model instead of "sonnet".
+    assert cfg.runtime_config.model == "minimax/abab7-chat-preview"
+
+
 def test_delegation_config_defaults(tmp_path):
    """DelegationConfig nested defaults are applied."""
    config_yaml = tmp_path / "config.yaml"
--- a/workspace/tests/test_inbox.py
+++ b/workspace/tests/test_inbox.py
@ -442,3 +442,113 @@ def test_default_cursor_path_uses_configs_dir(monkeypatch, tmp_path: Path):
 def test_default_cursor_path_falls_back_to_default(monkeypatch):
    monkeypatch.delenv("CONFIGS_DIR", raising=False)
    assert inbox.default_cursor_path() == Path("/configs") / ".mcp_inbox_cursor"
+
+
+# ---------------------------------------------------------------------------
+# Notification callback bridge — push UX for notification-capable hosts
+# ---------------------------------------------------------------------------
+#
+# `record()` is called from the poller daemon thread when a new activity
+# row arrives. Notification-capable MCP hosts (Claude Code) want to be
+# pushed a notification — the universal wheel registers a callback via
+# `set_notification_callback()` that fires the MCP notification. Pollers
+# (`wait_for_message`/`inbox_peek`) keep working unchanged.
+
+
+@pytest.fixture(autouse=True)
+def _reset_notification_callback():
+    """Each test starts with no callback registered. Notification
+    state must not leak across tests — same pattern as _reset_singleton."""
+    inbox.set_notification_callback(None)
+    yield
+    inbox.set_notification_callback(None)
+
+
+def test_record_fires_notification_callback_with_message_dict(state: inbox.InboxState):
+    """When a callback is registered, record() invokes it with the
+    canonical to_dict() shape — same shape inbox_peek returns to the
+    agent. Callers can build MCP notification payloads from this
+    without re-deriving fields."""
+    received: list[dict] = []
+    inbox.set_notification_callback(received.append)
+
+    state.record(_msg("act-1", peer_id="ws-peer", text="hello"))
+
+    assert len(received) == 1
+    payload = received[0]
+    assert payload["activity_id"] == "act-1"
+    assert payload["text"] == "hello"
+    assert payload["peer_id"] == "ws-peer"
+    assert payload["kind"] == "peer_agent"  # to_dict derives this
+    assert payload["method"] == "message/send"
+
+
+def test_record_dedupe_does_not_refire_callback(state: inbox.InboxState):
+    """The activity_id dedupe path must short-circuit BEFORE invoking
+    the callback — otherwise a notification-capable host would see
+    duplicate push events on poller backlog overlap."""
+    received: list[dict] = []
+    inbox.set_notification_callback(received.append)
+
+    state.record(_msg("act-1"))
+    state.record(_msg("act-1"))  # dedupe — same id
+
+    assert len(received) == 1, (
+        f"expected 1 callback (dedupe), got {len(received)} — "
+        f"would cause duplicate Claude conversation interrupts"
+    )
+
+
+def test_record_callback_exception_does_not_break_inbox(state: inbox.InboxState):
+    """A raising callback (e.g. asyncio loop closed mid-shutdown,
+    serialization error on an exotic message) must NOT prevent the
+    message from landing in the queue. Notification delivery is
+    best-effort; inbox correctness is not negotiable."""
+
+    def boom(_payload):
+        raise RuntimeError("simulated callback failure")
+
+    inbox.set_notification_callback(boom)
+
+    # Must not raise, must still queue the message.
+    state.record(_msg("act-1"))
+
+    queued = state.peek(10)
+    assert len(queued) == 1
+    assert queued[0].activity_id == "act-1"
+
+
+def test_record_no_callback_registered_is_no_op(state: inbox.InboxState):
+    """When no callback is set (in-container path, or before
+    activation), record() proceeds normally — no None-call crash."""
+    # No set_notification_callback() in this test — autouse fixture
+    # cleared any previous registration.
+    state.record(_msg("act-1"))
+    assert len(state.peek(10)) == 1
+
+
+def test_set_notification_callback_replaces_previous(state: inbox.InboxState):
+    """Re-registering the callback replaces the previous — only the
+    latest callback fires. Test ensures the universal wheel can update
+    the bridge if its asyncio loop is replaced (e.g. graceful restart)."""
+    first: list[dict] = []
+    second: list[dict] = []
+    inbox.set_notification_callback(first.append)
+    inbox.set_notification_callback(second.append)
+
+    state.record(_msg("act-1"))
+
+    assert len(first) == 0, "first callback should be unregistered"
+    assert len(second) == 1, "second callback should receive the event"
+
+
+def test_set_notification_callback_none_clears(state: inbox.InboxState):
+    """Setting None clears the callback — used by tests + the wheel's
+    shutdown path."""
+    received: list[dict] = []
+    inbox.set_notification_callback(received.append)
+    inbox.set_notification_callback(None)
+
+    state.record(_msg("act-1"))
+
+    assert received == []