codex-channel-molecule is the codex-side counterpart to hermes-channel-molecule. It long-polls the molecule platform inbox via molecule_runtime.a2a_tools.tool_wait_for_message, runs `codex exec --resume <session>` per inbound message, captures the assistant reply from stdout, and routes it back through send_message_to_user (canvas chat) or delegate_task (peer agent), then acks the inbox row. Each chat thread (one canvas-user thread or one peer-workspace thread) gets its own codex session_id, persisted to disk so daemon restarts keep conversation context. Reply-routing failures skip the inbox_pop ack so the platform's at-least-once delivery re-surfaces the row on the next poll. This daemon is the operator-unblock until openai/codex#17543 lands — once codex itself accepts MCP custom notifications as Op::UserInput through the wired-in MCP server, this daemon becomes redundant. The README's deprecation-path section calls that out so future operators know when to switch off. Tests cover the dispatch loop with fake tools (8 tests asserting exact contracts: canvas vs peer routing, session continuity, persistence across restarts, timeout sentinel handling, at-least-once on reply failure, exit-code surfacing, A2A multipart text). The codex_runner tests are real-subprocess (fake codex script spawned via asyncio.create_subprocess_exec) so the boot path matches production — no in-process mocking of the spawn boundary. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
300 lines
11 KiB
Python
300 lines
11 KiB
Python
"""Bridge-loop tests with fake tools + fake codex runner.
|
|
|
|
The fakes capture every call so each test asserts exact contracts:
|
|
which kind of message reaches which reply tool, what activity_ids are
|
|
acked, how session_id continuity is maintained across turns.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
import pytest
|
|
|
|
from codex_channel_molecule.bridge import _SessionStore, run_bridge
|
|
from codex_channel_molecule.codex_runner import CodexResult
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Fakes
|
|
# ----------------------------------------------------------------------
|
|
|
|
|
|
class FakeTools:
    """Call-recording tool fakes.

    ``wait_for_message`` replays inbound messages from a queue seeded by
    the test; every other tool simply logs its arguments so assertions
    can pin the exact call contract.
    """

    def __init__(self, inbox: List[Dict[str, Any]]) -> None:
        self._inbox: asyncio.Queue[Dict[str, Any]] = asyncio.Queue()
        for message in inbox:
            self._inbox.put_nowait(message)
        # Call logs the tests assert against.
        self.popped: List[str] = []
        self.canvas_replies: List[Tuple[str, Optional[str]]] = []  # (text, ws)
        self.peer_replies: List[Tuple[str, str, Optional[str]]] = []
        # (workspace_id, task, source_workspace_id)

    async def wait_for_message(self, timeout_secs: float) -> str:
        # A real implementation would block for up to timeout_secs; the
        # fake drains its queue immediately and, once exhausted, hands
        # back the timeout sentinel so the bridge keeps cycling without
        # hanging.
        if self._inbox.empty():
            return json.dumps({"timeout": True, "timeout_secs": timeout_secs})
        return json.dumps(self._inbox.get_nowait())

    async def inbox_pop(self, activity_id: str) -> str:
        """Record the ack and echo the platform's removal payload."""
        self.popped.append(activity_id)
        return json.dumps({"removed": True, "activity_id": activity_id})

    async def send_message_to_user(
        self, message: str, workspace_id: Optional[str]
    ) -> str:
        """Record a canvas-chat reply."""
        self.canvas_replies.append((message, workspace_id))
        return "ok"

    async def delegate_task(
        self, workspace_id: str, task: str, source_workspace_id: Optional[str]
    ) -> str:
        """Record a peer-agent reply."""
        self.peer_replies.append((workspace_id, task, source_workspace_id))
        return "ok"
|
|
|
|
|
|
class FakeRunner:
    """Scripted stand-in for the codex runner.

    Hands out pre-baked CodexResults one per call and records every
    (message, session_id) pair, so tests can pin session-continuity
    behavior without spawning a real codex subprocess.
    """

    def __init__(self, results: List[CodexResult]) -> None:
        self._results = list(results)
        self.calls: List[Tuple[str, Optional[str]]] = []  # (message, session_id)

    async def run(
        self, message: str, session_id: Optional[str] = None
    ) -> CodexResult:
        self.calls.append((message, session_id))
        if self._results:
            return self._results.pop(0)
        # Script exhausted — synthesize a benign success so extra bridge
        # iterations don't crash the test.
        return CodexResult(
            text="(no scripted result)",
            session_id=session_id,
            exit_code=0,
            stderr_tail="",
        )
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Tests
|
|
# ----------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_canvas_user_message_is_dispatched_acked_and_replied(tmp_path):
    """Canvas-user inbound → CodexRunner.run → send_message_to_user →
    inbox_pop. Assert the full chain in one go."""
    inbound = {
        "kind": "canvas_user",
        "activity_id": "act-1",
        "arrival_workspace_id": "ws-canvas",
        "text": "hi can you help",
    }
    scripted = CodexResult(
        text="sure, what's up",
        session_id="sess-canvas-1",
        exit_code=0,
        stderr_tail="",
    )
    tools = FakeTools([inbound])
    runner = FakeRunner([scripted])
    store = _SessionStore(tmp_path / "sessions.json")

    await run_bridge(runner=runner, tools=tools, session_store=store, iterations=2)

    # New thread → no session to resume on the first call.
    assert runner.calls == [("hi can you help", None)]
    assert tools.canvas_replies == [("sure, what's up", "ws-canvas")]
    assert tools.peer_replies == []
    assert tools.popped == ["act-1"]
    # Session id captured from the result is keyed by chat thread.
    assert store.get("canvas:ws-canvas") == "sess-canvas-1"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_peer_agent_message_routes_to_delegate_task(tmp_path):
    """Peer-agent inbound replies via delegate_task, never the canvas."""
    inbound = {
        "kind": "peer_agent",
        "activity_id": "act-2",
        "peer_id": "ws-peer",
        "arrival_workspace_id": "ws-self",
        "text": "what's your status",
    }
    tools = FakeTools([inbound])
    runner = FakeRunner([
        CodexResult(
            text="all good",
            session_id="sess-peer-1",
            exit_code=0,
            stderr_tail="",
        ),
    ])
    store = _SessionStore(tmp_path / "sessions.json")

    await run_bridge(runner=runner, tools=tools, session_store=store, iterations=2)

    assert tools.peer_replies == [("ws-peer", "all good", "ws-self")]
    assert tools.canvas_replies == []
    assert tools.popped == ["act-2"]
    # Peer threads get their own session key namespace.
    assert store.get("peer:ws-peer") == "sess-peer-1"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_session_continuity_resumes_same_codex_session(tmp_path):
    """Two messages on the same chat_id → second call resumes the
    session_id captured from the first."""
    def canvas_msg(activity_id, text):
        # Both messages land on the same canvas thread (ws-x).
        return {
            "kind": "canvas_user",
            "activity_id": activity_id,
            "arrival_workspace_id": "ws-x",
            "text": text,
        }

    tools = FakeTools([canvas_msg("act-a", "first"), canvas_msg("act-b", "second")])
    runner = FakeRunner([
        CodexResult(text=reply, session_id="sess-shared", exit_code=0, stderr_tail="")
        for reply in ("r1", "r2")
    ])
    store = _SessionStore(tmp_path / "sessions.json")

    await run_bridge(runner=runner, tools=tools, session_store=store, iterations=3)

    # First call: no session_id (new). Second call: resume sess-shared.
    assert runner.calls == [("first", None), ("second", "sess-shared")]
    assert tools.popped == ["act-a", "act-b"]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_session_store_persists_across_runs(tmp_path):
    """Session map survives daemon restart — written to disk on each
    update, re-read on the next instantiation."""
    state_file = tmp_path / "sessions.json"

    async def one_daemon_run(activity_id, text, reply):
        # Each invocation builds fresh tools/runner/store — a fresh
        # _SessionStore over the same file simulates a daemon restart.
        tools = FakeTools([{
            "kind": "canvas_user",
            "activity_id": activity_id,
            "arrival_workspace_id": "ws-resume",
            "text": text,
        }])
        runner = FakeRunner([CodexResult(
            text=reply,
            session_id="sess-persist",
            exit_code=0,
            stderr_tail="",
        )])
        await run_bridge(
            runner=runner,
            tools=tools,
            session_store=_SessionStore(state_file),
            iterations=2,
        )
        return runner

    await one_daemon_run("act-1", "first ever", "hi")
    assert state_file.exists()

    # Second instance must have resumed the persisted session id.
    runner_two = await one_daemon_run("act-2", "follow up", "ok")
    assert runner_two.calls == [("follow up", "sess-persist")]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_timeout_sentinel_does_not_call_codex(tmp_path):
    """An empty inbox returns a timeout sentinel — the bridge must
    keep polling without spawning codex."""
    tools = FakeTools(inbox=[])  # nothing queued → every poll times out
    runner = FakeRunner(results=[])
    store = _SessionStore(tmp_path / "sessions.json")

    await run_bridge(runner=runner, tools=tools, session_store=store, iterations=3)

    # No codex spawn, no ack, no reply of either kind.
    assert runner.calls == []
    assert tools.popped == []
    assert tools.canvas_replies == []
    assert tools.peer_replies == []
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_failed_reply_routing_skips_inbox_pop(tmp_path):
    """If sending the reply fails, do NOT ack the inbox row — the
    platform will re-deliver on the next poll. At-least-once semantics.
    """

    class FlakyTools(FakeTools):
        # Reply leg blows up the way a platform-side 5xx would.
        async def send_message_to_user(self, message, workspace_id):
            raise RuntimeError("simulated 502 from platform")

    tools = FlakyTools([{
        "kind": "canvas_user",
        "activity_id": "act-err",
        "arrival_workspace_id": "ws-x",
        "text": "msg",
    }])
    runner = FakeRunner([CodexResult(
        text="reply",
        session_id="sess",
        exit_code=0,
        stderr_tail="",
    )])
    store = _SessionStore(tmp_path / "sessions.json")

    await run_bridge(runner=runner, tools=tools, session_store=store, iterations=2)

    # Codex did run once for the message...
    assert runner.calls == [("msg", None)]
    # ...but the row must NOT have been popped — at-least-once requires
    # the unacked row to re-surface next poll.
    assert tools.popped == []
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_nonzero_exit_code_surfaces_in_reply(tmp_path):
    """Codex failure (e.g. timeout, crash) becomes a visible reply
    instead of silently dropping. Operator sees the failure where the
    answer was expected."""
    tools = FakeTools([{
        "kind": "canvas_user",
        "activity_id": "act-fail",
        "arrival_workspace_id": "ws-x",
        "text": "ping",
    }])
    failure = CodexResult(
        text="(codex exec timed out after 600s)",
        session_id=None,
        exit_code=-1,
        stderr_tail="timeout",
    )
    runner = FakeRunner([failure])
    store = _SessionStore(tmp_path / "sessions.json")

    await run_bridge(runner=runner, tools=tools, session_store=store, iterations=2)

    assert len(tools.canvas_replies) == 1
    reply_text, _workspace = tools.canvas_replies[0]
    assert "timed out" in reply_text
    assert "exit=-1" in reply_text
    # The row is acked — codex's verdict (success or failure) is
    # delivered, so the inbox row is fully handled.
    assert tools.popped == ["act-fail"]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_a2a_multipart_text_is_concatenated(tmp_path):
    """A2A messages can arrive as ``parts: [{type: text, text: ...}, ...]``
    instead of a flat ``text`` field. Bridge concatenates parts into a
    single codex prompt."""
    chunks = ["first chunk", "second chunk"]
    tools = FakeTools([{
        "kind": "peer_agent",
        "activity_id": "act-p",
        "peer_id": "ws-peer",
        "parts": [{"type": "text", "text": chunk} for chunk in chunks],
    }])
    runner = FakeRunner([CodexResult(
        text="ack",
        session_id="s",
        exit_code=0,
        stderr_tail="",
    )])
    store = _SessionStore(tmp_path / "sessions.json")

    await run_bridge(runner=runner, tools=tools, session_store=store, iterations=2)

    prompt, _session = runner.calls[0]
    assert prompt == "first chunk\nsecond chunk"
|