Three production bugs caught by the codex agent live-testing the daemon
end-to-end against codex-cli 0.128 + a real LaunchAgent install:
1. Codex CLI 0.6+ moved `--resume` from a flag on `exec` to a
`resume` subcommand. The daemon was sending
`codex exec --skip-git-repo-check --resume <sid> <prompt>`, which
parses on 0.5.x but fails on 0.6.x+. Fixed to:
fresh: codex exec --skip-git-repo-check <prompt>
resume: codex exec resume --skip-git-repo-check <sid> <prompt>
Verified on codex-cli 0.128 with a live binary; the 0.5.x behavior
is preserved by the fake-codex test fixture, which now SystemExits
if it sees the legacy `--resume` flag (regression gate).
2. wait_for_message() never returned anything because nothing in the
daemon ever called molecule_runtime.inbox.activate() or started
the poller thread. The wheel ships those primitives but expects
the embedding runtime to wire them up — the workspace runtime
does this in start.py, but a standalone daemon embedding the
tools must do it itself. Added the activation + poller-thread
start in _RealTools.__init__.
3. The codex binary is a `#!/usr/bin/env node` shim. Under launchd /
stripped systemd units, the parent process PATH is `/usr/bin:/bin`
and `env node` 127s out silently. CodexRunner now prepends the
directory of the resolved codex binary to the subprocess PATH at
spawn time — Node lives next to codex under nvm / brew /
pnpm-global, so this restores the discovery without operators
having to thread PATH through their LaunchAgent / unit file.
README updated with a note on the launchd/systemd interaction.
Test:
- 31 passed (was 28). Three new regression gates: subcommand-shape,
PATH-prepend (launchd-default PATH stripped), and PATH-no-double
(idempotent when codex_bin_dir already present). Verified the two
behavioural new tests FAIL on the old codex_runner.py by stashing
the source change only and re-running.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
250 lines
8.6 KiB
Python
250 lines
8.6 KiB
Python
"""Real-subprocess test for CodexRunner — boot path coverage.
|
|
|
|
In-process mocking would miss subprocess-level bugs (env handling,
|
|
arg passthrough, stderr capture, signal/timeout). The fake script is a
|
|
real Python program spawned via asyncio.create_subprocess_exec, exactly
|
|
as a real codex install would be.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import textwrap
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from codex_channel_molecule.codex_runner import (
|
|
CodexRunner,
|
|
_extract_session_id,
|
|
)
|
|
|
|
|
|
_FAKE_CODEX_SCRIPT = textwrap.dedent("""\
|
|
#!/usr/bin/env python3
|
|
\"\"\"Fake codex CLI for tests.
|
|
|
|
Behaviors keyed on argv shape and env:
|
|
argv: codex exec [--skip-git-repo-check] <message>
|
|
argv: codex exec resume [--skip-git-repo-check] <sid> <message>
|
|
|
|
Echoes a banner to stderr (\"session: <uuid>\") and the input message
|
|
to stdout. Honors FAKE_EXIT_CODE for failure-path tests.
|
|
\"\"\"
|
|
import os, sys
|
|
|
|
args = sys.argv[1:]
|
|
assert args[0] == \"exec\", f\"unexpected first arg: {args[0]!r}\"
|
|
|
|
if \"--resume\" in args:
|
|
raise SystemExit(\"--resume is not supported\")
|
|
|
|
resume_id = None
|
|
if len(args) > 1 and args[1] == \"resume\":
|
|
i = 2
|
|
while i < len(args):
|
|
if args[i].startswith(\"--\"):
|
|
i += 1
|
|
else:
|
|
break
|
|
resume_id = args[i]
|
|
msg = args[i + 1] if i + 1 < len(args) else \"\"
|
|
else:
|
|
i = 1
|
|
while i < len(args):
|
|
if args[i].startswith(\"--\"):
|
|
i += 1 # skip unrecognized flag
|
|
else:
|
|
break
|
|
msg = args[i] if i < len(args) else \"\"
|
|
|
|
sid = resume_id or os.environ.get(\"FAKE_NEW_SESSION_ID\", \"a1b2c3d4-1111-2222-3333-444455556666\")
|
|
sys.stderr.write(f\"session: {sid}\\n\")
|
|
sys.stderr.flush()
|
|
|
|
sys.stdout.write(f\"echo: {msg}\\n\")
|
|
sys.stdout.flush()
|
|
|
|
sys.exit(int(os.environ.get(\"FAKE_EXIT_CODE\", \"0\")))
|
|
""")
|
|
|
|
|
|
@pytest.fixture
|
|
def fake_codex(tmp_path: Path) -> Path:
|
|
p = tmp_path / "codex"
|
|
p.write_text(_FAKE_CODEX_SCRIPT)
|
|
p.chmod(0o755)
|
|
return p
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_run_returns_stdout_text(fake_codex):
|
|
runner = CodexRunner(codex_bin=str(fake_codex), timeout_secs=10.0)
|
|
result = await runner.run(message="hello world")
|
|
assert result.text == "echo: hello world"
|
|
assert result.exit_code == 0
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_run_extracts_new_session_id_from_stderr(fake_codex, monkeypatch):
|
|
runner = CodexRunner(codex_bin=str(fake_codex), timeout_secs=10.0)
|
|
monkeypatch.setenv("FAKE_NEW_SESSION_ID", "deadbeef-0000-1111-2222-333344445555")
|
|
result = await runner.run(message="any")
|
|
assert result.session_id == "deadbeef-0000-1111-2222-333344445555"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_run_resumes_existing_session(fake_codex):
|
|
runner = CodexRunner(codex_bin=str(fake_codex), timeout_secs=10.0)
|
|
given = "11111111-2222-3333-4444-555555555555"
|
|
result = await runner.run(message="follow up", session_id=given)
|
|
# Resume → no new session id is captured (input is echoed back as-is).
|
|
assert result.session_id == given
|
|
assert result.exit_code == 0
|
|
assert result.text == "echo: follow up"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_run_surfaces_nonzero_exit_code(fake_codex, monkeypatch):
|
|
runner = CodexRunner(codex_bin=str(fake_codex), timeout_secs=10.0)
|
|
monkeypatch.setenv("FAKE_EXIT_CODE", "7")
|
|
result = await runner.run(message="any")
|
|
assert result.exit_code == 7
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_run_kills_subprocess_on_timeout(tmp_path):
|
|
"""A codex turn that hangs past the timeout must be killed and
|
|
surfaced as a timeout result — not block the bridge forever."""
|
|
sleeper = tmp_path / "codex"
|
|
sleeper.write_text(textwrap.dedent("""\
|
|
#!/usr/bin/env python3
|
|
import time
|
|
time.sleep(60)
|
|
"""))
|
|
sleeper.chmod(0o755)
|
|
|
|
runner = CodexRunner(codex_bin=str(sleeper), timeout_secs=0.5)
|
|
result = await runner.run(message="hang please")
|
|
assert "timed out" in result.text
|
|
assert result.exit_code == -1
|
|
|
|
|
|
def test_constructor_fails_fast_when_codex_missing():
|
|
with pytest.raises(FileNotFoundError) as excinfo:
|
|
CodexRunner(codex_bin="/nonexistent/path/to/codex-binary-xyzzy")
|
|
assert "codex" in str(excinfo.value).lower()
|
|
|
|
|
|
def test_extract_session_id_matches_canonical_banner():
|
|
assert _extract_session_id("session: a1b2c3d4-aaaa-bbbb-cccc-dddddddddddd\n") == \
|
|
"a1b2c3d4-aaaa-bbbb-cccc-dddddddddddd"
|
|
|
|
|
|
def test_extract_session_id_matches_alternate_banner_shape():
|
|
assert _extract_session_id(
|
|
"blah blah\nsession_id=12345678-aaaa-bbbb-cccc-dddddddddddd\nmore\n"
|
|
) == "12345678-aaaa-bbbb-cccc-dddddddddddd"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_run_uses_exec_resume_subcommand_not_legacy_flag(fake_codex):
|
|
"""Codex CLI 0.6+ rejects ``codex exec --resume <sid>``; the correct
|
|
invocation is ``codex exec resume [--skip-git-repo-check] <sid> <prompt>``.
|
|
The fake-codex fixture raises SystemExit if it sees ``--resume`` in
|
|
argv, so the assertion is that the call SUCCEEDS — proving the
|
|
runner moved off the legacy flag.
|
|
"""
|
|
runner = CodexRunner(codex_bin=str(fake_codex), timeout_secs=10.0)
|
|
sid = "deadbeef-1111-2222-3333-444455556666"
|
|
result = await runner.run(message="follow", session_id=sid)
|
|
# Subprocess returned 0 because the fake didn't see --resume. If the
|
|
# runner ever regresses to the old shape, the fake exits non-zero
|
|
# with "--resume is not supported" → result.exit_code != 0.
|
|
assert result.exit_code == 0, (
|
|
f"Expected exec resume subcommand shape, got "
|
|
f"exit_code={result.exit_code} stderr_tail={result.stderr_tail!r}"
|
|
)
|
|
assert result.session_id == sid
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_run_prepends_codex_bin_dir_to_subprocess_path(tmp_path):
|
|
"""The codex binary is a `#!/usr/bin/env node` shim. Under launchd /
|
|
systemd, the parent process's PATH is stripped to /usr/bin:/bin, so
|
|
the shebang's `env node` lookup fails and codex 127s out silently.
|
|
The runner mitigates this by prepending the codex binary's own
|
|
directory (where node lives in nvm/brew/pnpm-global layouts) to the
|
|
subprocess PATH. Pin: subprocess sees that dir first in PATH.
|
|
"""
|
|
bin_dir = tmp_path / "fakebin"
|
|
bin_dir.mkdir()
|
|
fake = bin_dir / "codex"
|
|
# Echo the resolved subprocess PATH so the test can inspect it.
|
|
fake.write_text(textwrap.dedent("""\
|
|
#!/usr/bin/env python3
|
|
import os, sys
|
|
sys.stderr.write(f"session: a1b2c3d4-1111-2222-3333-444455556666\\n")
|
|
sys.stdout.write(os.environ.get("PATH", ""))
|
|
"""))
|
|
fake.chmod(0o755)
|
|
|
|
runner = CodexRunner(codex_bin=str(fake), timeout_secs=10.0)
|
|
# Strip PATH down to the launchd-default minimum to prove the
|
|
# runner's own injection puts bin_dir back.
|
|
import os as _os
|
|
saved = _os.environ.get("PATH", "")
|
|
_os.environ["PATH"] = "/usr/bin:/bin"
|
|
try:
|
|
result = await runner.run(message="probe")
|
|
finally:
|
|
_os.environ["PATH"] = saved
|
|
|
|
assert result.exit_code == 0
|
|
seen_path = result.text
|
|
# bin_dir must appear FIRST so `env node` finds it before falling
|
|
# back to the (empty for launchd) system PATH.
|
|
assert seen_path.startswith(str(bin_dir) + os.pathsep), (
|
|
f"expected codex_bin_dir prefix in subprocess PATH, got: {seen_path!r}"
|
|
)
|
|
assert "/usr/bin" in seen_path, (
|
|
f"expected existing PATH preserved after prefix, got: {seen_path!r}"
|
|
)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_run_does_not_duplicate_codex_bin_dir_in_path(tmp_path):
|
|
"""If the operator's PATH already includes the codex bin dir (the
|
|
shell-launched case), don't double it — keeps logs and downstream
|
|
tools sane.
|
|
"""
|
|
bin_dir = tmp_path / "alreadythere"
|
|
bin_dir.mkdir()
|
|
fake = bin_dir / "codex"
|
|
fake.write_text(textwrap.dedent("""\
|
|
#!/usr/bin/env python3
|
|
import os, sys
|
|
sys.stderr.write(f"session: a1b2c3d4-1111-2222-3333-444455556666\\n")
|
|
sys.stdout.write(os.environ.get("PATH", ""))
|
|
"""))
|
|
fake.chmod(0o755)
|
|
|
|
runner = CodexRunner(codex_bin=str(fake), timeout_secs=10.0)
|
|
import os as _os
|
|
saved = _os.environ.get("PATH", "")
|
|
_os.environ["PATH"] = f"{bin_dir}:/usr/bin:/bin"
|
|
try:
|
|
result = await runner.run(message="probe")
|
|
finally:
|
|
_os.environ["PATH"] = saved
|
|
|
|
seen_path = result.text
|
|
occurrences = seen_path.split(os.pathsep).count(str(bin_dir))
|
|
assert occurrences == 1, (
|
|
f"expected codex_bin_dir to appear exactly once, got {occurrences}: {seen_path!r}"
|
|
)
|
|
|
|
|
|
def test_extract_session_id_returns_none_on_no_match():
|
|
assert _extract_session_id("nothing relevant here") is None
|