r"""Tests for entrypoint.sh's log_boot_context() shell function.

The Python-side audit (test_adapter_logging.py) pins what
`_audit_auth_env_presence` in adapter.py emits. But the shell function fires
FIRST — twice, even (once pre-gosu as root, once post-gosu as agent). When the
adapter never runs at all because the SDK import fails, the entrypoint
emission is the operator's ONLY visibility into the boot env. So this contract
needs its own test.

The cross-file gate `test_audit_env_list_matches_entrypoint_sh` proves the
NAME LIST matches; this file proves the SHELL CODE actually emits the right
lines for those names. Without this, a typo in the for-loop body (e.g.
`eval "val=\$$var"` → `val=$var`, which would print the literal name not its
value) silently breaks the audit.

Strategy: extract the `log_boot_context()` function body from entrypoint.sh
and run it in a fresh subprocess with controlled env, then assert on the
captured output. We never source entrypoint.sh wholesale because it would
chown /workspace and exec molecule-runtime — neither is appropriate in a test
sandbox.
"""
from __future__ import annotations

import os
import re
import subprocess
from pathlib import Path

import pytest

TEMPLATE_DIR = Path(__file__).resolve().parent.parent
ENTRYPOINT = TEMPLATE_DIR / "entrypoint.sh"


def _extract_function() -> str:
    """Pull just the log_boot_context() function definition out of entrypoint.sh.

    Returns:
        The literal function definition (`log_boot_context() { ... }`) as a
        string, suitable for placing ahead of a `log_boot_context` call in an
        `sh -c` script.

    Fails the test (via `pytest.fail`) with a clear message if the function
    can't be located — that itself is a regression worth a loud test failure.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the locale
    # of whichever machine runs the suite.
    text = ENTRYPOINT.read_text(encoding="utf-8")
    # `log_boot_context() {` at the start of a line, then everything up to the
    # first closing `}` at column 0. The function is small and shape-stable;
    # we don't try to be a full shell parser.
    match = re.search(
        r"^log_boot_context\(\)\s*\{.*?^\}\s*$",
        text,
        re.DOTALL | re.MULTILINE,
    )
    if not match:
        pytest.fail("Could not locate log_boot_context() in entrypoint.sh")
    return match.group(0)


def _run_boot_context(env: dict[str, str]) -> subprocess.CompletedProcess[str]:
    """Run log_boot_context() in a fresh /bin/sh and return the finished process.

    Args:
        env: Variables to expose to the shell, layered on top of a minimal
            base environment that contains only PATH.

    Returns:
        The `CompletedProcess` with `stdout` and `stderr` captured as text.

    Raises:
        AssertionError: If the shell exits non-zero; the message carries both
            captured streams to make the failure diagnosable.
    """
    func = _extract_function()
    script = f"{func}\nlog_boot_context\n"
    # Start from an empty env so no inherited auth vars leak into the test,
    # restoring only PATH so lookups (`id`, `hostname`, `date`, `ls`) still
    # work. Caller-supplied entries win over the base.
    safe_env = {"PATH": os.environ.get("PATH", "/usr/bin:/bin"), **env}
    result = subprocess.run(
        ["/bin/sh", "-c", script],
        env=safe_env,
        capture_output=True,
        text=True,
        timeout=10,
        check=False,
    )
    assert result.returncode == 0, (
        f"log_boot_context exited rc={result.returncode}\n"
        f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
    )
    return result


def _run_function(env: dict[str, str]) -> str:
    """Run log_boot_context() in a fresh /bin/sh with the given env. Returns stdout."""
    return _run_boot_context(env).stdout


# Audit names — kept in lockstep with adapter.py's _AUTH_ENV_AUDIT and the
# entrypoint.sh for-loop. test_audit_env_list_matches_entrypoint_sh and
# test_loop_var_list_matches_audit (below) gate any drift across the three
# locations.
_AUDIT_NAMES = (
    "CLAUDE_CODE_OAUTH_TOKEN",
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_AUTH_TOKEN",
    "ANTHROPIC_BASE_URL",
    "MINIMAX_API_KEY",
    "GLM_API_KEY",
    "KIMI_API_KEY",
    "DEEPSEEK_API_KEY",
)


def test_emits_set_for_present_env():
    """A set var must produce `env NAME=set` — proves the eval-deref works."""
    out = _run_function({"MINIMAX_API_KEY": "secret-MUST-NOT-LEAK"})
    assert "env MINIMAX_API_KEY=set" in out


def test_emits_unset_for_absent_env():
    """An unset var must produce `env NAME=unset` — proves the empty-string branch."""
    out = _run_function({})
    for name in _AUDIT_NAMES:
        assert f"env {name}=unset" in out, (
            f"missing `env {name}=unset` line — for-loop body may be miscoded"
        )


def test_never_leaks_value():
    """The audit prints NAMES, not VALUES. Regression here = secret leak.

    Same threat model as the Python-side test: an operator-visible boot log
    that contains the actual key would defeat the whole point of the audit
    (the audit exists so we can answer 'is the key present' WITHOUT exposing
    the key). A typo that collapses the eval-based dereference into a plain
    echo of the variable's value would trip this test.

    Both stdout and stderr are scanned: a value that escapes on stderr (for
    example through shell tracing) is just as much a leak if the two streams
    end up in the same log.
    """
    secret = "sk-FAKE-MUST-NEVER-APPEAR-IN-BOOT-LOG"
    base_url = "https://api.example.com"
    proc = _run_boot_context({
        "MINIMAX_API_KEY": secret,
        "CLAUDE_CODE_OAUTH_TOKEN": secret,
        "ANTHROPIC_BASE_URL": base_url,
    })
    for stream, text in (("stdout", proc.stdout), ("stderr", proc.stderr)):
        assert secret not in text, (
            f"boot-context log leaked the env VALUE on {stream}:\n{text}"
        )
        # ANTHROPIC_BASE_URL is the most-likely-to-be-logged-by-mistake field
        # because operators sometimes WANT to see it; pin that it's still
        # name-only.
        assert base_url not in text, (
            f"boot-context log leaked ANTHROPIC_BASE_URL on {stream}:\n{text}"
        )


def test_emits_workspace_id_and_platform_url():
    """WORKSPACE_ID and PLATFORM_URL appear by VALUE — these are not secrets.

    They're the operator-visible identifiers a support engineer needs to
    correlate logs with platform records. Pinning the field shape so a later
    refactor doesn't accidentally redact them.
    """
    out = _run_function({
        "WORKSPACE_ID": "ws-test-1234",
        "PLATFORM_URL": "https://test.example.com",
    })
    assert "workspace_id=ws-test-1234" in out
    assert "platform_url=https://test.example.com" in out


def test_emits_unset_marker_when_workspace_id_missing():
    """With no WORKSPACE_ID / PLATFORM_URL, the field labels are still emitted.

    A support engineer reading the boot log needs the `workspace_id=` and
    `platform_url=` fields to be present even when the platform injected
    nothing, so the absence is visible rather than silent.

    NOTE(review): the assertions below pin only the field labels, not any
    placeholder that follows them. The placeholder token this docstring used
    to name appears to have been lost from this copy of the file; confirm the
    exact fallback text against entrypoint.sh and tighten the assertions to
    include it.
    """
    out = _run_function({})
    assert "workspace_id=" in out
    assert "platform_url=" in out


def test_emits_uid_and_gid():
    """uid/gid line is critical — answers 'did the privilege drop happen?'

    The two-emission pattern (pre-gosu as root, post-gosu as agent) only works
    as a diagnostic if uid/gid is in every emission. Pin the field shape; we
    don't pin the literal value because CI runs vary.
    """
    out = _run_function({})
    assert re.search(r"uid=\d+\s+gid=\d+", out), (
        f"missing or malformed uid/gid line:\n{out}"
    )


def test_emits_boot_marker():
    """Each emission starts with the dated `entrypoint boot` banner.

    Operators grep for this to count restarts in a crash loop.
    """
    out = _run_function({})
    # Format: "----- entrypoint boot 2026-05-02T12:34:56Z -----"
    assert re.search(
        r"-----\s+entrypoint boot \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\s+-----",
        out,
    ), f"missing boot banner:\n{out}"


def test_loop_var_list_matches_audit():
    """The for-loop's literal NAME list must match _AUDIT_NAMES (this file).

    Companion to test_audit_env_list_matches_entrypoint_sh in
    test_adapter_logging.py: that test cross-checks adapter.py vs
    entrypoint.sh; this one cross-checks entrypoint.sh vs the test fixture
    above.

    If a maintainer adds a vendor to entrypoint.sh without updating the audit
    name tuple in this file, the existing `test_emits_unset_for_absent_env`
    would still pass (because all audited names also appear in the loop), but
    the maintainer would have a false sense of coverage. This test catches
    that.
    """
    # Same explicit decoding as _extract_function, for locale independence.
    text = ENTRYPOINT.read_text(encoding="utf-8")
    # The loop is recognised by its header sharing a line with the first
    # audited name; names are everything between `for var in` and the `;`.
    loop_line = next(
        (
            line
            for line in text.splitlines()
            if "for var in" in line and "CLAUDE_CODE_OAUTH_TOKEN" in line
        ),
        None,
    )
    assert loop_line, "entrypoint.sh missing the auth-env for-loop"
    names_in_shell = tuple(
        loop_line.split("for var in", 1)[1].split(";", 1)[0].split()
    )
    assert set(names_in_shell) == set(_AUDIT_NAMES), (
        f"_AUDIT_NAMES in this file ({set(_AUDIT_NAMES)}) and the for-loop "
        f"in entrypoint.sh ({set(names_in_shell)}) disagree — update the "
        "test fixture or the shell loop to bring them back in sync."
    )