From 227787bbbdd54ba86185a67ed0c9d37edc4bb6c9 Mon Sep 17 00:00:00 2001
From: Hongming Wang
Date: Sat, 2 May 2026 22:26:35 -0700
Subject: [PATCH] test: bash coverage for entrypoint.sh log_boot_context()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Python adapter audit (test_adapter_logging.py) pins the adapter.py
side, but the entrypoint shell function fires earlier and twice
(pre-gosu + post-gosu). When the SDK import wedge keeps the adapter from
running at all, the shell emission is the operator's only visibility
into the boot env.

Eight new tests cover:
- env NAME=set / env NAME=unset shape for every audited var
- value-leak guard: secret strings never appear in output
- WORKSPACE_ID + PLATFORM_URL passthrough by value (not secret)
- fallback for missing platform identifiers
- uid/gid line shape (used to verify the privilege drop)
- dated boot banner shape (used to count restarts in a crash loop)
- cross-file gate: shell for-loop names == fixture tuple, mirroring
  test_audit_env_list_matches_entrypoint_sh's adapter.py↔shell gate

Strategy: regex-extract the function body from entrypoint.sh and run it
in a fresh /bin/sh with controlled env. We never source the whole
entrypoint because it would chown /workspace and exec molecule-runtime.

Closes the gap from task #251 (follow-up to PR #32 boot-debug logging).

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 tests/test_entrypoint_boot_context.py | 206 ++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)
 create mode 100644 tests/test_entrypoint_boot_context.py

diff --git a/tests/test_entrypoint_boot_context.py b/tests/test_entrypoint_boot_context.py
new file mode 100644
index 0000000..1379181
--- /dev/null
+++ b/tests/test_entrypoint_boot_context.py
@@ -0,0 +1,206 @@
+r"""Tests for entrypoint.sh's log_boot_context() shell function.
+
+The Python-side audit (test_adapter_logging.py) pins what `_audit_auth_env_presence`
+in adapter.py emits. But the shell function fires FIRST — twice, even (once
+pre-gosu as root, once post-gosu as agent). When the adapter never runs at
+all because the SDK import fails, the entrypoint emission is the operator's
+ONLY visibility into the boot env. So this contract needs its own test.
+
+The cross-file gate `test_audit_env_list_matches_entrypoint_sh` proves the
+NAME LIST matches; this file proves the SHELL CODE actually emits the
+right lines for those names. Without this, a typo in the for-loop body
+(e.g. `eval "val=\$$var"` → `val=$var`, which would print the literal
+name not its value) silently breaks the audit.
+
+Strategy: extract the `log_boot_context()` function body from entrypoint.sh
+and run it in a fresh subprocess with controlled env. Asserts on stdout.
+We never source entrypoint.sh wholesale because it would chown /workspace
+and exec molecule-runtime — neither is appropriate in a test sandbox.
+"""
+from __future__ import annotations
+
+import os
+import re
+import subprocess
+from pathlib import Path
+
+import pytest
+
+TEMPLATE_DIR = Path(__file__).resolve().parent.parent
+ENTRYPOINT = TEMPLATE_DIR / "entrypoint.sh"
+
+
+def _extract_function() -> str:
+    """Pull just the log_boot_context() function definition out of entrypoint.sh.
+
+    Returns the literal function definition (`log_boot_context() { ... }`) as
+    a string, suitable for `sh -c "DEFINITION; log_boot_context"`. Bails with a
+    clear message if the function can't be located — that itself is a
+    regression worth a loud test failure.
+    """
+    text = ENTRYPOINT.read_text(encoding="utf-8")
+    # `log_boot_context() {` on its own line, then everything up to the
+    # matching closing `}` at column 0. The function is small and shape-stable;
+    # we don't try to be a full shell parser.
+    match = re.search(r"^log_boot_context\(\)\s*\{.*?^\}\s*$", text, re.DOTALL | re.MULTILINE)
+    if not match:
+        pytest.fail("Could not locate log_boot_context() in entrypoint.sh")
+    return match.group(0)
+
+
+def _run_function(env: dict[str, str]) -> str:
+    """Run log_boot_context() in a fresh /bin/sh with the given env. Returns stdout."""
+    func = _extract_function()
+    script = f"{func}\nlog_boot_context\n"
+    # Start from an empty env so no inherited auth vars leak into the test;
+    # only PATH is carried over so `id`, `hostname`, `date`, `ls` still resolve.
+    safe_env = {"PATH": os.environ.get("PATH", "/usr/bin:/bin")}
+    safe_env.update(env)
+    result = subprocess.run(
+        ["/bin/sh", "-c", script],
+        env=safe_env,
+        capture_output=True,
+        text=True,
+        timeout=10,
+        check=False,
+    )
+    assert result.returncode == 0, (
+        f"log_boot_context exited rc={result.returncode}\n"
+        f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
+    )
+    return result.stdout
+
+
+# Audit names — kept in lockstep with adapter.py's _AUTH_ENV_AUDIT and the
+# entrypoint.sh for-loop. test_audit_env_list_matches_entrypoint_sh and
+# test_loop_var_list_matches_audit (below) gate any drift across the three
+# locations.
+_AUDIT_NAMES = (
+    "CLAUDE_CODE_OAUTH_TOKEN",
+    "ANTHROPIC_API_KEY",
+    "ANTHROPIC_AUTH_TOKEN",
+    "ANTHROPIC_BASE_URL",
+    "MINIMAX_API_KEY",
+    "GLM_API_KEY",
+    "KIMI_API_KEY",
+    "DEEPSEEK_API_KEY",
+)
+
+
+def test_emits_set_for_present_env():
+    """A set var must produce `env NAME=set` — proves the eval-deref works."""
+    out = _run_function({"MINIMAX_API_KEY": "secret-MUST-NOT-LEAK"})
+    assert "env MINIMAX_API_KEY=set" in out
+
+
+def test_emits_unset_for_absent_env():
+    """An unset var must produce `env NAME=unset` — proves the empty-string branch."""
+    out = _run_function({})
+    for name in _AUDIT_NAMES:
+        assert f"env {name}=unset" in out, (
+            f"missing `env {name}=unset` line — for-loop body may be miscoded"
+        )
+
+
+def test_never_leaks_value():
+    """The audit prints NAMES, not VALUES. Regression here = secret leak.
+
+    Same threat model as the Python-side test: an operator-visible boot log
+    that contains the actual key would defeat the whole point of the audit
+    (the audit exists so we can answer 'is the key present' WITHOUT exposing
+    the key). A `eval "val=\\$$var"` typo collapsing to `echo $var` would
+    trip this test.
+    """
+    secret = "sk-FAKE-MUST-NEVER-APPEAR-IN-BOOT-LOG"
+    out = _run_function({
+        "MINIMAX_API_KEY": secret,
+        "CLAUDE_CODE_OAUTH_TOKEN": secret,
+        "ANTHROPIC_BASE_URL": "https://api.example.com",
+    })
+    assert secret not in out, f"boot-context log leaked the env VALUE:\n{out}"
+    # ANTHROPIC_BASE_URL is the most-likely-to-be-logged-by-mistake field
+    # because operators sometimes WANT to see it; pin that it's still
+    # name-only.
+    assert "https://api.example.com" not in out
+
+
+def test_emits_workspace_id_and_platform_url():
+    """WORKSPACE_ID and PLATFORM_URL appear by VALUE — these are not secrets.
+
+    They're the operator-visible identifiers a support engineer needs to
+    correlate logs with platform records. Pinning the field shape so a
+    later refactor doesn't accidentally redact them.
+    """
+    out = _run_function({
+        "WORKSPACE_ID": "ws-test-1234",
+        "PLATFORM_URL": "https://test.example.com",
+    })
+    assert "workspace_id=ws-test-1234" in out
+    assert "platform_url=https://test.example.com" in out
+
+
+def test_emits_unset_marker_when_workspace_id_missing():
+    """Missing WORKSPACE_ID / PLATFORM_URL fall back to a non-empty marker.
+
+    A support engineer reading the boot log must be able to tell that the
+    platform never injected the identifier, so the field must not be blank.
+    The marker text itself is owned by entrypoint.sh and is not pinned here.
+    """
+    out = _run_function({})
+    assert re.search(r"workspace_id=\S", out), f"blank workspace_id:\n{out}"
+    assert re.search(r"platform_url=\S", out), f"blank platform_url:\n{out}"
+
+
+def test_emits_uid_and_gid():
+    """uid/gid line is critical — answers 'did the privilege drop happen?'
+
+    The two-emission pattern (pre-gosu as root, post-gosu as agent) only
+    works as a diagnostic if uid/gid is in every emission. Pin the field
+    shape; we don't pin the literal value because CI runs vary.
+    """
+    out = _run_function({})
+    assert re.search(r"uid=\d+\s+gid=\d+", out), (
+        f"missing or malformed uid/gid line:\n{out}"
+    )
+
+
+def test_emits_boot_marker():
+    """Each emission starts with the dated `entrypoint boot` banner.
+
+    Operators grep for this to count restarts in a crash loop.
+    """
+    out = _run_function({})
+    # Format: "----- entrypoint boot 2026-05-02T12:34:56Z -----"
+    assert re.search(
+        r"-----\s+entrypoint boot \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\s+-----",
+        out,
+    ), f"missing boot banner:\n{out}"
+
+
+def test_loop_var_list_matches_audit():
+    """The for-loop's literal NAME list must match _AUDIT_NAMES (this file).
+
+    Companion to test_audit_env_list_matches_entrypoint_sh in
+    test_adapter_logging.py: that test cross-checks adapter.py vs
+    entrypoint.sh; this one cross-checks entrypoint.sh vs the test
+    fixture above. If a maintainer adds a vendor to entrypoint.sh
+    without updating the audit name tuple in this file, the existing
+    `test_emits_unset_for_absent_env` would still pass (because all
+    audited names also appear in the loop), but the maintainer would
+    have a false sense of coverage. This test catches that.
+    """
+    text = ENTRYPOINT.read_text(encoding="utf-8")
+    loop_line = next(
+        (line for line in text.splitlines()
+         if "for var in" in line and "CLAUDE_CODE_OAUTH_TOKEN" in line),
+        None,
+    )
+    assert loop_line, "entrypoint.sh missing the auth-env for-loop"
+    names_in_shell = tuple(
+        loop_line.split("for var in", 1)[1].split(";", 1)[0].split()
+    )
+    assert set(names_in_shell) == set(_AUDIT_NAMES), (
+        f"_AUDIT_NAMES in this file ({set(_AUDIT_NAMES)}) and the for-loop "
+        f"in entrypoint.sh ({set(names_in_shell)}) disagree — update the "
+        "test fixture or the shell loop to bring them back in sync."
+    )