test: bash coverage for entrypoint.sh log_boot_context()

The Python adapter audit (test_adapter_logging.py) pins the adapter.py side, but the entrypoint shell function fires earlier and twice (pre-gosu + post-gosu). When the SDK import wedge keeps the adapter from running at all, the shell emission is the operator's only visibility into the boot env. Eight new tests cover: - env NAME=set / env NAME=unset shape for every audited var - value-leak guard: secret strings never appear in output - WORKSPACE_ID + PLATFORM_URL passthrough by value (not secret) - <unset> fallback for missing platform identifiers - uid/gid line shape (used to verify the privilege drop) - dated boot banner shape (used to count restarts in a crash loop) - cross-file gate: shell for-loop names == fixture tuple, mirroring test_audit_env_list_matches_entrypoint_sh's adapter.py↔shell gate Strategy: regex-extract the function body from entrypoint.sh and run it in a fresh /bin/sh with controlled env. We never source the whole entrypoint because it would chown /workspace and exec molecule-runtime. Closes the gap from task #251 (follow-up to PR #32 boot-debug logging). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 22:26:35 -07:00 · 2026-05-02 22:26:35 -07:00 · 227787bbbd
commit 227787bbbd
parent 50e16c5c73
1 changed files with 206 additions and 0 deletions
--- a/tests/test_entrypoint_boot_context.py
+++ b/tests/test_entrypoint_boot_context.py
@ -0,0 +1,206 @@
+r"""Tests for entrypoint.sh's log_boot_context() shell function.
+
+The Python-side audit (test_adapter_logging.py) pins what `_audit_auth_env_presence`
+in adapter.py emits. But the shell function fires FIRST — twice, even (once
+pre-gosu as root, once post-gosu as agent). When the adapter never runs at
+all because the SDK import fails, the entrypoint emission is the operator's
+ONLY visibility into the boot env. So this contract needs its own test.
+
+The cross-file gate `test_audit_env_list_matches_entrypoint_sh` proves the
+NAME LIST matches; this file proves the SHELL CODE actually emits the
+right lines for those names. Without this, a typo in the for-loop body
+(e.g. `eval "val=\$$var"` → `val=$var`, which would print the literal
+name not its value) silently breaks the audit.
+
+Strategy: extract the `log_boot_context()` function body from entrypoint.sh
+and run it in a fresh subprocess with controlled env. Asserts on stdout.
+We never source entrypoint.sh wholesale because it would chown /workspace
+and exec molecule-runtime — neither is appropriate in a test sandbox.
+"""
+from __future__ import annotations
+
+import os
+import re
+import subprocess
+from pathlib import Path
+
+import pytest
+
+# Directory two levels above this test file (the parent of the folder that
+# contains it); entrypoint.sh is expected to sit directly inside it.
+TEMPLATE_DIR = Path(__file__).resolve().parent.parent
+# Path of the shell script under test. The tests in this module only read it
+# as text — they never source or execute the script as a whole.
+ENTRYPOINT = TEMPLATE_DIR / "entrypoint.sh"
+
+
+def _extract_function() -> str:
+    """Pull just the log_boot_context() function definition out of entrypoint.sh.
+
+    Returns the literal function definition (`log_boot_context() { ... }`) as
+    a string, suitable for `sh -c "<func>; log_boot_context"`. Bails with a
+    clear message if the function can't be located — that itself is a
+    regression worth a loud test failure.
+    """
+    text = ENTRYPOINT.read_text()
+    # `log_boot_context() {` on its own line, then everything up to the
+    # matching closing `}` at column 0. The function is small and shape-stable;
+    # we don't try to be a full shell parser.
+    match = re.search(r"^log_boot_context\(\)\s*\{.*?^\}\s*$", text, re.DOTALL | re.MULTILINE)
+    if not match:
+        pytest.fail("Could not locate log_boot_context() in entrypoint.sh")
+    return match.group(0)
+
+
+def _run_function(env: dict[str, str]) -> str:
+    """Run log_boot_context() in a fresh /bin/sh with the given env. Returns stdout."""
+    func = _extract_function()
+    script = f"{func}\nlog_boot_context\n"
+    # Empty base env so PATH lookups (`id`, `hostname`, `date`, `ls`) still work
+    # but no inherited auth vars leak into the test. We restore PATH explicitly.
+    safe_env = {"PATH": os.environ.get("PATH", "/usr/bin:/bin")}
+    safe_env.update(env)
+    result = subprocess.run(
+        ["/bin/sh", "-c", script],
+        env=safe_env,
+        capture_output=True,
+        text=True,
+        timeout=10,
+        check=False,
+    )
+    assert result.returncode == 0, (
+        f"log_boot_context exited rc={result.returncode}\n"
+        f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
+    )
+    return result.stdout
+
+
+# Audit names — the environment variables whose presence (never value) the
+# boot log is expected to report. Kept in lockstep with adapter.py's
+# _AUTH_ENV_AUDIT and the entrypoint.sh for-loop. Two gates catch drift
+# across the three locations: test_audit_env_list_matches_entrypoint_sh
+# (in test_adapter_logging.py) and test_loop_var_list_matches_audit (below).
+# The gate below compares the names as sets, so the order of this tuple is
+# not significant.
+_AUDIT_NAMES = (
+    "CLAUDE_CODE_OAUTH_TOKEN",
+    "ANTHROPIC_API_KEY",
+    "ANTHROPIC_AUTH_TOKEN",
+    "ANTHROPIC_BASE_URL",
+    "MINIMAX_API_KEY",
+    "GLM_API_KEY",
+    "KIMI_API_KEY",
+    "DEEPSEEK_API_KEY",
+)
+
+
+def test_emits_set_for_present_env():
+    """A set var must produce `env NAME=set` — proves the eval-deref works."""
+    out = _run_function({"MINIMAX_API_KEY": "secret-MUST-NOT-LEAK"})
+    assert "env MINIMAX_API_KEY=set" in out
+
+
+def test_emits_unset_for_absent_env():
+    """An unset var must produce `env NAME=unset` — proves the empty-string branch."""
+    out = _run_function({})
+    for name in _AUDIT_NAMES:
+        assert f"env {name}=unset" in out, (
+            f"missing `env {name}=unset` line — for-loop body may be miscoded"
+        )
+
+
+def test_never_leaks_value():
+    """The audit prints NAMES, not VALUES. Regression here = secret leak.
+
+    Same threat model as the Python-side test: an operator-visible boot log
+    that contains the actual key would defeat the whole point of the audit
+    (the audit exists so we can answer 'is the key present' WITHOUT exposing
+    the key). A `eval "val=\\$$var"` typo collapsing to `echo $var` would
+    trip this test.
+    """
+    secret = "sk-FAKE-MUST-NEVER-APPEAR-IN-BOOT-LOG"
+    out = _run_function({
+        "MINIMAX_API_KEY": secret,
+        "CLAUDE_CODE_OAUTH_TOKEN": secret,
+        "ANTHROPIC_BASE_URL": "https://api.example.com",
+    })
+    assert secret not in out, f"boot-context log leaked the env VALUE:\n{out}"
+    # ANTHROPIC_BASE_URL is the most-likely-to-be-logged-by-mistake field
+    # because operators sometimes WANT to see it; pin that it's still
+    # name-only.
+    assert "https://api.example.com" not in out
+
+
+def test_emits_workspace_id_and_platform_url():
+    """WORKSPACE_ID and PLATFORM_URL appear by VALUE — these are not secrets.
+
+    They're the operator-visible identifiers a support engineer needs to
+    correlate logs with platform records. Pinning the field shape so a
+    later refactor doesn't accidentally redact them.
+    """
+    out = _run_function({
+        "WORKSPACE_ID": "ws-test-1234",
+        "PLATFORM_URL": "https://test.example.com",
+    })
+    assert "workspace_id=ws-test-1234" in out
+    assert "platform_url=https://test.example.com" in out
+
+
+def test_emits_unset_marker_when_workspace_id_missing():
+    """Missing WORKSPACE_ID / PLATFORM_URL render as the literal `<unset>` placeholder.
+
+    With neither variable present in the child environment, the boot log
+    must show `workspace_id=<unset>` and `platform_url=<unset>`, so a
+    support engineer can see that the platform did not inject them.
+
+    NOTE(review): this docstring used to say that the shell's
+    `${VAR:-<unset>}` default lets a reader tell "WORKSPACE_ID was an empty
+    string" apart from "WORKSPACE_ID was never injected". In POSIX sh the
+    `:-` form substitutes the default for BOTH unset and empty values, so
+    that form cannot distinguish the two (only `${VAR-<unset>}` would).
+    This test covers the not-set case only; confirm the intended
+    empty-string behaviour against entrypoint.sh.
+    """
+    out = _run_function({})
+    assert "workspace_id=<unset>" in out
+    assert "platform_url=<unset>" in out
+
+
+def test_emits_uid_and_gid():
+    """uid/gid line is critical — answers 'did the privilege drop happen?'
+
+    The two-emission pattern (pre-gosu as root, post-gosu as agent) only
+    works as a diagnostic if uid/gid is in every emission. Pin the field
+    shape; we don't pin the literal value because CI runs vary.
+    """
+    out = _run_function({})
+    assert re.search(r"uid=\d+\s+gid=\d+", out), (
+        f"missing or malformed uid/gid line:\n{out}"
+    )
+
+
+def test_emits_boot_marker():
+    """Each emission starts with the dated `entrypoint boot` banner.
+
+    Operators grep for this to count restarts in a crash loop.
+    """
+    out = _run_function({})
+    # Format: "----- entrypoint boot 2026-05-02T12:34:56Z -----"
+    assert re.search(
+        r"-----\s+entrypoint boot \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\s+-----",
+        out,
+    ), f"missing boot banner:\n{out}"
+
+
+def test_loop_var_list_matches_audit():
+    """The for-loop's literal NAME list must match _AUDIT_NAMES (this file).
+
+    Companion to test_audit_env_list_matches_entrypoint_sh in
+    test_adapter_logging.py: that test cross-checks adapter.py vs
+    entrypoint.sh; this one cross-checks entrypoint.sh vs the test
+    fixture above. If a maintainer adds a vendor to entrypoint.sh
+    without updating the audit name tuple in this file, the existing
+    `test_emits_unset_for_absent_env` would still pass (because all
+    audited names also appear in the loop), but the maintainer would
+    have a false sense of coverage. This test catches that.
+    """
+    text = ENTRYPOINT.read_text()
+    loop_line = next(
+        (line for line in text.splitlines()
+         if "for var in" in line and "CLAUDE_CODE_OAUTH_TOKEN" in line),
+        None,
+    )
+    assert loop_line, "entrypoint.sh missing the auth-env for-loop"
+    names_in_shell = tuple(
+        loop_line.split("for var in", 1)[1].split(";", 1)[0].split()
+    )
+    assert set(names_in_shell) == set(_AUDIT_NAMES), (
+        f"_AUDIT_NAMES in this file ({set(_AUDIT_NAMES)}) and the for-loop "
+        f"in entrypoint.sh ({set(names_in_shell)}) disagree — update the "
+        "test fixture or the shell loop to bring them back in sync."
+    )