test: bash coverage for entrypoint.sh log_boot_context()
The Python adapter audit (test_adapter_logging.py) pins the adapter.py side, but the entrypoint shell function fires earlier and twice (pre-gosu + post-gosu). When the SDK import wedge keeps the adapter from running at all, the shell emission is the operator's only visibility into the boot env.

Eight new tests cover:
- env NAME=set / env NAME=unset shape for every audited var
- value-leak guard: secret strings never appear in output
- WORKSPACE_ID + PLATFORM_URL passthrough by value (not secret)
- <unset> fallback for missing platform identifiers
- uid/gid line shape (used to verify the privilege drop)
- dated boot banner shape (used to count restarts in a crash loop)
- cross-file gate: shell for-loop names == fixture tuple, mirroring test_audit_env_list_matches_entrypoint_sh's adapter.py↔shell gate

Strategy: regex-extract the function body from entrypoint.sh and run it in a fresh /bin/sh with controlled env. We never source the whole entrypoint because it would chown /workspace and exec molecule-runtime.

Closes the gap from task #251 (follow-up to PR #32 boot-debug logging).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
50e16c5c73
commit
227787bbbd
206
tests/test_entrypoint_boot_context.py
Normal file
206
tests/test_entrypoint_boot_context.py
Normal file
@ -0,0 +1,206 @@
|
||||
r"""Tests for entrypoint.sh's log_boot_context() shell function.
|
||||
|
||||
The Python-side audit (test_adapter_logging.py) pins what `_audit_auth_env_presence`
|
||||
in adapter.py emits. But the shell function fires FIRST — twice, even (once
|
||||
pre-gosu as root, once post-gosu as agent). When the adapter never runs at
|
||||
all because the SDK import fails, the entrypoint emission is the operator's
|
||||
ONLY visibility into the boot env. So this contract needs its own test.
|
||||
|
||||
The cross-file gate `test_audit_env_list_matches_entrypoint_sh` proves the
|
||||
NAME LIST matches; this file proves the SHELL CODE actually emits the
|
||||
right lines for those names. Without this, a typo in the for-loop body
|
||||
(e.g. `eval "val=\$$var"` → `val=$var`, which would print the literal
|
||||
name not its value) silently breaks the audit.
|
||||
|
||||
Strategy: extract the `log_boot_context()` function body from entrypoint.sh
|
||||
and run it in a fresh subprocess with controlled env. Asserts on stdout.
|
||||
We never source entrypoint.sh wholesale because it would chown /workspace
|
||||
and exec molecule-runtime — neither is appropriate in a test sandbox.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# Two levels above this file: the directory that holds entrypoint.sh.
TEMPLATE_DIR = Path(__file__).resolve().parent.parent
# The shell script whose log_boot_context() function is under test.
ENTRYPOINT = TEMPLATE_DIR.joinpath("entrypoint.sh")
|
||||
|
||||
|
||||
def _extract_function() -> str:
    """Pull just the log_boot_context() function definition out of entrypoint.sh.

    Returns the literal function definition (`log_boot_context() { ... }`) as
    a string, suitable for `sh -c "<func>; log_boot_context"`. Fails the
    calling test via `pytest.fail` if the function can't be located — that
    itself is a regression worth a loud test failure.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the locale
    # of the machine running the tests (the script may contain non-ASCII
    # characters in comments).
    text = ENTRYPOINT.read_text(encoding="utf-8")
    # `log_boot_context() {` starting at column 0, then (lazily) everything
    # up to the first closing `}` that sits at column 0. The function is
    # small and shape-stable; we don't try to be a full shell parser, so a
    # nested block that closes at column 0 would truncate the match.
    match = re.search(
        r"^log_boot_context\(\)\s*\{.*?^\}\s*$",
        text,
        re.DOTALL | re.MULTILINE,
    )
    if not match:
        pytest.fail("Could not locate log_boot_context() in entrypoint.sh")
    return match.group(0)
|
||||
|
||||
|
||||
def _run_function(env: dict[str, str]) -> str:
    """Execute log_boot_context() in a fresh /bin/sh and return its stdout.

    The child process sees only PATH (inherited, or `/usr/bin:/bin` when the
    parent has none) plus the entries of *env*, so auth variables from the
    developer's or CI's environment cannot leak into the assertions. Entries
    in *env* take precedence, including over PATH.

    Asserts that the shell exits with status 0; on failure the assertion
    message carries the captured stdout and stderr.
    """
    # Function definition followed by a single invocation of it.
    shell_source = _extract_function() + "\nlog_boot_context\n"
    # PATH is kept so external commands (`id`, `hostname`, `date`, `ls`)
    # still resolve; everything else comes from the caller.
    child_env = {"PATH": os.environ.get("PATH", "/usr/bin:/bin"), **env}
    proc = subprocess.run(
        ["/bin/sh", "-c", shell_source],
        env=child_env,
        capture_output=True,
        text=True,
        timeout=10,
        check=False,
    )
    assert proc.returncode == 0, (
        f"log_boot_context exited rc={proc.returncode}\n"
        f"stdout:\n{proc.stdout}\nstderr:\n{proc.stderr}"
    )
    return proc.stdout
|
||||
|
||||
|
||||
# Audit names — kept in lockstep with adapter.py's _AUTH_ENV_AUDIT and the
|
||||
# entrypoint.sh for-loop. test_audit_env_list_matches_entrypoint_sh and
|
||||
# test_loop_var_list_matches_audit (below) gate any drift across the three
|
||||
# locations.
|
||||
_AUDIT_NAMES = (
|
||||
"CLAUDE_CODE_OAUTH_TOKEN",
|
||||
"ANTHROPIC_API_KEY",
|
||||
"ANTHROPIC_AUTH_TOKEN",
|
||||
"ANTHROPIC_BASE_URL",
|
||||
"MINIMAX_API_KEY",
|
||||
"GLM_API_KEY",
|
||||
"KIMI_API_KEY",
|
||||
"DEEPSEEK_API_KEY",
|
||||
)
|
||||
|
||||
|
||||
def test_emits_set_for_present_env():
    """Every audited var, when set, must produce `env NAME=set`.

    Each name in `_AUDIT_NAMES` is exercised on its own (all other audited
    vars absent), mirroring the per-name coverage of
    `test_emits_unset_for_absent_env`. Checking a single hard-coded name
    would let a loop body that special-cases or skips one variable slip
    through.
    """
    for name in _AUDIT_NAMES:
        out = _run_function({name: "secret-MUST-NOT-LEAK"})
        assert f"env {name}=set" in out, (
            f"missing `env {name}=set` line — for-loop body may be miscoded:\n{out}"
        )
        # A present variable must not also be reported as absent.
        assert f"env {name}=unset" not in out, (
            f"`{name}` was set but the log reports it as unset:\n{out}"
        )
|
||||
|
||||
|
||||
def test_emits_unset_for_absent_env():
    """With no audited var in the environment, each one reports `env NAME=unset`.

    Covers the empty-value branch of the shell loop for every name in
    `_AUDIT_NAMES`.
    """
    boot_log = _run_function({})
    for name in _AUDIT_NAMES:
        expected_line = f"env {name}=unset"
        assert expected_line in boot_log, (
            f"missing `env {name}=unset` line — for-loop body may be miscoded"
        )
|
||||
|
||||
|
||||
def test_never_leaks_value():
    r"""The audit prints NAMES, not VALUES. Regression here = secret leak.

    Same threat model as the Python-side test: an operator-visible boot log
    that contains the actual key would defeat the whole point of the audit
    (the audit exists so we can answer 'is the key present' WITHOUT exposing
    the key). A `eval "val=\$$var"` typo collapsing to `echo $var` would
    trip this test.

    Every audited variable gets its own distinct sentinel value, so a leak
    introduced for any single name is caught and the failure message
    identifies which one.
    """
    env = {
        name: f"sk-FAKE-{index}-MUST-NEVER-APPEAR-IN-BOOT-LOG"
        for index, name in enumerate(_AUDIT_NAMES)
    }
    # ANTHROPIC_BASE_URL is the most-likely-to-be-logged-by-mistake field
    # because operators sometimes WANT to see it; give it a realistic URL
    # and pin that it's still name-only.
    env["ANTHROPIC_BASE_URL"] = "https://api.example.com"

    out = _run_function(env)

    for name, value in env.items():
        assert value not in out, (
            f"boot-context log leaked the env VALUE of {name}:\n{out}"
        )
|
||||
|
||||
|
||||
def test_emits_workspace_id_and_platform_url():
    """WORKSPACE_ID and PLATFORM_URL are logged by VALUE — they are not secrets.

    A support engineer uses these identifiers to correlate a boot log with
    platform records, so the `workspace_id=` / `platform_url=` field shape
    is pinned here to keep a later refactor from redacting them.
    """
    identifiers = {
        "WORKSPACE_ID": "ws-test-1234",
        "PLATFORM_URL": "https://test.example.com",
    }
    boot_log = _run_function(identifiers)
    assert "workspace_id=ws-test-1234" in boot_log
    assert "platform_url=https://test.example.com" in boot_log
|
||||
|
||||
|
||||
def test_emits_unset_marker_when_workspace_id_missing():
    """Missing WORKSPACE_ID / PLATFORM_URL fall back to the literal `<unset>`.

    A support engineer reading the boot log must see an explicit marker when
    the platform never injected an identifier, rather than a blank field.

    NOTE(review): assuming entrypoint.sh uses the `${VAR:-<unset>}` form
    (confirm against the script), an EMPTY-string value is reported as
    `<unset>` too — POSIX `:-` substitutes the default when the variable is
    unset *or* null. The log therefore cannot distinguish 'empty' from
    'never injected'; that would require `${VAR-<unset>}`. This test pins
    only the never-injected case.
    """
    out = _run_function({})
    assert "workspace_id=<unset>" in out
    assert "platform_url=<unset>" in out
|
||||
|
||||
|
||||
def test_emits_uid_and_gid():
    """The boot log carries a `uid=<n> gid=<n>` line.

    That line answers 'did the privilege drop happen?': the function is
    emitted once pre-gosu as root and once post-gosu as agent, which is only
    useful as a diagnostic if every emission includes uid/gid. Only the
    numeric shape is pinned, not the literal ids, because CI runs vary.
    """
    boot_log = _run_function({})
    uid_gid = re.search(r"uid=\d+\s+gid=\d+", boot_log)
    assert uid_gid, f"missing or malformed uid/gid line:\n{boot_log}"
|
||||
|
||||
|
||||
def test_emits_boot_marker():
    """The output contains the dated `entrypoint boot` banner.

    Operators grep for this banner to count restarts in a crash loop.
    """
    # Format: "----- entrypoint boot 2026-05-02T12:34:56Z -----"
    banner_pattern = (
        r"-----\s+entrypoint boot \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\s+-----"
    )
    boot_log = _run_function({})
    assert re.search(banner_pattern, boot_log), f"missing boot banner:\n{boot_log}"
|
||||
|
||||
|
||||
def test_loop_var_list_matches_audit():
    """The for-loop's literal NAME list must match _AUDIT_NAMES (this file).

    Companion to test_audit_env_list_matches_entrypoint_sh in
    test_adapter_logging.py: that test cross-checks adapter.py vs
    entrypoint.sh; this one cross-checks entrypoint.sh vs the test
    fixture above. If a maintainer adds a vendor to entrypoint.sh
    without updating the audit name tuple in this file, the existing
    `test_emits_unset_for_absent_env` would still pass (because all
    audited names also appear in the loop), but the maintainer would
    have a false sense of coverage. This test catches that.

    The comparison is set-based, so ordering and duplicates are ignored.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the locale of
    # the machine running the tests.
    text = ENTRYPOINT.read_text(encoding="utf-8")
    # The loop header is expected on ONE line, e.g.
    # `for var in NAME_A NAME_B ...; do`. CLAUDE_CODE_OAUTH_TOKEN is used as
    # the marker that identifies the auth-env loop among any other loops.
    loop_line = next(
        (
            line
            for line in text.splitlines()
            if "for var in" in line and "CLAUDE_CODE_OAUTH_TOKEN" in line
        ),
        None,
    )
    assert loop_line, "entrypoint.sh missing the auth-env for-loop"
    # Names are the whitespace-separated words between `for var in` and the
    # first `;` on that line.
    names_in_shell = tuple(
        loop_line.split("for var in", 1)[1].split(";", 1)[0].split()
    )
    assert set(names_in_shell) == set(_AUDIT_NAMES), (
        f"_AUDIT_NAMES in this file ({set(_AUDIT_NAMES)}) and the for-loop "
        f"in entrypoint.sh ({set(names_in_shell)}) disagree — update the "
        "test fixture or the shell loop to bring them back in sync."
    )
|
||||
Loading…
Reference in New Issue
Block a user