test: bash coverage for entrypoint.sh log_boot_context()
Some checks failed
CI / validate (push) Failing after 0s
CI / Adapter unit tests (push) Failing after 6s

The Python adapter audit (test_adapter_logging.py) pins the
adapter.py side, but the entrypoint shell function fires earlier and
twice (pre-gosu + post-gosu). When the SDK import wedge keeps the
adapter from running at all, the shell emission is the operator's
only visibility into the boot env.

Eight new tests cover:
- env NAME=set / env NAME=unset shape for every audited var
- value-leak guard: secret strings never appear in output
- WORKSPACE_ID + PLATFORM_URL passthrough by value (not secret)
- <unset> fallback for missing platform identifiers
- uid/gid line shape (used to verify the privilege drop)
- dated boot banner shape (used to count restarts in a crash loop)
- cross-file gate: shell for-loop names == fixture tuple, mirroring
  test_audit_env_list_matches_entrypoint_sh's adapter.py↔shell gate

Strategy: regex-extract the function body from entrypoint.sh and run
it in a fresh /bin/sh with controlled env. We never source the whole
entrypoint because it would chown /workspace and exec molecule-runtime.

Closes the gap from task #251 (follow-up to PR #32 boot-debug logging).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-05-02 22:26:35 -07:00
parent 50e16c5c73
commit 227787bbbd

View File

@ -0,0 +1,206 @@
r"""Tests for entrypoint.sh's log_boot_context() shell function.
The Python-side audit (test_adapter_logging.py) pins what `_audit_auth_env_presence`
in adapter.py emits. But the shell function fires FIRST twice, even (once
pre-gosu as root, once post-gosu as agent). When the adapter never runs at
all because the SDK import fails, the entrypoint emission is the operator's
ONLY visibility into the boot env. So this contract needs its own test.
The cross-file gate `test_audit_env_list_matches_entrypoint_sh` proves the
NAME LIST matches; this file proves the SHELL CODE actually emits the
right lines for those names. Without this, a typo in the for-loop body
(e.g. `eval "val=\$$var"` `val=$var`, which would print the literal
name not its value) silently breaks the audit.
Strategy: extract the `log_boot_context()` function body from entrypoint.sh
and run it in a fresh subprocess with controlled env. Asserts on stdout.
We never source entrypoint.sh wholesale because it would chown /workspace
and exec molecule-runtime neither is appropriate in a test sandbox.
"""
from __future__ import annotations
import os
import re
import subprocess
from pathlib import Path
import pytest
TEMPLATE_DIR = Path(__file__).resolve().parent.parent
ENTRYPOINT = TEMPLATE_DIR / "entrypoint.sh"
def _extract_function() -> str:
"""Pull just the log_boot_context() function definition out of entrypoint.sh.
Returns the literal function definition (`log_boot_context() { ... }`) as
a string, suitable for `sh -c "<func>; log_boot_context"`. Bails with a
clear message if the function can't be located — that itself is a
regression worth a loud test failure.
"""
text = ENTRYPOINT.read_text()
# `log_boot_context() {` on its own line, then everything up to the
# matching closing `}` at column 0. The function is small and shape-stable;
# we don't try to be a full shell parser.
match = re.search(r"^log_boot_context\(\)\s*\{.*?^\}\s*$", text, re.DOTALL | re.MULTILINE)
if not match:
pytest.fail("Could not locate log_boot_context() in entrypoint.sh")
return match.group(0)
def _run_function(env: dict[str, str]) -> str:
"""Run log_boot_context() in a fresh /bin/sh with the given env. Returns stdout."""
func = _extract_function()
script = f"{func}\nlog_boot_context\n"
# Empty base env so PATH lookups (`id`, `hostname`, `date`, `ls`) still work
# but no inherited auth vars leak into the test. We restore PATH explicitly.
safe_env = {"PATH": os.environ.get("PATH", "/usr/bin:/bin")}
safe_env.update(env)
result = subprocess.run(
["/bin/sh", "-c", script],
env=safe_env,
capture_output=True,
text=True,
timeout=10,
check=False,
)
assert result.returncode == 0, (
f"log_boot_context exited rc={result.returncode}\n"
f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
)
return result.stdout
# Audit names — kept in lockstep with adapter.py's _AUTH_ENV_AUDIT and the
# entrypoint.sh for-loop. test_audit_env_list_matches_entrypoint_sh and
# test_loop_var_list_matches_audit (below) gate any drift across the three
# locations.
_AUDIT_NAMES = (
"CLAUDE_CODE_OAUTH_TOKEN",
"ANTHROPIC_API_KEY",
"ANTHROPIC_AUTH_TOKEN",
"ANTHROPIC_BASE_URL",
"MINIMAX_API_KEY",
"GLM_API_KEY",
"KIMI_API_KEY",
"DEEPSEEK_API_KEY",
)
def test_emits_set_for_present_env():
"""A set var must produce `env NAME=set` — proves the eval-deref works."""
out = _run_function({"MINIMAX_API_KEY": "secret-MUST-NOT-LEAK"})
assert "env MINIMAX_API_KEY=set" in out
def test_emits_unset_for_absent_env():
"""An unset var must produce `env NAME=unset` — proves the empty-string branch."""
out = _run_function({})
for name in _AUDIT_NAMES:
assert f"env {name}=unset" in out, (
f"missing `env {name}=unset` line — for-loop body may be miscoded"
)
def test_never_leaks_value():
"""The audit prints NAMES, not VALUES. Regression here = secret leak.
Same threat model as the Python-side test: an operator-visible boot log
that contains the actual key would defeat the whole point of the audit
(the audit exists so we can answer 'is the key present' WITHOUT exposing
the key). A `eval "val=\\$$var"` typo collapsing to `echo $var` would
trip this test.
"""
secret = "sk-FAKE-MUST-NEVER-APPEAR-IN-BOOT-LOG"
out = _run_function({
"MINIMAX_API_KEY": secret,
"CLAUDE_CODE_OAUTH_TOKEN": secret,
"ANTHROPIC_BASE_URL": "https://api.example.com",
})
assert secret not in out, f"boot-context log leaked the env VALUE:\n{out}"
# ANTHROPIC_BASE_URL is the most-likely-to-be-logged-by-mistake field
# because operators sometimes WANT to see it; pin that it's still
# name-only.
assert "https://api.example.com" not in out
def test_emits_workspace_id_and_platform_url():
"""WORKSPACE_ID and PLATFORM_URL appear by VALUE — these are not secrets.
They're the operator-visible identifiers a support engineer needs to
correlate logs with platform records. Pinning the field shape so a
later refactor doesn't accidentally redact them.
"""
out = _run_function({
"WORKSPACE_ID": "ws-test-1234",
"PLATFORM_URL": "https://test.example.com",
})
assert "workspace_id=ws-test-1234" in out
assert "platform_url=https://test.example.com" in out
def test_emits_unset_marker_when_workspace_id_missing():
"""Missing WORKSPACE_ID falls back to the literal `<unset>` placeholder.
A support engineer reading the boot log must be able to distinguish
'WORKSPACE_ID was empty string' from 'WORKSPACE_ID was never injected
by the platform'. The shell `${VAR:-<unset>}` default handles that.
"""
out = _run_function({})
assert "workspace_id=<unset>" in out
assert "platform_url=<unset>" in out
def test_emits_uid_and_gid():
"""uid/gid line is critical — answers 'did the privilege drop happen?'
The two-emission pattern (pre-gosu as root, post-gosu as agent) only
works as a diagnostic if uid/gid is in every emission. Pin the field
shape; we don't pin the literal value because CI runs vary.
"""
out = _run_function({})
assert re.search(r"uid=\d+\s+gid=\d+", out), (
f"missing or malformed uid/gid line:\n{out}"
)
def test_emits_boot_marker():
"""Each emission starts with the dated `entrypoint boot` banner.
Operators grep for this to count restarts in a crash loop.
"""
out = _run_function({})
# Format: "----- entrypoint boot 2026-05-02T12:34:56Z -----"
assert re.search(
r"-----\s+entrypoint boot \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\s+-----",
out,
), f"missing boot banner:\n{out}"
def test_loop_var_list_matches_audit():
"""The for-loop's literal NAME list must match _AUDIT_NAMES (this file).
Companion to test_audit_env_list_matches_entrypoint_sh in
test_adapter_logging.py: that test cross-checks adapter.py vs
entrypoint.sh; this one cross-checks entrypoint.sh vs the test
fixture above. If a maintainer adds a vendor to entrypoint.sh
without updating the audit name tuple in this file, the existing
`test_emits_unset_for_absent_env` would still pass (because all
audited names also appear in the loop), but the maintainer would
have a false sense of coverage. This test catches that.
"""
text = ENTRYPOINT.read_text()
loop_line = next(
(line for line in text.splitlines()
if "for var in" in line and "CLAUDE_CODE_OAUTH_TOKEN" in line),
None,
)
assert loop_line, "entrypoint.sh missing the auth-env for-loop"
names_in_shell = tuple(
loop_line.split("for var in", 1)[1].split(";", 1)[0].split()
)
assert set(names_in_shell) == set(_AUDIT_NAMES), (
f"_AUDIT_NAMES in this file ({set(_AUDIT_NAMES)}) and the for-loop "
f"in entrypoint.sh ({set(names_in_shell)}) disagree — update the "
"test fixture or the shell loop to bring them back in sync."
)