fix(image,ci): create /agent-home + expose silent T4 probe failures (unblocks task #305) #39

Open
core-devops wants to merge 1 commit from fix/t4-conformance-create-agent-home into main
3 changed files with 50 additions and 0 deletions
+23
View File
@@ -356,8 +356,31 @@ jobs:
else:
msg = f"FAIL {name} ({sev}): rc={r.returncode} source={cap.get('source','?')}"
print(f"::error::{msg}")
# Some probes redirect stderr to /dev/null in the
# contract YAML (e.g. docker_socket_reachable) or
# produce nothing on failure (e.g. test-bracket
# exits). Dump whatever the failed probe did emit;
# if both streams are empty, re-run it under
# `sh -x`. The contract's per-command redirections
# still apply on the re-run, but the shell's own
# xtrace is written to the re-run's stderr, so the
# failing command path becomes visible. This is
# purely a diagnostic re-run — the verdict above
# (`r.returncode`) is the gate.
if r.stderr.strip():
print(f" stderr: {r.stderr.strip()}")
if r.stdout.strip():
print(f" stdout: {r.stdout.strip()}")
if not (r.stderr.strip() or r.stdout.strip()):
# Silent failure — re-run with `set -x` to
# expose the failing command path.
r2 = subprocess.run(
["docker","exec","-u","agent",probe,"sh","-xc",probe_sh],
capture_output=True, text=True,
)
tail = (r2.stderr + r2.stdout).strip().splitlines()[-10:]
if tail:
print(" diag (set -x tail):")
for line in tail:
print(f" | {line}")
if sev == "hard":
fails_hard.append(name)
else:
+18
View File
@@ -57,6 +57,24 @@ RUN set -eux; \
usermod -aG docker agent; \
id agent
# --- Files API redesign root (task #128) — /agent-home -----------------
# /agent-home is the user-writable file-tree root the runtime exposes
# to the agent (per task #128). The Layer-3 T4 conformance gate asserts
# `agent_home_writable` via the uniform contract emitted by molecule-core
# (workspace-server/internal/provisioner/t4_privilege_contract.go).
# The probe writes a marker file at /agent-home/.t4-cap-write-probe-* as
# uid-1000 agent WITHOUT sudo — so the directory must (a) exist in the
# image, and (b) be writable by agent without relying on a chown step at
# entrypoint time (the entrypoint does not run in the T4 smoke probe,
# which starts the container with `--entrypoint /bin/sh ... 'sleep 600'`).
# Creating it during the image build, owned agent:agent mode 0755, is
# the minimal change that satisfies the contract for both the live boot
# path (where the entrypoint re-chowns in case a volume mount masks the
# build-time dir) and the smoke-probe path (which bypasses the entrypoint).
#
# This is image-side only; no platform/provisioner contract changes.
RUN install -d -m 0755 -o agent -g agent /agent-home
WORKDIR /app
# RUNTIME_VERSION is forwarded from the reusable publish workflow as
+9
View File
@@ -52,6 +52,15 @@ if [ "$(id -u)" = "0" ]; then
# Layer-3 conformance gate asserts owner_uid==1000 on the running
# container alongside the host-root-reach assertion.
chown -R agent:agent /configs 2>/dev/null
# /agent-home — Files API redesign root (task #128). Created with
# agent:agent ownership during image build (see Dockerfile). The
# idempotent mkdir + chown here is defense-in-depth in case a
# platform volume mount masks the build-time directory or comes up
# root-owned (typical for empty Docker named volumes on Linux). The
# T4 conformance gate's `agent_home_writable` probe runs as uid-1000
# WITHOUT sudo, so ownership must be correct before exec gosu.
mkdir -p /agent-home 2>/dev/null || true
chown agent:agent /agent-home 2>/dev/null || true
# /workspace handling — only chown when the contents are root-owned
# (typical on Docker Desktop on Windows where host uid maps to 0).
# On Linux Docker with matching uids the recursive chown is skipped