fix(image,ci): create /agent-home + expose silent T4 probe failures (unblocks task #305) #39
@@ -356,8 +356,31 @@ jobs:
|
||||
else:
|
||||
msg = f"FAIL {name} ({sev}): rc={r.returncode} source={cap.get('source','?')}"
|
||||
print(f"::error::{msg}")
|
||||
# Some probes redirect stderr to /dev/null in the
|
||||
# contract YAML (e.g. docker_socket_reachable) or
|
||||
# produce nothing on failure (e.g. test-bracket
|
||||
# exits). Re-run those under `sh -x` (the contract's
|
||||
# own redirections still apply) so the xtrace shows
|
||||
# which command failed, then print the last 10 lines
|
||||
# of the re-run's stderr + stdout. This is purely a
|
||||
# diagnostic re-run — the verdict above
|
||||
# (`r.returncode`) is the gate.
|
||||
if r.stderr.strip():
|
||||
print(f" stderr: {r.stderr.strip()}")
|
||||
if r.stdout.strip():
|
||||
print(f" stdout: {r.stdout.strip()}")
|
||||
if not (r.stderr.strip() or r.stdout.strip()):
|
||||
# Silent failure — re-run with `set -x` to
|
||||
# expose the failing command path.
|
||||
r2 = subprocess.run(
|
||||
["docker","exec","-u","agent",probe,"sh","-xc",probe_sh],
|
||||
capture_output=True, text=True,
|
||||
)
|
||||
tail = (r2.stderr + r2.stdout).strip().splitlines()[-10:]
|
||||
if tail:
|
||||
print(" diag (set -x tail):")
|
||||
for line in tail:
|
||||
print(f" | {line}")
|
||||
if sev == "hard":
|
||||
fails_hard.append(name)
|
||||
else:
|
||||
|
||||
+18
@@ -57,6 +57,24 @@ RUN set -eux; \
|
||||
usermod -aG docker agent; \
|
||||
id agent
|
||||
|
||||
# --- Files API redesign root (task #128) — /agent-home -----------------
|
||||
# /agent-home is the user-writable file-tree root the runtime exposes
|
||||
# to the agent (per task #128). The Layer-3 T4 conformance gate asserts
|
||||
# `agent_home_writable` via the uniform contract emitted by molecule-core
|
||||
# (workspace-server/internal/provisioner/t4_privilege_contract.go).
|
||||
# The probe writes a marker file at /agent-home/.t4-cap-write-probe-* as
|
||||
# uid-1000 agent WITHOUT sudo — so the directory must (a) exist in the
|
||||
# image, and (b) be writable by agent without a recursive chown step at
|
||||
# entrypoint time (the entrypoint does not run in the T4 smoke probe, which
|
||||
# starts the container with `--entrypoint /bin/sh ... 'sleep 600'`).
|
||||
# Creating it during the image build, owned agent:agent mode 0755, is
|
||||
# the minimal change that satisfies the contract for both the live boot
|
||||
# path (entrypoint may re-chown if a volume mount masks the build-time
|
||||
# dir) and the smoke-probe path (which bypasses entrypoint).
|
||||
#
|
||||
# This is image-side only; no platform/provisioner contract changes.
|
||||
RUN install -d -m 0755 -o agent -g agent /agent-home
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# RUNTIME_VERSION is forwarded from the reusable publish workflow as
|
||||
|
||||
@@ -52,6 +52,15 @@ if [ "$(id -u)" = "0" ]; then
|
||||
# Layer-3 conformance gate asserts owner_uid==1000 on the running
|
||||
# container alongside the host-root-reach assertion.
|
||||
chown -R agent:agent /configs 2>/dev/null
|
||||
# /agent-home — Files API redesign root (task #128). Created with
|
||||
# agent:agent ownership during image build (see Dockerfile). The
|
||||
# idempotent mkdir + chown here is defense-in-depth in case a
|
||||
# platform volume mount masks the build-time directory or comes up
|
||||
# root-owned (typical for empty Docker named volumes on Linux). The
|
||||
# T4 conformance gate's `agent_home_writable` probe runs as uid-1000
|
||||
# WITHOUT sudo, so ownership must be correct before exec gosu.
|
||||
mkdir -p /agent-home 2>/dev/null || true
|
||||
chown agent:agent /agent-home 2>/dev/null || true
|
||||
# /workspace handling — only chown when the contents are root-owned
|
||||
# (typical on Docker Desktop on Windows where host uid maps to 0).
|
||||
# On Linux Docker with matching uids the recursive chown is skipped
|
||||
|
||||
Reference in New Issue
Block a user