fix(e2e): use full workspace IDs for container/volume names after KI-013 (#2499) #2500

Merged
core-devops merged 4 commits from fix/sev-2499-e2e-ki013-full-id-names into main 2026-06-10 02:51:54 +00:00
9 changed files with 87 additions and 27 deletions
+36
View File
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Drift-prevention guard: SEV #2499 class (KI-013 container/volume naming).
#
# KI-013 removed 12-char UUID truncation from container/volume names.
# E2E scripts must use FULL workspace IDs (ws-${WSID}) when referencing
# containers and volumes. Any ${VAR:0:12} truncation in a ws-* context
# is a regression risk.
#
# Exit status: 0 when no truncation is found; 1 when one is found, when a
# file could not be scanned, or when no script matched the glob (the guard
# must never pass vacuously).
#
# Run (from the repository root, paths below are relative):
#   bash .gitea/scripts/lint-e2e-ki013-container-names.sh
set -euo pipefail

# A 12-char prefix expansion such as ${WSID:0:12}. The trailing alternation
# keeps longer lengths (e.g. :0:120) from matching.
PAT=':0:12([^0-9]|$)'
ERR=0
SCANNED=0

for f in tests/e2e/*.sh; do
  # Without nullglob an unmatched glob is left as the literal pattern; skip
  # it here so the "nothing scanned" check below can report it.
  [ -f "$f" ] || continue
  SCANNED=$((SCANNED + 1))

  # Every :0:12 expansion in the file is flagged, not only those on a line
  # that also contains "ws-": the usual regression is split across two lines
  # (short="${WSID:0:12}" followed later by "ws-${short}"), which a
  # same-line check would miss.
  RC=0
  MATCHES=$(grep -nE "$PAT" "$f") || RC=$?
  if [ "$RC" -gt 1 ]; then
    # grep exits 1 for "no match" and >1 for a real error (unreadable file,
    # bad pattern). Treat real errors as failures instead of hiding them.
    echo "::error file=$f::SEV-2499 drift guard: grep failed (exit $RC) while scanning this file"
    ERR=1
    continue
  fi

  if [ -n "$MATCHES" ]; then
    # grep -n emits "<line>:<text>"; split it so the annotation carries the
    # file and line as workflow-command properties.
    while IFS= read -r line; do
      echo "::error file=$f,line=${line%%:*}::SEV-2499 drift guard: truncated workspace ID in container/volume name: ${line#*:}"
    done <<<"$MATCHES"
    ERR=1
  fi
done

if [ "$SCANNED" -eq 0 ]; then
  echo "::error::SEV-2499 drift guard: no scripts matched tests/e2e/*.sh (run from the repository root)"
  exit 1
fi

if [ "$ERR" -ne 0 ]; then
  echo ""
  echo "FAIL: E2E scripts reference containers/volumes with 12-char truncated IDs."
  echo " KI-013 requires FULL workspace IDs. Update the flagged lines."
  exit 1
fi
echo "PASS: No truncated workspace IDs in E2E container/volume references."
+8
View File
@@ -404,6 +404,14 @@ jobs:
run: |
bash scripts/test-promote-tenant-image.sh
- if: ${{ needs.changes.outputs.scripts == 'true' }}
name: Drift guard — KI-013 container/volume naming (SEV #2499)
# KI-013 removed 12-char UUID truncation from container/volume names.
# E2E scripts must use FULL workspace IDs. This fail-closed guard
# prevents regressions where a new/modified script reintroduces the
# old truncated-name pattern (the root cause of SEV #2499).
run: bash .gitea/scripts/lint-e2e-ki013-container-names.sh
- if: ${{ needs.changes.outputs.scripts == 'true' }}
name: Shellcheck promote-tenant-image script
# scripts/ is excluded from the bulk shellcheck pass above (legacy
+1 -1
View File
@@ -76,7 +76,7 @@ fi
log "Step 3 — Seed a file inside /workspace and ask agent to reference it"
# Relies on /workspace being writable by the platform (we copy as root via
# docker exec, mimicking the path a real agent would use through its tools).
CONTAINER=$(docker ps --format '{{.Names}}' | grep -E "^ws-${WSID:0:12}" | head -1)
CONTAINER=$(docker ps --format '{{.Names}}' | grep -E "^ws-${WSID}" | head -1)
[ -n "$CONTAINER" ] || { echo "container not found"; exit 1; }
docker exec "$CONTAINER" sh -c 'echo "E2E report body $(date -u +%s)" > /workspace/e2e-report.txt'
@@ -145,7 +145,7 @@ check_runtime() {
fails=$((fails + 1)); return
fi
local container
container=$(docker ps --format '{{.Names}}' | grep -E "^ws-${wsid:0:12}" | head -1)
container=$(docker ps --format '{{.Names}}' | grep -E "^ws-${wsid}" | head -1)
[ -z "$container" ] && { echo "FAIL $label: container not found"; fails=$((fails + 1)); return; }
has_patch_in_container "$container" || { echo "FAIL $label: platform helpers missing"; fails=$((fails + 1)); return; }
+2 -2
View File
@@ -94,8 +94,8 @@ check_contains "Upload child prompt" "replaced" "$CHILD_UPLOAD"
# Verify prompts in containers
sleep 2
ROOT_CONTAINER=$(docker ps --filter "name=ws-${ROOT:0:12}" -q | head -1)
CHILD_CONTAINER=$(docker ps --filter "name=ws-${CHILD:0:12}" -q | head -1)
ROOT_CONTAINER=$(docker ps --filter "name=ws-${ROOT}" -q | head -1)
CHILD_CONTAINER=$(docker ps --filter "name=ws-${CHILD}" -q | head -1)
ROOT_HAS_PROMPT=$(docker exec $ROOT_CONTAINER cat /configs/system-prompt.md 2>/dev/null | head -1)
check_contains "Root container has prompt" "Root Agent" "$ROOT_HAS_PROMPT"
+3 -6
View File
@@ -153,19 +153,17 @@ RT_HM_ID=$(echo "$R" | jq_extract "['id']")
# Wait for containers to start (poll up to 30s for first one to appear)
if command -v docker &>/dev/null; then
short_cc="${RT_CC_ID:0:12}"
for _ in 1 2 3 4 5 6; do
sleep 5
if docker inspect "ws-${short_cc}" >/dev/null 2>&1; then break; fi
if docker inspect "ws-${RT_CC_ID}" >/dev/null 2>&1; then break; fi
done
_check_image() {
local ws_id="$1" expected_tag="$2" label="$3"
local short_id="${ws_id:0:12}"
# Poll up to 30s for image to appear
local actual_image="NOT_FOUND"
for _ in 1 2 3 4 5 6; do
actual_image=$(docker inspect "ws-${short_id}" --format '{{.Config.Image}}' 2>/dev/null || echo "NOT_FOUND")
actual_image=$(docker inspect "ws-${ws_id}" --format '{{.Config.Image}}' 2>/dev/null || echo "NOT_FOUND")
if echo "$actual_image" | grep -qF "$expected_tag"; then break; fi
sleep 5
done
@@ -216,10 +214,9 @@ if echo "$R" | grep -qF "saved"; then
curl -s -X POST "$BASE/workspaces/$RT_CX_ID/restart" > /dev/null 2>&1
# Poll up to 30s for the new container image to appear (restart can take a while)
if command -v docker &>/dev/null; then
short_id="${RT_CX_ID:0:12}"
for _ in 1 2 3 4 5 6; do
sleep 5
actual=$(docker inspect "ws-${short_id}" --format '{{.Config.Image}}' 2>/dev/null || echo "")
actual=$(docker inspect "ws-${RT_CX_ID}" --format '{{.Config.Image}}' 2>/dev/null || echo "")
if echo "$actual" | grep -qF "openclaw"; then break; fi
done
_check_image "$RT_CX_ID" "openclaw" "Runtime change codex to openclaw on restart"
+31 -14
View File
@@ -191,8 +191,29 @@ except Exception:
}
container_running() { # container_running <ws-id> -> echoes name if running
local short="${1:0:12}"
docker ps --filter "name=ws-${short}" --filter "status=running" --format '{{.Names}}' 2>/dev/null | head -1
docker ps --filter "name=ws-${1}" --filter "status=running" --format '{{.Names}}' 2>/dev/null | head -1
}
# diagnose_provision <ws-id>
# Print best-effort diagnostics for a workspace that failed to come online.
#
# Reads the global LAST (printed as "<none>" when unset or empty) and calls
# container_running to find the workspace's running container. Every docker
# command is followed by `|| true`, so a failing diagnostic step never
# changes the outcome of the caller.
diagnose_provision() {
  local wsid="${1:-}"
  local container
  container=$(container_running "$wsid")

  echo "--- DIAGNOSE provisioning for $wsid ---"
  echo "last_sample_error: ${LAST:-<none>}"
  echo "container_running: ${container:-<none>}"

  if [ -n "$container" ]; then
    echo "--- container logs ($container) ---"
    docker logs "$container" 2>&1 | tail -n 60 || true
    echo "--- container env ---"
    docker inspect "$container" --format '{{json .Config.Env}}' 2>&1 || true
    echo "--- container reachability test ---"
    # Single quotes: $PLATFORM_URL is expanded by the shell inside the
    # container, not by this script.
    docker exec "$container" sh -c 'echo "platform_url=$PLATFORM_URL"; wget -qO- "$PLATFORM_URL/health" 2>&1 || true' || true
  fi

  echo "--- all ws-* containers ---"
  # -a includes stopped/exited containers. A container that crashed during
  # provisioning is no longer running, and its Status column (for example
  # "Exited (1) ...") is the most useful line in this dump.
  docker ps -a --filter "name=ws-" --format '{{.Names}} {{.Status}}' 2>/dev/null || true
  echo "--- all ws-* volumes ---"
  docker volume ls -q 2>/dev/null | grep '^ws-' || true
  echo "--- end diagnose ---"
}
cleanup() {
@@ -203,16 +224,11 @@ cleanup() {
# SCOPED teardown — only the workspace this test created. Never a blanket
# sweep (other dev workspaces may be live on this shared daemon).
e2e_delete_workspace "$WSID" "" >/dev/null 2>&1 || true
local short="${WSID:0:12}"
docker rm -f "ws-${short}" >/dev/null 2>&1 || true
# Volume naming is split in the provisioner: configs + claude-sessions use the
# 12-char short id (ConfigVolumeName/ClaudeSessionVolumeName), but the
# /workspace volume uses the FULL UUID (buildWorkspaceMount: ws-<id>-workspace).
# Remove BOTH forms so neither leaks.
docker rm -f "ws-${WSID}" >/dev/null 2>&1 || true
docker volume rm -f \
"ws-${short}-configs" "ws-${short}-claude-sessions" \
"ws-${short}-workspace" "ws-${WSID}-workspace" >/dev/null 2>&1 || true
echo "cleaned workspace $WSID + ws-${short} container/volumes"
"ws-${WSID}-configs" "ws-${WSID}-claude-sessions" \
"ws-${WSID}-workspace" >/dev/null 2>&1 || true
echo "cleaned workspace $WSID + ws-${WSID} container/volumes"
fi
# Restore the cache tag to whatever it pointed at before we retagged it, so a
# stub run doesn't leave the real claude-code tag aliased to the stub.
@@ -331,8 +347,7 @@ if [ -z "$WSID" ]; then
exit 1
fi
pass "workspace created: $WSID"
SHORT="${WSID:0:12}"
CONFIG_VOL="ws-${SHORT}-configs"
CONFIG_VOL="ws-${WSID}-configs"
# Mint a workspace bearer for the WorkspaceAuth-gated secret + /restart calls.
WTOKEN=$(e2e_mint_workspace_token "$WSID" || true)
@@ -436,8 +451,9 @@ for _ in $(seq 1 "$ONLINE_TIMEOUT"); do
sleep 1
done
check "workspace reached online (status=$STATUS)" "online" "$STATUS"
if [ "$FAIL" -gt 0 ]; then diagnose_provision "$WSID"; echo "=== Results: $PASS passed, $FAIL failed ==="; exit 1; fi
RUN=$(container_running "$WSID")
if [ -n "$RUN" ]; then pass "container running: $RUN"; else fail "no running ws-${WSID:0:12} container" "docker ps shows none"; fi
if [ -n "$RUN" ]; then pass "container running: $RUN"; else fail "no running ws-${WSID} container" "docker ps shows none"; fi
echo ""
# ----------------------------------------------------------------------------
@@ -473,6 +489,7 @@ else
sleep 1
done
check "workspace back online after restart (status=$STATUS)" "online" "$STATUS"
if [ "$FAIL" -gt 0 ]; then diagnose_provision "$WSID"; echo "=== Results: $PASS passed, $FAIL failed ==="; exit 1; fi
# Explicit negative on the exact bug signature.
if echo "$LAST" | grep -qiF "config volume is empty"; then
fail "restart hit 'config volume is empty' — restart-survival REGRESSION" "$LAST"
@@ -33,6 +33,7 @@ import (
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
@@ -239,7 +240,7 @@ func (h *WorkspaceHandler) applyConciergeProvisionConfig(
}
}
if len(base) == 0 && h.provisioner != nil {
if b, err := h.provisioner.ExecRead(ctx, configDirName(workspaceID), "/configs/config.yaml"); err == nil {
if b, err := h.provisioner.ExecRead(ctx, provisioner.ContainerName(workspaceID), "/configs/config.yaml"); err == nil {
base = b
}
}
@@ -399,7 +400,7 @@ func conciergeIdentityPresent(ctx context.Context, prov localProvisionerIsRunnin
// that doesn't expose ExecRead.
return true
}
body, err := reader.ExecRead(ctx, configDirName(id), "/configs/system-prompt.md")
body, err := reader.ExecRead(ctx, provisioner.ContainerName(id), "/configs/system-prompt.md")
if err != nil {
return false
}
@@ -15,6 +15,7 @@ import (
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provlog"
"github.com/gin-gonic/gin"
)
@@ -393,7 +394,7 @@ func (h *WorkspaceHandler) restartRuntimeFromConfig(ctx context.Context, id, wsN
return dbRuntime
}
containerRuntime := dbRuntime
containerName := configDirName(id) // ws-{id[:12]}
containerName := provisioner.ContainerName(id) // ws-{id} (KI-013 full UUID)
if cfgBytes, readErr := h.provisioner.ExecRead(ctx, containerName, "/configs/config.yaml"); readErr == nil {
for _, line := range strings.Split(string(cfgBytes), "\n") {
line = strings.TrimSpace(line)