From 782210505828bbdde140aa0b938e247e149affe4 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Tue, 9 Jun 2026 22:35:08 +0000 Subject: [PATCH 1/4] fix(e2e): use full workspace IDs for container/volume names after KI-013 (#2499) KI-013 removed 12-char UUID truncation from container/volume names. The E2E scripts were still using ws-${ID:0:12} to inspect containers and volumes, causing all local-provision E2E tests to fail (container not found). Update all affected E2E scripts to use the full workspace ID: - test_local_provision_lifecycle_e2e.sh - test_claude_code_e2e.sh - test_chat_attachments_e2e.sh - test_chat_attachments_multiruntime_e2e.sh - test_comprehensive_e2e.sh Fixes SEV #2499. Co-Authored-By: Claude Opus 4.8 --- tests/e2e/test_chat_attachments_e2e.sh | 2 +- .../test_chat_attachments_multiruntime_e2e.sh | 2 +- tests/e2e/test_claude_code_e2e.sh | 4 ++-- tests/e2e/test_comprehensive_e2e.sh | 9 +++----- .../e2e/test_local_provision_lifecycle_e2e.sh | 21 +++++++------------ 5 files changed, 14 insertions(+), 24 deletions(-) diff --git a/tests/e2e/test_chat_attachments_e2e.sh b/tests/e2e/test_chat_attachments_e2e.sh index 3d352f053..98bf3ad9b 100755 --- a/tests/e2e/test_chat_attachments_e2e.sh +++ b/tests/e2e/test_chat_attachments_e2e.sh @@ -76,7 +76,7 @@ fi log "Step 3 — Seed a file inside /workspace and ask agent to reference it" # Relies on /workspace being writable by the platform (we copy as root via # docker exec, mimicking the path a real agent would use through its tools). 
-CONTAINER=$(docker ps --format '{{.Names}}' | grep -E "^ws-${WSID:0:12}" | head -1) +CONTAINER=$(docker ps --format '{{.Names}}' | grep -E "^ws-${WSID}" | head -1) [ -n "$CONTAINER" ] || { echo "container not found"; exit 1; } docker exec "$CONTAINER" sh -c 'echo "E2E report body $(date -u +%s)" > /workspace/e2e-report.txt' diff --git a/tests/e2e/test_chat_attachments_multiruntime_e2e.sh b/tests/e2e/test_chat_attachments_multiruntime_e2e.sh index 3d93e1860..a17b70de9 100755 --- a/tests/e2e/test_chat_attachments_multiruntime_e2e.sh +++ b/tests/e2e/test_chat_attachments_multiruntime_e2e.sh @@ -145,7 +145,7 @@ check_runtime() { fails=$((fails + 1)); return fi local container - container=$(docker ps --format '{{.Names}}' | grep -E "^ws-${wsid:0:12}" | head -1) + container=$(docker ps --format '{{.Names}}' | grep -E "^ws-${wsid}" | head -1) [ -z "$container" ] && { echo "FAIL $label: container not found"; fails=$((fails + 1)); return; } has_patch_in_container "$container" || { echo "FAIL $label: platform helpers missing"; fails=$((fails + 1)); return; } diff --git a/tests/e2e/test_claude_code_e2e.sh b/tests/e2e/test_claude_code_e2e.sh index 30ec73ded..49ab5708f 100755 --- a/tests/e2e/test_claude_code_e2e.sh +++ b/tests/e2e/test_claude_code_e2e.sh @@ -94,8 +94,8 @@ check_contains "Upload child prompt" "replaced" "$CHILD_UPLOAD" # Verify prompts in containers sleep 2 -ROOT_CONTAINER=$(docker ps --filter "name=ws-${ROOT:0:12}" -q | head -1) -CHILD_CONTAINER=$(docker ps --filter "name=ws-${CHILD:0:12}" -q | head -1) +ROOT_CONTAINER=$(docker ps --filter "name=ws-${ROOT}" -q | head -1) +CHILD_CONTAINER=$(docker ps --filter "name=ws-${CHILD}" -q | head -1) ROOT_HAS_PROMPT=$(docker exec $ROOT_CONTAINER cat /configs/system-prompt.md 2>/dev/null | head -1) check_contains "Root container has prompt" "Root Agent" "$ROOT_HAS_PROMPT" diff --git a/tests/e2e/test_comprehensive_e2e.sh b/tests/e2e/test_comprehensive_e2e.sh index 96370e26c..0c57ae260 100755 --- 
a/tests/e2e/test_comprehensive_e2e.sh +++ b/tests/e2e/test_comprehensive_e2e.sh @@ -153,19 +153,17 @@ RT_HM_ID=$(echo "$R" | jq_extract "['id']") # Wait for containers to start (poll up to 30s for first one to appear) if command -v docker &>/dev/null; then - short_cc="${RT_CC_ID:0:12}" for _ in 1 2 3 4 5 6; do sleep 5 - if docker inspect "ws-${short_cc}" >/dev/null 2>&1; then break; fi + if docker inspect "ws-${RT_CC_ID}" >/dev/null 2>&1; then break; fi done _check_image() { local ws_id="$1" expected_tag="$2" label="$3" - local short_id="${ws_id:0:12}" # Poll up to 30s for image to appear local actual_image="NOT_FOUND" for _ in 1 2 3 4 5 6; do - actual_image=$(docker inspect "ws-${short_id}" --format '{{.Config.Image}}' 2>/dev/null || echo "NOT_FOUND") + actual_image=$(docker inspect "ws-${ws_id}" --format '{{.Config.Image}}' 2>/dev/null || echo "NOT_FOUND") if echo "$actual_image" | grep -qF "$expected_tag"; then break; fi sleep 5 done @@ -216,10 +214,9 @@ if echo "$R" | grep -qF "saved"; then curl -s -X POST "$BASE/workspaces/$RT_CX_ID/restart" > /dev/null 2>&1 # Poll up to 30s for the new container image to appear (restart can take a while) if command -v docker &>/dev/null; then - short_id="${RT_CX_ID:0:12}" for _ in 1 2 3 4 5 6; do sleep 5 - actual=$(docker inspect "ws-${short_id}" --format '{{.Config.Image}}' 2>/dev/null || echo "") + actual=$(docker inspect "ws-${RT_CX_ID}" --format '{{.Config.Image}}' 2>/dev/null || echo "") if echo "$actual" | grep -qF "openclaw"; then break; fi done _check_image "$RT_CX_ID" "openclaw" "Runtime change codex to openclaw on restart" diff --git a/tests/e2e/test_local_provision_lifecycle_e2e.sh b/tests/e2e/test_local_provision_lifecycle_e2e.sh index dc985d4cf..b4623f7f4 100755 --- a/tests/e2e/test_local_provision_lifecycle_e2e.sh +++ b/tests/e2e/test_local_provision_lifecycle_e2e.sh @@ -191,8 +191,7 @@ except Exception: } container_running() { # container_running -> echoes name if running - local short="${1:0:12}" - docker ps 
--filter "name=ws-${short}" --filter "status=running" --format '{{.Names}}' 2>/dev/null | head -1 + docker ps --filter "name=ws-${1}" --filter "status=running" --format '{{.Names}}' 2>/dev/null | head -1 } cleanup() { @@ -203,16 +202,11 @@ cleanup() { # SCOPED teardown — only the workspace this test created. Never a blanket # sweep (other dev workspaces may be live on this shared daemon). e2e_delete_workspace "$WSID" "" >/dev/null 2>&1 || true - local short="${WSID:0:12}" - docker rm -f "ws-${short}" >/dev/null 2>&1 || true - # Volume naming is split in the provisioner: configs + claude-sessions use the - # 12-char short id (ConfigVolumeName/ClaudeSessionVolumeName), but the - # /workspace volume uses the FULL UUID (buildWorkspaceMount: ws--workspace). - # Remove BOTH forms so neither leaks. + docker rm -f "ws-${WSID}" >/dev/null 2>&1 || true docker volume rm -f \ - "ws-${short}-configs" "ws-${short}-claude-sessions" \ - "ws-${short}-workspace" "ws-${WSID}-workspace" >/dev/null 2>&1 || true - echo "cleaned workspace $WSID + ws-${short} container/volumes" + "ws-${WSID}-configs" "ws-${WSID}-claude-sessions" \ + "ws-${WSID}-workspace" >/dev/null 2>&1 || true + echo "cleaned workspace $WSID + ws-${WSID} container/volumes" fi # Restore the cache tag to whatever it pointed at before we retagged it, so a # stub run doesn't leave the real claude-code tag aliased to the stub. @@ -331,8 +325,7 @@ if [ -z "$WSID" ]; then exit 1 fi pass "workspace created: $WSID" -SHORT="${WSID:0:12}" -CONFIG_VOL="ws-${SHORT}-configs" +CONFIG_VOL="ws-${WSID}-configs" # Mint a workspace bearer for the WorkspaceAuth-gated secret + /restart calls. 
WTOKEN=$(e2e_mint_workspace_token "$WSID" || true) @@ -437,7 +430,7 @@ for _ in $(seq 1 "$ONLINE_TIMEOUT"); do done check "workspace reached online (status=$STATUS)" "online" "$STATUS" RUN=$(container_running "$WSID") -if [ -n "$RUN" ]; then pass "container running: $RUN"; else fail "no running ws-${WSID:0:12} container" "docker ps shows none"; fi +if [ -n "$RUN" ]; then pass "container running: $RUN"; else fail "no running ws-${WSID} container" "docker ps shows none"; fi echo "" # ---------------------------------------------------------------------------- -- 2.52.0 From 07040361b28df6abc4db294b55f6017375446d6e Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Tue, 9 Jun 2026 22:50:49 +0000 Subject: [PATCH 2/4] harden(ci): add SEV-2499 drift-prevention guard for KI-013 container naming (#2500) Add lint-e2e-ki013-container-names.sh that scans tests/e2e/*.sh for any ${VAR:0:12} truncation patterns. KI-013 removed 12-char UUID truncation from container/volume names; reintroducing it in E2E scripts causes the container-not-found failures that created SEV #2499. Wired into the Shellcheck (E2E scripts) CI job so every PR touching E2E scripts is automatically guarded. Co-Authored-By: Claude Opus 4.8 --- .../scripts/lint-e2e-ki013-container-names.sh | 36 +++++++++++++++++++ .gitea/workflows/ci.yml | 8 +++++ 2 files changed, 44 insertions(+) create mode 100755 .gitea/scripts/lint-e2e-ki013-container-names.sh diff --git a/.gitea/scripts/lint-e2e-ki013-container-names.sh b/.gitea/scripts/lint-e2e-ki013-container-names.sh new file mode 100755 index 000000000..2fcfeaa82 --- /dev/null +++ b/.gitea/scripts/lint-e2e-ki013-container-names.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Drift-prevention guard: SEV #2499 class (KI-013 container/volume naming). +# +# KI-013 removed 12-char UUID truncation from container/volume names. +# E2E scripts must use FULL workspace IDs (ws-${WSID}) when referencing +# containers and volumes. 
Any ${VAR:0:12} truncation in a ws-* context +# is a regression risk. +# +# Run: bash .gitea/scripts/lint-e2e-ki013-container-names.sh +set -euo pipefail + +PAT=':0:12([^0-9]|$)' +ERR=0 + +for f in tests/e2e/*.sh; do + # Flag EVERY :0:12 substring expansion in the file: the pattern does not check + # for a ws-* prefix, so a truncation outside a container/volume name is reported too. + MATCHES=$(grep -nE "$PAT" "$f" 2>/dev/null || true) + if [ -n "$MATCHES" ]; then + echo "::error::SEV-2499 drift guard: truncated workspace ID in container/volume name" + echo "::error::file=$f" + echo "$MATCHES" | while read -r line; do + echo "::error:: $line" + done + ERR=1 + fi +done + +if [ "$ERR" -ne 0 ]; then + echo "" + echo "FAIL: E2E scripts reference containers/volumes with 12-char truncated IDs." + echo " KI-013 requires FULL workspace IDs. Update the flagged lines." + exit 1 +fi + +echo "PASS: No truncated workspace IDs in E2E container/volume references." diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 6555b80b6..d2e18494e 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -404,6 +404,14 @@ jobs: run: | bash scripts/test-promote-tenant-image.sh + - if: ${{ needs.changes.outputs.scripts == 'true' }} + name: Drift guard — KI-013 container/volume naming (SEV #2499) + # KI-013 removed 12-char UUID truncation from container/volume names. + # E2E scripts must use FULL workspace IDs. This fail-closed guard + # prevents regressions where a new/modified script reintroduces the + # old truncated-name pattern (the root cause of SEV #2499). 
+ run: bash .gitea/scripts/lint-e2e-ki013-container-names.sh + - if: ${{ needs.changes.outputs.scripts == 'true' }} name: Shellcheck promote-tenant-image script # scripts/ is excluded from the bulk shellcheck pass above (legacy -- 2.52.0 From 82a3f23540c153539e2bf834e69bff5f77375594 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Wed, 10 Jun 2026 01:55:31 +0000 Subject: [PATCH 3/4] test(e2e): add provisioning diagnostics to local-lifecycle (#2500) The Local Provision Lifecycle E2E (stub) is failing with workspace stuck in provisioning for 90s. The runtime container is running but we have no visibility into why register/heartbeat is not flipping status to online. Add a diagnose_provision helper that dumps container logs, env, a reachability test, and the ws-* container/volume inventory whenever the online check fails. This turns the next CI failure into an actionable root-cause signal for SEV-2499. Refs #2499 --- .../e2e/test_local_provision_lifecycle_e2e.sh | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/e2e/test_local_provision_lifecycle_e2e.sh b/tests/e2e/test_local_provision_lifecycle_e2e.sh index b4623f7f4..1fa325799 100755 --- a/tests/e2e/test_local_provision_lifecycle_e2e.sh +++ b/tests/e2e/test_local_provision_lifecycle_e2e.sh @@ -194,6 +194,28 @@ container_running() { # container_running -> echoes name if running docker ps --filter "name=ws-${1}" --filter "status=running" --format '{{.Names}}' 2>/dev/null | head -1 } +diagnose_provision() { + local wsid="${1:-}" + local container + container=$(container_running "$wsid") + echo "--- DIAGNOSE provisioning for $wsid ---" + echo "last_sample_error: ${LAST:-}" + echo "container_running: ${container:-}" + if [ -n "$container" ]; then + echo "--- container logs ($container) ---" + docker logs "$container" 2>&1 | tail -n 60 || true + echo "--- container env ---" + docker inspect "$container" --format '{{json .Config.Env}}' 2>&1 || true + echo "--- container 
reachability test ---" + docker exec "$container" sh -c 'echo "platform_url=$PLATFORM_URL"; wget -qO- "$PLATFORM_URL/health" 2>&1 || true' || true + fi + echo "--- all ws-* containers ---" + docker ps --filter "name=ws-" --format '{{.Names}} {{.Status}}' 2>/dev/null || true + echo "--- all ws-* volumes ---" + docker volume ls -q 2>/dev/null | grep '^ws-' || true + echo "--- end diagnose ---" +} + cleanup() { local rc=$? echo "" @@ -429,6 +451,7 @@ for _ in $(seq 1 "$ONLINE_TIMEOUT"); do sleep 1 done check "workspace reached online (status=$STATUS)" "online" "$STATUS" +if [ "$FAIL" -gt 0 ]; then diagnose_provision "$WSID"; echo "=== Results: $PASS passed, $FAIL failed ==="; exit 1; fi RUN=$(container_running "$WSID") if [ -n "$RUN" ]; then pass "container running: $RUN"; else fail "no running ws-${WSID} container" "docker ps shows none"; fi echo "" @@ -466,6 +489,7 @@ else sleep 1 done check "workspace back online after restart (status=$STATUS)" "online" "$STATUS" + if [ "$FAIL" -gt 0 ]; then diagnose_provision "$WSID"; echo "=== Results: $PASS passed, $FAIL failed ==="; exit 1; fi # Explicit negative on the exact bug signature. if echo "$LAST" | grep -qiF "config volume is empty"; then fail "restart hit 'config volume is empty' — restart-survival REGRESSION" "$LAST" -- 2.52.0 From b9dd026341c7077803b7de3ea75823b9572a18f8 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Wed, 10 Jun 2026 02:04:18 +0000 Subject: [PATCH 4/4] fix(handlers): use full-ID container names for ExecRead post-KI-013 (#2500) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KI-013 changed workspace container names from ws-{id[:12]} to ws-{id}. Three call sites were still passing configDirName(id) (the truncated config-directory name) to provisioner.ExecRead, so post-deploy ExecRead probes into running containers silently failed with 'No such container'. 
Updates: - workspace_restart.go: runtime config probe uses provisioner.ContainerName(id) - platform_agent.go: concierge identity overlay + system-prompt detection use provisioner.ContainerName(workspaceID) These failures were silent (err == nil guard fell through), so they did not surface as hard errors, but they caused platform-agent identity misses and runtime-change detection misses — part of the SEV-2499 symptom class. Refs #2499 --- workspace-server/internal/handlers/platform_agent.go | 5 +++-- workspace-server/internal/handlers/workspace_restart.go | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/workspace-server/internal/handlers/platform_agent.go b/workspace-server/internal/handlers/platform_agent.go index 07412ecf3..23ea4cefd 100644 --- a/workspace-server/internal/handlers/platform_agent.go +++ b/workspace-server/internal/handlers/platform_agent.go @@ -33,6 +33,7 @@ import ( "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db" "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models" + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner" "github.com/gin-gonic/gin" "github.com/google/uuid" ) @@ -239,7 +240,7 @@ func (h *WorkspaceHandler) applyConciergeProvisionConfig( } } if len(base) == 0 && h.provisioner != nil { - if b, err := h.provisioner.ExecRead(ctx, configDirName(workspaceID), "/configs/config.yaml"); err == nil { + if b, err := h.provisioner.ExecRead(ctx, provisioner.ContainerName(workspaceID), "/configs/config.yaml"); err == nil { base = b } } @@ -399,7 +400,7 @@ func conciergeIdentityPresent(ctx context.Context, prov localProvisionerIsRunnin // that doesn't expose ExecRead. 
return true } - body, err := reader.ExecRead(ctx, configDirName(id), "/configs/system-prompt.md") + body, err := reader.ExecRead(ctx, provisioner.ContainerName(id), "/configs/system-prompt.md") if err != nil { return false } diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index 4006912ed..10ae642bb 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -15,6 +15,7 @@ import ( "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db" "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events" "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models" + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner" "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provlog" "github.com/gin-gonic/gin" ) @@ -393,7 +394,7 @@ func (h *WorkspaceHandler) restartRuntimeFromConfig(ctx context.Context, id, wsN return dbRuntime } containerRuntime := dbRuntime - containerName := configDirName(id) // ws-{id[:12]} + containerName := provisioner.ContainerName(id) // ws-{id} (KI-013 full UUID) if cfgBytes, readErr := h.provisioner.ExecRead(ctx, containerName, "/configs/config.yaml"); readErr == nil { for _, line := range strings.Split(string(cfgBytes), "\n") { line = strings.TrimSpace(line) -- 2.52.0