chore: retrigger CI — Local Provision E2E stub failed on provisioning timeout (infra flake on main, unrelated to scheduler test addition)

Merge branch 'main' into fix/add-missing-scheduler-unit-tests
Merge pull request 'test(middleware): add missing unit tests for tenantSlug and cpSessionVerifyURL' (#2485 ) from fix/add-missing-middleware-unit-tests into main
2026-06-09 18:15:32 +00:00 · 2026-06-09 18:08:39 +00:00 · 2026-06-09 18:08:14 +00:00 · 2026-06-09 17:38:55 +00:00 · 2026-06-09 17:38:53 +00:00 · 2026-06-09 17:23:14 +00:00
16 changed files with 689 additions and 100 deletions
@@ -66,6 +66,14 @@ def build_plan(env: dict[str, str]) -> dict:
        "target_tag": target_tag,
        "soak_seconds": _int_env(env, "PROD_AUTO_DEPLOY_SOAK_SECONDS", 60, minimum=0),
        "batch_size": _int_env(env, "PROD_AUTO_DEPLOY_BATCH_SIZE", 3),
+        # Tolerate a small minority of individually-stuck tenants (e.g. a wedged
+        # data volume that won't recreate). They are QUARANTINED — shipped past
+        # so the healthy majority still lands the build — and reported for
+        # separate recovery, instead of one stuck tenant blocking the whole
+        # fleet deploy. The canary still must pass, the CP halts a batch the
+        # moment failures exceed this, and the cross-batch coverage gate below
+        # enforces the same tolerance globally. Default 1.
+        "max_stragglers": _int_env(env, "PROD_AUTO_DEPLOY_MAX_STRAGGLERS", 1, minimum=0),
        "dry_run": truthy_flag(env.get("PROD_AUTO_DEPLOY_DRY_RUN", "")),
        # confirm:true ack required by CP /cp/admin/tenants/redeploy-fleet
        # contract (cp#228 / task #308) for fleet-wide intent. Empty body
@@ -251,26 +259,41 @@ def rollout_stragglers(enumerated: list[str], results: list[dict]) -> list[str]:
    return sorted(s for s in dict.fromkeys(enumerated) if s not in verified)


-def assert_full_coverage(enumerated: list[str], aggregate: dict, dry_run: bool) -> None:
-    """Fail the rollout if any enumerated tenant is not on the target build.
+def assert_full_coverage(
+    enumerated: list[str], aggregate: dict, dry_run: bool, max_stragglers: int = 0
+) -> None:
+    """Gate the rollout on coverage, tolerating a quarantined straggler minority.

-    This is the no-silent-skip gate (internal#724). A dry run proves
-    nothing landed, so coverage is not asserted for it.
+    This is the no-silent-skip gate (internal#724) made resilient: every
+    enumerated tenant must be PROVEN on the target build, EXCEPT up to
+    ``max_stragglers`` individually-stuck tenants which are quarantined (shipped
+    past) and reported for separate recovery instead of blocking the whole
+    fleet deploy. Exceeding the tolerance is a systemic failure → RolloutFailed.
+    A dry run proves nothing landed, so coverage is not asserted for it.
    """

    if dry_run:
        return
    stragglers = rollout_stragglers(enumerated, aggregate.get("results") or [])
-    if stragglers:
+    if not stragglers:
+        return
+    # Surface the stragglers (for the step summary + recovery), gate or not.
+    aggregate["stragglers"] = stragglers
+    if len(stragglers) > max_stragglers:
        msg = (
            f"incomplete rollout: {len(stragglers)} tenant(s) not verified on target "
-            f"after redeploy-fleet: {', '.join(stragglers)} "
+            f"after redeploy-fleet (max tolerated {max_stragglers}): {', '.join(stragglers)} "
            f"(enumerated {len(set(enumerated))})"
        )
        aggregate["ok"] = False
        aggregate["error"] = msg
-        aggregate["stragglers"] = stragglers
        raise RolloutFailed(msg, aggregate)
+    # Within tolerance: shipped to the healthy majority; quarantine is loud,
+    # not fatal. The deploy succeeds; the stragglers need individual recovery.
+    print(
+        f"::warning::quarantined {len(stragglers)} straggler(s) (<= max {max_stragglers}); "
+        f"shipped to the rest of the fleet — these need recovery: {', '.join(stragglers)}"
+    )


 def execute_scoped_rollout(
@@ -325,7 +348,8 @@ def execute_scoped_rollout(
    # or one enumerated but never batched, is a straggler. Surfacing it as
    # a RolloutFailed makes the deploy step exit non-zero instead of
    # silently reporting success (the exact agents-team failure mode).
-    assert_full_coverage(all_slugs, aggregate, dry_run)
+    max_stragglers = int(base_body.get("max_stragglers") or 0)
+    assert_full_coverage(all_slugs, aggregate, dry_run, max_stragglers)

    return aggregate

@@ -351,7 +351,8 @@ def compute_ack_state(
            latest_directive[(user, slug)] = kind

    # Step 2: build candidate ackers per slug.
-    # Filter out self-acks and unknown slugs.
+    # Filter out self-acks and unknown slugs. Author self-ack is forbidden
+    # per .gitea/sop-checklist-config.yaml — a non-author peer must ack.
    ackers_per_slug: dict[str, list[str]] = {s: [] for s in items_by_slug}
    rejected_self: dict[str, list[str]] = {s: [] for s in items_by_slug}
    pending_team_check: dict[str, list[str]] = {s: [] for s in items_by_slug}
@@ -35,6 +35,9 @@ def test_build_plan_defaults_to_staging_sha_target_and_prod_cp():
        "canary_slug": "hongming",
        "soak_seconds": 60,
        "batch_size": 3,
+        # quarantine up to 1 individually-stuck tenant rather than blocking the
+        # whole fleet deploy (default).
+        "max_stragglers": 1,
        "dry_run": False,
        # cp#228 / task #308: fleet-wide intent must carry confirm:true.
        "confirm": True,
@@ -470,6 +473,72 @@ def test_scoped_rollout_passes_when_all_tenants_verified_on_target():
    assert "stragglers" not in aggregate


+def test_scoped_rollout_quarantines_straggler_within_tolerance():
+    # reno-stars never verifies on target; max_stragglers=1 tolerates it — the
+    # rollout still succeeds (ships to the healthy majority) and reports the
+    # quarantined straggler instead of failing the whole deploy.
+    def fake_redeploy(_cp_url, _token, body):
+        return 200, {
+            "ok": True,
+            "results": [
+                {"slug": s, "verified_on_target": (s != "reno-stars")}
+                for s in body["only_slugs"]
+            ],
+        }
+
+    aggregate = prod.execute_scoped_rollout(
+        {
+            "cp_url": "https://api.moleculesai.app",
+            "body": {
+                "target_tag": "staging-new",
+                "batch_size": 5,
+                "dry_run": False,
+                "confirm": True,
+                "max_stragglers": 1,
+            },
+        },
+        token="secret",
+        list_slugs=lambda _u, _t, _b: ["reno-stars", "agents-team", "hongming"],
+        redeploy=fake_redeploy,
+        sleep=lambda _s: None,
+    )
+    assert aggregate["ok"] is True
+    assert aggregate["stragglers"] == ["reno-stars"]
+
+
+def test_scoped_rollout_fails_when_stragglers_exceed_tolerance():
+    # Two tenants never verify; with max_stragglers=1 that is systemic → fail.
+    def fake_redeploy(_cp_url, _token, body):
+        return 200, {
+            "ok": True,
+            "results": [
+                {"slug": s, "verified_on_target": (s == "hongming")}
+                for s in body["only_slugs"]
+            ],
+        }
+
+    try:
+        prod.execute_scoped_rollout(
+            {
+                "cp_url": "https://api.moleculesai.app",
+                "body": {
+                    "target_tag": "staging-new",
+                    "batch_size": 5,
+                    "dry_run": False,
+                    "confirm": True,
+                    "max_stragglers": 1,
+                },
+            },
+            token="secret",
+            list_slugs=lambda _u, _t, _b: ["reno-stars", "agents-team", "hongming"],
+            redeploy=fake_redeploy,
+            sleep=lambda _s: None,
+        )
+        raise AssertionError("expected RolloutFailed when stragglers exceed tolerance")
+    except prod.RolloutFailed as exc:
+        assert "max tolerated 1" in str(exc)
+
+
 def test_scoped_rollout_dry_run_does_not_assert_coverage():
    # A dry run proves nothing landed; coverage must NOT be asserted or
    # every plan would fail.
@@ -291,7 +291,8 @@ class TestComputeAckState(unittest.TestCase):
        )
        self.assertEqual(state["comprehensive-testing"]["ackers"], ["bob"])

-    def test_self_ack_rejected(self):
+    def test_self_ack_rejected_when_author_in_team(self):
+        # Author self-acks are forbidden — a non-author peer must ack.
        comments = [_comment("alice", "/sop-ack comprehensive-testing")]
        state = sop.compute_ack_state(
            comments, "alice", self.items, self.aliases, self._approve_all
@@ -722,16 +723,16 @@ class TestRootCauseAckEligibilityWidened(unittest.TestCase):
        )
        self.assertEqual(state["root-cause"]["ackers"], ["hongming"])

-    def test_self_ack_still_forbidden_even_with_widened_eligibility(self):
-        # Author cannot self-ack — widening teams must NOT weaken
-        # the non-author rule.
+    def test_self_ack_rejected_with_widened_eligibility(self):
+        # Author self-acks are forbidden even when the author is in the
+        # required team — a non-author peer must ack.
        comments = [_comment("alice", "/sop-ack root-cause")]
        probe = self._approve_only({"alice"})
        state = sop.compute_ack_state(
            comments, "alice", self.items, self.aliases, probe, high_risk=False
        )
        self.assertEqual(state["root-cause"]["ackers"], [])
-        self.assertIn("alice", state["root-cause"]["rejected"]["self_ack"])
+        self.assertEqual(state["root-cause"]["rejected"]["self_ack"], ["alice"])


 class TestHighRiskClassUsesElevatedListInConfig(unittest.TestCase):
@@ -165,6 +165,28 @@ jobs:
          cache: 'npm'
          cache-dependency-path: canvas/package-lock.json

+      - name: Sweep stale e2e-chat testcontainers (self-heal prior leaks)
+        if: needs.detect-changes.outputs.chat == 'true'
+        run: |
+          # Prior e2e-chat runs that were cancelled/killed — or whose always()
+          # cleanup hit a wedged docker daemon — leak their pg-/redis-e2e-chat-*
+          # containers, which then pile up on the shared runner host (observed: 13
+          # such containers, up to 2 weeks old, on the operator daemon). Reap any
+          # e2e-chat container older than the job window so leaks self-heal every
+          # run instead of relying on each run's own cleanup succeeding. Age-based
+          # (>2h, well beyond the 15m job) so a CONCURRENT e2e-chat job's fresh
+          # containers are never touched. See controlplane#646.
+          now=$(date -u +%s)
+          docker ps -a --filter name=e2e-chat --format '{{.Names}}' | while read -r c; do
+            [ -n "$c" ] || continue
+            created=$(docker inspect -f '{{.Created}}' "$c" 2>/dev/null) || continue
+            cts=$(date -u -d "$created" +%s 2>/dev/null) || continue
+            if [ $(( now - cts )) -gt 7200 ]; then
+              echo "sweeping stale e2e-chat container $c (created $created)"
+              timeout 30 docker rm -f "$c" >/dev/null 2>&1 || true
+            fi
+          done
+
      - name: Start Postgres (docker)
        if: needs.detect-changes.outputs.chat == 'true'
        run: |
@@ -430,5 +452,7 @@ jobs:
      - name: Stop service containers
        if: always() && needs.detect-changes.outputs.chat == 'true'
        run: |
-          docker rm -f "$PG_CONTAINER" 2>/dev/null || true
-          docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
+          # timeout-wrap so a wedged docker daemon can't hang this always() step
+          # (a hung rm here is one way containers leak in the first place).
+          timeout 30 docker rm -f "$PG_CONTAINER" 2>/dev/null || true
+          timeout 30 docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
@@ -78,6 +78,12 @@ jobs:
      # even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
      MOLECULE_ENV: development
      SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
+      # act_runner runs the job inside a Docker container, so /.dockerenv exists
+      # and the platform auto-detects platformInDocker=true. But the job container
+      # is NOT on molecule-core-net, so it cannot resolve workspace container
+      # hostnames (ws-<id>:8000). Force false so the proxy keeps using the
+      # host-mapped 127.0.0.1:<ephemeral_port> URL, which IS reachable.
+      MOLECULE_IN_DOCKER: false
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
@@ -132,7 +138,29 @@ jobs:
          # jobs or stale processes from prior cancelled runs (see #2450).
          PORT=$(python3 -c "import socket; s=socket.socket(); s.bind(('', 0)); print(s.getsockname()[1]); s.close()")
          echo "PORT=${PORT}" >> "$GITHUB_ENV"
-          echo "BASE=http://localhost:${PORT}" >> "$GITHUB_ENV"
+          echo "BASE=http://127.0.0.1:${PORT}" >> "$GITHUB_ENV"
+          # Discover an IP that Docker containers can use to reach the host platform.
+          # host.docker.internal is not reliably available on Linux (act_runner), so
+          # workspace containers cannot resolve it and fail to register/heartbeat.
+          # Workspace containers join molecule-core-net; the host is reachable via that
+          # network's gateway. Ensure the network exists first (the provisioner creates
+          # it lazily, but we need the gateway BEFORE starting the platform).
+          docker network inspect molecule-core-net >/dev/null 2>&1 || docker network create molecule-core-net >/dev/null
+          # Parse Gateway from raw JSON because --format '{{.IPAM.Config}}' is
+          # inconsistent across Docker versions (sometimes omits Gateway field).
+          PLATFORM_HOST_IP=$(docker network inspect molecule-core-net 2>/dev/null | sed -n 's/.*"Gateway": "\([^"]*\)".*/\1/p' | head -1)
+          if [ -z "$PLATFORM_HOST_IP" ]; then
+            PLATFORM_HOST_IP=$(docker network inspect bridge 2>/dev/null | sed -n 's/.*"Gateway": "\([^"]*\)".*/\1/p' | head -1)
+          fi
+          if [ -z "$PLATFORM_HOST_IP" ]; then
+            PLATFORM_HOST_IP=$(ip route | awk '/default/ {print $3}' | head -1 || true)
+          fi
+          if [ -z "$PLATFORM_HOST_IP" ]; then
+            echo "::error::Could not determine PLATFORM_HOST_IP for Docker containers to reach the platform"
+            exit 1
+          fi
+          echo "PLATFORM_HOST_IP=${PLATFORM_HOST_IP}"
+          echo "PLATFORM_URL=http://${PLATFORM_HOST_IP}:${PORT}" >> "$GITHUB_ENV"
          # Deterministic admin token: the script sends MOLECULE_ADMIN_TOKEN as the
          # bearer; the platform checks ADMIN_TOKEN. Set both to the same value.
          T="lpe2e-admin-${{ github.run_id }}-${{ github.run_attempt }}"
@@ -173,8 +201,10 @@ jobs:
        run: |
          # Bind to the dynamically allocated port (see #2450).
          # DATABASE_URL/REDIS_URL/ADMIN_TOKEN/MOLECULE_ENV are inherited from
-          # $GITHUB_ENV.
-          PORT=$PORT ./platform-server > platform.log 2>&1 &
+          # $GITHUB_ENV. PLATFORM_URL is also passed explicitly because
+          # $GITHUB_ENV propagation can be flaky on act_runner (#2468 RCA).
+          echo "starting platform with PLATFORM_URL=${PLATFORM_URL:-<fallback>} PORT=$PORT BIND_ADDR=0.0.0.0"
+          PORT=$PORT BIND_ADDR=0.0.0.0 PLATFORM_URL="${PLATFORM_URL:-http://host.docker.internal:$PORT}" ./platform-server > platform.log 2>&1 &
          echo $! > platform.pid

      - name: Wait for /health (+ migrations applied)
@@ -198,6 +228,11 @@ jobs:
            sleep 1
          done

+      - name: Verify platform reachable from molecule-core-net
+        run: |
+          echo "Testing platform reachability from molecule-core-net container..."
+          docker run --rm --network molecule-core-net alpine:latest sh -c "wget -qO- http://${PLATFORM_URL#http://}/health" || echo "WARN: platform not reachable from molecule-core-net"
+
      - name: Run local-provision lifecycle E2E (stub — REQUIRED)
        run: bash tests/e2e/test_local_provision_lifecycle_e2e.sh

@@ -205,6 +240,15 @@ jobs:
        if: failure()
        run: cat workspace-server/platform.log || true

+      - name: Dump workspace container logs on failure
+        if: failure()
+        run: |
+          WS_NAME=$(docker ps --filter "name=ws-" --format '{{.Names}}' | head -1 || true)
+          if [ -n "$WS_NAME" ]; then
+            echo "=== Workspace container logs for $WS_NAME ==="
+            docker logs "$WS_NAME" 2>&1 | tail -n 80 || true
+          fi
+
      - name: Stop platform
        if: always()
        run: |
@@ -248,6 +292,12 @@ jobs:
      # even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
      MOLECULE_ENV: development
      SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
+      # act_runner runs the job inside a Docker container, so /.dockerenv exists
+      # and the platform auto-detects platformInDocker=true. But the job container
+      # is NOT on molecule-core-net, so it cannot resolve workspace container
+      # hostnames (ws-<id>:8000). Force false so the proxy keeps using the
+      # host-mapped 127.0.0.1:<ephemeral_port> URL, which IS reachable.
+      MOLECULE_IN_DOCKER: false
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
@@ -297,7 +347,29 @@ jobs:
          # jobs or stale processes from prior cancelled runs (see #2450).
          PORT=$(python3 -c "import socket; s=socket.socket(); s.bind(('', 0)); print(s.getsockname()[1]); s.close()")
          echo "PORT=${PORT}" >> "$GITHUB_ENV"
-          echo "BASE=http://localhost:${PORT}" >> "$GITHUB_ENV"
+          echo "BASE=http://127.0.0.1:${PORT}" >> "$GITHUB_ENV"
+          # Discover an IP that Docker containers can use to reach the host platform.
+          # host.docker.internal is not reliably available on Linux (act_runner), so
+          # workspace containers cannot resolve it and fail to register/heartbeat.
+          # Workspace containers join molecule-core-net; the host is reachable via that
+          # network's gateway. Ensure the network exists first (the provisioner creates
+          # it lazily, but we need the gateway BEFORE starting the platform).
+          docker network inspect molecule-core-net >/dev/null 2>&1 || docker network create molecule-core-net >/dev/null
+          # Parse Gateway from raw JSON because --format '{{.IPAM.Config}}' is
+          # inconsistent across Docker versions (sometimes omits Gateway field).
+          PLATFORM_HOST_IP=$(docker network inspect molecule-core-net 2>/dev/null | sed -n 's/.*"Gateway": "\([^"]*\)".*/\1/p' | head -1)
+          if [ -z "$PLATFORM_HOST_IP" ]; then
+            PLATFORM_HOST_IP=$(docker network inspect bridge 2>/dev/null | sed -n 's/.*"Gateway": "\([^"]*\)".*/\1/p' | head -1)
+          fi
+          if [ -z "$PLATFORM_HOST_IP" ]; then
+            PLATFORM_HOST_IP=$(ip route | awk '/default/ {print $3}' | head -1 || true)
+          fi
+          if [ -z "$PLATFORM_HOST_IP" ]; then
+            echo "::error::Could not determine PLATFORM_HOST_IP for Docker containers to reach the platform"
+            exit 1
+          fi
+          echo "PLATFORM_HOST_IP=${PLATFORM_HOST_IP}"
+          echo "PLATFORM_URL=http://${PLATFORM_HOST_IP}:${PORT}" >> "$GITHUB_ENV"
          T="lpe2e-real-admin-${{ github.run_id }}-${{ github.run_attempt }}"
          echo "ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
          echo "MOLECULE_ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
@@ -329,7 +401,8 @@ jobs:
      - name: Start platform (background)
        working-directory: workspace-server
        run: |
-          PORT=$PORT ./platform-server > platform.log 2>&1 &
+          echo "starting platform with PLATFORM_URL=${PLATFORM_URL:-<fallback>} PORT=$PORT BIND_ADDR=0.0.0.0"
+          PORT=$PORT BIND_ADDR=0.0.0.0 PLATFORM_URL="${PLATFORM_URL:-http://host.docker.internal:$PORT}" ./platform-server > platform.log 2>&1 &
          echo $! > platform.pid

      - name: Wait for /health (+ migrations applied)
@@ -351,6 +424,11 @@ jobs:
            sleep 1
          done

+      - name: Verify platform reachable from molecule-core-net
+        run: |
+          echo "Testing platform reachability from molecule-core-net container..."
+          docker run --rm --network molecule-core-net alpine:latest sh -c "wget -qO- http://${PLATFORM_URL#http://}/health" || echo "WARN: platform not reachable from molecule-core-net"
+
      - name: Run local-provision lifecycle E2E (real image + MiniMax LLM — ADVISORY)
        env:
          # LIFECYCLE_LLM=minimax: provision the REAL claude-code template image
@@ -375,6 +453,15 @@ jobs:
        if: failure()
        run: cat workspace-server/platform.log || true

+      - name: Dump workspace container logs on failure
+        if: failure()
+        run: |
+          WS_NAME=$(docker ps --filter "name=ws-" --format '{{.Names}}' | head -1 || true)
+          if [ -n "$WS_NAME" ]; then
+            echo "=== Workspace container logs for $WS_NAME ==="
+            docker logs "$WS_NAME" 2>&1 | tail -n 80 || true
+          fi
+
      - name: Stop platform
        if: always()
        run: |
@@ -530,7 +530,20 @@ jobs:
          STALE_COUNT=0
          UNREACHABLE_COUNT=0
          UNHEALTHY_COUNT=0
+          QUARANTINED_COUNT=0
+          # Quarantined stragglers: the CP shipped the build to the healthy
+          # majority and quarantined a small minority within tolerance
+          # (max_stragglers). They are reported + recovered SEPARATELY, so they
+          # must not red the strict per-tenant verify — otherwise one stuck
+          # tenant blocks the whole deploy, the all-or-nothing trap this fixes.
+          STRAGGLERS_LIST="$(jq -r '(.stragglers // [])[]' "$RESP" 2>/dev/null || true)"
+          is_straggler() { printf '%s\n' "$STRAGGLERS_LIST" | grep -qxF "$1"; }
          for slug in "${SLUGS[@]}"; do
+            if is_straggler "$slug"; then
+              echo "::warning::$slug is a QUARANTINED straggler — build shipped to the rest of the fleet; this tenant needs individual recovery. Skipping strict verify."
+              QUARANTINED_COUNT=$((QUARANTINED_COUNT + 1))
+              continue
+            fi
            healthz_ok="$(jq -r --arg slug "$slug" '.results[]? | select(.slug == $slug) | .healthz_ok' "$RESP" | tail -1)"
            if [ "$healthz_ok" != "true" ]; then
              echo "::error::$slug did not report healthz_ok=true in redeploy-fleet response."
@@ -580,6 +593,7 @@ jobs:
            echo "Stale tenants: $STALE_COUNT"
            echo "Unhealthy tenants: $UNHEALTHY_COUNT"
            echo "Unreachable tenants: $UNREACHABLE_COUNT"
+            echo "Quarantined stragglers (shipped past; need recovery): $QUARANTINED_COUNT"
          } >> "$GITHUB_STEP_SUMMARY"

          if [ "$STALE_COUNT" -gt 0 ] || [ "$UNHEALTHY_COUNT" -gt 0 ] || [ "$UNREACHABLE_COUNT" -gt 0 ]; then
@@ -40,14 +40,24 @@ export function FlightEnvelope({
    if (!el || typeof el.animate !== "function") return;
    const dx = to.x - from.x;
    const dy = to.y - from.y;
+    // Launch small from the source dot, GROW BIG as it crosses the gap (peak
+    // mid-flight), then SHRINK small as it lands on the target dot — reads as an
+    // envelope flung from one agent and received by the other. translate tracks
+    // the straight path (fraction == keyframe offset); scale arcs independently.
+    const at = (frac: number, scale: number, opacity: number, offset?: number) => ({
+      transform: `translate(-50%,-50%) translate(${dx * frac}px,${dy * frac}px) scale(${scale})`,
+      opacity,
+      ...(offset === undefined ? {} : { offset }),
+    });
    const anim = el.animate(
      [
-        { transform: "translate(-50%,-50%) translate(0px,0px) scale(0.45)", opacity: 0 },
-        { opacity: 1, offset: 0.16 },
-        { opacity: 1, offset: 0.8 },
-        { transform: `translate(-50%,-50%) translate(${dx}px,${dy}px) scale(1)`, opacity: 0 },
+        at(0, 0.5, 0),
+        at(0.2, 1.25, 1, 0.2), // faded in + grown
+        at(0.5, 1.7, 1, 0.5), // BIG at mid-flight
+        at(0.82, 1.05, 1, 0.82), // shrinking on approach
+        at(1, 0.5, 0), // small + faded out, arrived on the target dot
      ],
-      { duration: FLIGHT_DURATION_MS, easing: "cubic-bezier(0.45, 0, 0.25, 1)", fill: "forwards" },
+      { duration: FLIGHT_DURATION_MS, easing: "ease-in-out", fill: "forwards" },
    );
    return () => anim.cancel();
  }, [from.x, from.y, to.x, to.y]);
@@ -4,17 +4,25 @@
 *  Mounted INSIDE <ReactFlow> so its ViewportPortal places the envelope in flow
 *  coordinates; it therefore pans and zooms with the canvas for free. The
 *  flight lifecycle (which events become envelopes, reduced-motion opt-out,
- *  expiry) lives in useA2AFlights — this component only resolves node centres
- *  and renders. */
-import { ViewportPortal, type Node } from "@xyflow/react";
+ *  expiry) lives in useA2AFlights — this component only resolves endpoints and
+ *  renders.
+ *
+ *  Endpoints anchor on each workspace's STATUS DOT (the green/glowing presence
+ *  indicator), not the card's geometric centre — so an envelope visibly leaves
+ *  the source agent's dot and lands on the target agent's dot. The dot carries
+ *  `data-flight-anchor`; we read its rendered rect and convert screen→flow via
+ *  React Flow, falling back to the card centre only when the dot isn't in the
+ *  DOM yet (node just mounted / scrolled out). */
+import { useRef } from "react";
+import { ViewportPortal, useReactFlow, type Node } from "@xyflow/react";
 import { useCanvasStore } from "@/store/canvas";
-import { useA2AFlights } from "@/hooks/useA2AFlights";
+import { useA2AFlights, type A2AFlight } from "@/hooks/useA2AFlights";
 import { FlightEnvelope, type Point } from "./FlightEnvelope";
 import type { WorkspaceNodeData } from "@/store/canvas";

 // Fallback node footprint when React Flow has not measured a node yet. Matches
-// WorkspaceNode's leaf size (w-[300px] min-h-[176px]); a slightly-off centre
-// for the first frame after mount is invisible at flight scale.
+// WorkspaceNode's leaf size (w-[300px] min-h-[176px]); a slightly-off centre for
+// the first frame after mount is invisible at flight scale.
 const DEFAULT_W = 300;
 const DEFAULT_H = 176;

@@ -24,23 +32,76 @@ function nodeCenter(n: Node<WorkspaceNodeData>): Point {
  return { x: n.position.x + w / 2, y: n.position.y + h / 2 };
 }

+/** Resolve a node's status-dot centre in FLOW coordinates. Reads the dot's
+ *  rendered screen rect (it carries data-flight-anchor) and converts it back to
+ *  flow space, so the anchor is exact regardless of pan/zoom and survives any
+ *  header-layout change. Falls back to the card centre when the dot isn't
+ *  rendered. */
+function dotAnchor(
+  n: Node<WorkspaceNodeData>,
+  screenToFlowPosition: (p: Point) => Point,
+): Point {
+  if (typeof document !== "undefined") {
+    const id =
+      typeof CSS !== "undefined" && typeof CSS.escape === "function" ? CSS.escape(n.id) : n.id;
+    const el = document.querySelector<HTMLElement>(
+      `.react-flow__node[data-id="${id}"] [data-flight-anchor]`,
+    );
+    if (el) {
+      const r = el.getBoundingClientRect();
+      if (r.width > 0 && r.height > 0) {
+        return screenToFlowPosition({ x: r.left + r.width / 2, y: r.top + r.height / 2 });
+      }
+    }
+  }
+  return nodeCenter(n);
+}
+
+/** One flight. Captures the source/target dot anchors ONCE on mount (a ref, not
+ *  per-render) so a pan/zoom or re-render mid-flight doesn't restart the
+ *  animation — mirrors HomeFlight's capture-once contract. */
+function CanvasFlight({
+  flight,
+  nodes,
+  screenToFlowPosition,
+}: {
+  flight: A2AFlight;
+  nodes: Node<WorkspaceNodeData>[];
+  screenToFlowPosition: (p: Point) => Point;
+}) {
+  const pos = useRef<{ from: Point; to: Point } | null>(null);
+  if (pos.current === null) {
+    const src = nodes.find((n) => n.id === flight.sourceId);
+    const dst = nodes.find((n) => n.id === flight.targetId);
+    // Both endpoints must be on-canvas to draw a path between them.
+    if (src && dst) {
+      pos.current = {
+        from: dotAnchor(src, screenToFlowPosition),
+        to: dotAnchor(dst, screenToFlowPosition),
+      };
+    }
+  }
+  if (!pos.current) return null;
+  return <FlightEnvelope from={pos.current.from} to={pos.current.to} kind={flight.kind} />;
+}
+
 export function MessageFlightLayer() {
  const flights = useA2AFlights();
-  const nodes = useCanvasStore((s) => s.nodes);
+  const nodes = useCanvasStore((s) => s.nodes) as Node<WorkspaceNodeData>[];
+  const { screenToFlowPosition } = useReactFlow();

  if (flights.length === 0) return null;

  return (
    <ViewportPortal>
-      {flights.map((f) => {
-        const src = nodes.find((n) => n.id === f.sourceId);
-        const dst = nodes.find((n) => n.id === f.targetId);
-        // Both endpoints must be on-canvas to draw a path between them.
-        if (!src || !dst) return null;
-        return (
-          <FlightEnvelope key={f.key} from={nodeCenter(src)} to={nodeCenter(dst)} kind={f.kind} />
-        );
-      })}
+      {flights.map((f) => (
+        <CanvasFlight
+          key={f.key}
+          flight={f}
+          nodes={nodes}
+          screenToFlowPosition={screenToFlowPosition}
+        />
+      ))}
    </ViewportPortal>
  );
 }
@@ -215,7 +215,7 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
        {/* Header row */}
        <div className="flex items-center justify-between gap-2 mb-2.5">
          <div className="flex items-center gap-2.5 min-w-0">
-            <div className={`w-2.5 h-2.5 rounded-full shrink-0 ${statusCfg.dot} ${statusCfg.glow} shadow-sm`} />
+            <div data-flight-anchor className={`w-2.5 h-2.5 rounded-full shrink-0 ${statusCfg.dot} ${statusCfg.glow} shadow-sm`} />
            <span className="text-[15px] font-semibold text-ink truncate leading-tight">
              {data.name}
            </span>
@@ -0,0 +1,54 @@
+// @vitest-environment jsdom
+/**
+ * Tests for FlightEnvelope — the envelope that animates from `from` to `to`.
+ *
+ * Locks the render contract the canvas + concierge-home both depend on:
+ *  - the envelope is positioned at the `from` point (its launch anchor),
+ *  - it is coloured by activity kind,
+ *  - it degrades gracefully when Element.animate is unavailable (jsdom / SSR).
+ *
+ * The grow→shrink scale arc itself uses the Web Animations API, which jsdom
+ * does not implement, so we assert the static render + graceful degradation
+ * rather than keyframe values.
+ */
+import React from "react";
+import { render, cleanup } from "@testing-library/react";
+import { afterEach, describe, expect, it } from "vitest";
+import { FlightEnvelope } from "../FlightEnvelope";
+
+afterEach(cleanup);
+
+describe("FlightEnvelope", () => {
+  it("positions the envelope at the `from` launch point", () => {
+    const { getByTestId } = render(
+      <FlightEnvelope from={{ x: 120, y: 240 }} to={{ x: 400, y: 60 }} kind="send" />,
+    );
+    const el = getByTestId("flight-envelope");
+    expect(el.style.left).toBe("120px");
+    expect(el.style.top).toBe("240px");
+    expect(el.querySelector("svg")).toBeTruthy();
+  });
+
+  it("colours the envelope by activity kind", () => {
+    const stroke = (kind: "send" | "receive" | "task") => {
+      const { container } = render(
+        <FlightEnvelope from={{ x: 0, y: 0 }} to={{ x: 10, y: 10 }} kind={kind} />,
+      );
+      const s = container.querySelector("rect")?.getAttribute("stroke");
+      cleanup();
+      return s;
+    };
+    expect(stroke("send")).toBe("#22d3ee");
+    expect(stroke("receive")).toBe("#8b5cf6");
+    expect(stroke("task")).toBe("#f5a623");
+  });
+
+  it("degrades to a static render (no throw) when Element.animate is unavailable", () => {
+    // jsdom does not implement Element.animate — the component must still render.
+    expect(typeof document.createElement("div").animate).not.toBe("function");
+    const { getByTestId } = render(
+      <FlightEnvelope from={{ x: 0, y: 0 }} to={{ x: 1, y: 1 }} kind="task" />,
+    );
+    expect(getByTestId("flight-envelope")).toBeTruthy();
+  });
+});
@@ -227,3 +227,58 @@ func TestCacheKey_SlugSeparator(t *testing.T) {
 		t.Errorf("cacheKey collides on ambiguous splits")
 	}
 }
+
+func TestTenantSlug(t *testing.T) {
+	t.Setenv("MOLECULE_ORG_SLUG", "acme-corp")
+	if got := tenantSlug(); got != "acme-corp" {
+		t.Errorf("tenantSlug() = %q, want %q", got, "acme-corp")
+	}
+}
+
+func TestTenantSlug_TrimSpace(t *testing.T) {
+	t.Setenv("MOLECULE_ORG_SLUG", "  spaced-slug  ")
+	if got := tenantSlug(); got != "spaced-slug" {
+		t.Errorf("tenantSlug() = %q, want %q", got, "spaced-slug")
+	}
+}
+
+func TestTenantSlug_Empty(t *testing.T) {
+	t.Setenv("MOLECULE_ORG_SLUG", "")
+	if got := tenantSlug(); got != "" {
+		t.Errorf("tenantSlug() = %q, want empty", got)
+	}
+}
+
+func TestCPSessionVerifyURL(t *testing.T) {
+	t.Setenv("CP_UPSTREAM_URL", "https://cp.test")
+	got := cpSessionVerifyURL("acme")
+	want := "https://cp.test/cp/auth/tenant-member?slug=acme"
+	if got != want {
+		t.Errorf("cpSessionVerifyURL() = %q, want %q", got, want)
+	}
+}
+
+func TestCPSessionVerifyURL_TrailingSlash(t *testing.T) {
+	t.Setenv("CP_UPSTREAM_URL", "https://cp.test/")
+	got := cpSessionVerifyURL("acme")
+	want := "https://cp.test/cp/auth/tenant-member?slug=acme"
+	if got != want {
+		t.Errorf("cpSessionVerifyURL() = %q, want %q", got, want)
+	}
+}
+
+func TestCPSessionVerifyURL_EscapeSlug(t *testing.T) {
+	t.Setenv("CP_UPSTREAM_URL", "https://cp.test")
+	got := cpSessionVerifyURL("acme corp")
+	want := "https://cp.test/cp/auth/tenant-member?slug=acme+corp"
+	if got != want {
+		t.Errorf("cpSessionVerifyURL() = %q, want %q", got, want)
+	}
+}
+
+func TestCPSessionVerifyURL_NoCPConfigured(t *testing.T) {
+	t.Setenv("CP_UPSTREAM_URL", "")
+	if got := cpSessionVerifyURL("acme"); got != "" {
+		t.Errorf("cpSessionVerifyURL() = %q, want empty when CP_UPSTREAM_URL unset", got)
+	}
+}
@@ -253,7 +253,7 @@ func TestStart_SendsTemplateAndGeneratedConfigFiles(t *testing.T) {
 	if err := os.WriteFile(filepath.Join(tmpl, "config.yaml"), []byte("name: template\n"), 0o600); err != nil {
 		t.Fatal(err)
 	}
-	if err := os.WriteFile(filepath.Join(tmpl, "adapter.py"), bytes.Repeat([]byte("x"), cpConfigFilesMaxBytes), 0o600); err != nil {
+	if err := os.WriteFile(filepath.Join(tmpl, "adapter.py"), bytes.Repeat([]byte("x"), cpConfigFilesMaxBytes-100), 0o600); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.Mkdir(filepath.Join(tmpl, "prompts"), 0o700); err != nil {
@@ -378,7 +378,7 @@ func TestStart_CollectsConfigFiles(t *testing.T) {
 	}
 	// adapter.py is within the size limit but is NOT config.yaml or prompts/,
 	// so isCPTemplateConfigFile must exclude it from the transport.
-	if err := os.WriteFile(filepath.Join(tmpl, "adapter.py"), bytes.Repeat([]byte("x"), cpConfigFilesMaxBytes), 0o600); err != nil {
+	if err := os.WriteFile(filepath.Join(tmpl, "adapter.py"), bytes.Repeat([]byte("x"), cpConfigFilesMaxBytes-100), 0o600); err != nil {
 		t.Fatal(err)
 	}

@@ -198,6 +198,11 @@ const (

 // ConfigVolumeName returns the Docker named volume for a workspace's configs.
 func ConfigVolumeName(workspaceID string) string {
+	return fmt.Sprintf("ws-%s-configs", workspaceID)
+}
+
+// legacyConfigVolumeName returns the pre-KI-013 truncated config volume name.
+func legacyConfigVolumeName(workspaceID string) string {
 	id := workspaceID
 	if len(id) > 12 {
 		id = id[:12]
@@ -210,6 +215,11 @@ func ConfigVolumeName(workspaceID string) string {
 // config volume so it can be discarded independently (via WORKSPACE_RESET_SESSION
 // or ?reset=true) without wiping the user's config. Issue #12.
 func ClaudeSessionVolumeName(workspaceID string) string {
+	return fmt.Sprintf("ws-%s-claude-sessions", workspaceID)
+}
+
+// legacyClaudeSessionVolumeName returns the pre-KI-013 truncated session volume name.
+func legacyClaudeSessionVolumeName(workspaceID string) string {
 	id := workspaceID
 	if len(id) > 12 {
 		id = id[:12]
@@ -233,6 +243,12 @@ func New() (*Provisioner, error) {

 // ContainerName returns the Docker container name for a workspace.
 func ContainerName(workspaceID string) string {
+	return fmt.Sprintf("ws-%s", workspaceID)
+}
+
+// legacyContainerName returns the pre-KI-013 truncated container name.
+// Used only for backward-compatible lookups during the deploy transition.
+func legacyContainerName(workspaceID string) string {
 	id := workspaceID
 	if len(id) > 12 {
 		id = id[:12]
@@ -474,7 +490,9 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
 		return "", ErrNoBackend
 	}
 	name := ContainerName(cfg.WorkspaceID)
-	configVolume := ConfigVolumeName(cfg.WorkspaceID)
+	// KI-013 deploy safety: prefer legacy truncated config volume if it
+	// already exists, so pre-deploy workspace data is not orphaned.
+	configVolume := p.resolveConfigVolumeName(ctx, cfg.WorkspaceID)

 	// Create named volume for configs (idempotent — no-op if already exists)
 	_, err := p.cli.VolumeCreate(ctx, volume.CreateOptions{
@@ -569,7 +587,9 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
 	// remove the existing volume before recreating it, so the agent
 	// boots with a clean session dir.
 	if cfg.Runtime == "claude-code" {
-		claudeSessionsVolume := ClaudeSessionVolumeName(cfg.WorkspaceID)
+		// KI-013 deploy safety: prefer legacy truncated session volume if it
+		// already exists, so pre-deploy session data is not orphaned.
+		claudeSessionsVolume := p.resolveClaudeSessionVolumeName(ctx, cfg.WorkspaceID)
 		resetEnv, _ := strconv.ParseBool(cfg.EnvVars["WORKSPACE_RESET_SESSION"])
 		if cfg.ResetClaudeSession || resetEnv {
 			if rmErr := p.cli.VolumeRemove(ctx, claudeSessionsVolume, true); rmErr != nil {
@@ -1288,7 +1308,7 @@ func (p *Provisioner) WriteAuthTokenToVolume(ctx context.Context, workspaceID, t
 	if p == nil || p.cli == nil {
 		return ErrNoBackend
 	}
-	volName := ConfigVolumeName(workspaceID)
+	volName := p.resolveConfigVolumeName(ctx, workspaceID)
 	resp, err := p.cli.ContainerCreate(ctx, &container.Config{
 		Image: "alpine",
 		Cmd:   []string{"sh", "-c", writeAuthTokenVolumeCmd()},
@@ -1315,6 +1335,33 @@ func (p *Provisioner) WriteAuthTokenToVolume(ctx context.Context, workspaceID, t
 	return nil
 }

+// resolveConfigVolumeName returns the effective config volume name for a
+// workspace, preferring the legacy truncated name if that volume already
+// exists (KI-013 deploy safety: pre-deploy volumes must not be orphaned).
+func (p *Provisioner) resolveConfigVolumeName(ctx context.Context, workspaceID string) string {
+	if p == nil || p.cli == nil {
+		return ConfigVolumeName(workspaceID)
+	}
+	legacy := legacyConfigVolumeName(workspaceID)
+	if _, err := p.cli.VolumeInspect(ctx, legacy); err == nil {
+		return legacy
+	}
+	return ConfigVolumeName(workspaceID)
+}
+
+// resolveClaudeSessionVolumeName returns the effective claude-sessions volume
+// name, preferring the legacy truncated name if that volume already exists.
+func (p *Provisioner) resolveClaudeSessionVolumeName(ctx context.Context, workspaceID string) string {
+	if p == nil || p.cli == nil {
+		return ClaudeSessionVolumeName(workspaceID)
+	}
+	legacy := legacyClaudeSessionVolumeName(workspaceID)
+	if _, err := p.cli.VolumeInspect(ctx, legacy); err == nil {
+		return legacy
+	}
+	return ClaudeSessionVolumeName(workspaceID)
+}
+
 // RemoveVolume removes the config volume for a workspace.
 // Also removes the claude-sessions volume (best-effort, may not exist
 // for non claude-code runtimes). Issue #12.
@@ -1322,16 +1369,22 @@ func (p *Provisioner) RemoveVolume(ctx context.Context, workspaceID string) erro
 	if p == nil || p.cli == nil {
 		return ErrNoBackend
 	}
-	volName := ConfigVolumeName(workspaceID)
-	if err := p.cli.VolumeRemove(ctx, volName, true); err != nil {
-		return fmt.Errorf("failed to remove volume %s: %w", volName, err)
+	// KI-013 deploy safety: remove both new full-ID name and legacy
+	// truncated name if present, so pre-deploy volumes are not orphaned.
+	removed := false
+	for _, volName := range []string{ConfigVolumeName(workspaceID), legacyConfigVolumeName(workspaceID)} {
+		if err := p.cli.VolumeRemove(ctx, volName, true); err == nil {
+			log.Printf("Provisioner: removed config volume %s", volName)
+			removed = true
+		}
 	}
-	log.Printf("Provisioner: removed config volume %s", volName)
-	csName := ClaudeSessionVolumeName(workspaceID)
-	if rmErr := p.cli.VolumeRemove(ctx, csName, true); rmErr != nil {
-		log.Printf("Provisioner: claude-sessions volume cleanup warning for %s: %v", csName, rmErr)
-	} else {
-		log.Printf("Provisioner: removed claude-sessions volume %s", csName)
+	if !removed {
+		return fmt.Errorf("failed to remove config volume for %s", workspaceID)
+	}
+	for _, csName := range []string{ClaudeSessionVolumeName(workspaceID), legacyClaudeSessionVolumeName(workspaceID)} {
+		if rmErr := p.cli.VolumeRemove(ctx, csName, true); rmErr == nil {
+			log.Printf("Provisioner: removed claude-sessions volume %s", csName)
+		}
 	}
 	return nil
 }
@@ -1354,37 +1407,34 @@ func (p *Provisioner) Stop(ctx context.Context, workspaceID string) error {
 	if p == nil || p.cli == nil {
 		return ErrNoBackend
 	}
-	name := ContainerName(workspaceID)
-
-	// Force-remove kills and removes in one atomic operation, bypassing
-	// the restart policy entirely.
-	err := p.cli.ContainerRemove(ctx, name, container.RemoveOptions{Force: true})
-	if err == nil {
-		log.Printf("Provisioner: stopped and removed container %s", name)
-		return nil
+	// KI-013 deploy safety: try new full-ID name first, then fall back to
+	// the old truncated name so pre-deploy containers are still stoppable.
+	names := []string{ContainerName(workspaceID), legacyContainerName(workspaceID)}
+	for _, name := range names {
+		// Force-remove kills and removes in one atomic operation, bypassing
+		// the restart policy entirely.
+		err := p.cli.ContainerRemove(ctx, name, container.RemoveOptions{Force: true})
+		if err == nil {
+			log.Printf("Provisioner: stopped and removed container %s", name)
+			return nil
+		}
+		if isContainerNotFound(err) {
+			// Try the next name (legacy fallback). If both miss, the
+			// container is genuinely gone — post-condition satisfied.
+			continue
+		}
+		if isRemovalInProgress(err) {
+			// Another concurrent caller is already removing this container.
+			log.Printf("Provisioner: container %s removal already in progress (no-op)", name)
+			return nil
+		}
+		// Real failure: daemon timeout, socket EOF, ctx cancellation, etc.
+		log.Printf("Provisioner: force-remove failed for %s: %v", name, err)
+		return fmt.Errorf("force-remove %s: %w", name, err)
 	}
-	if isContainerNotFound(err) {
-		// Container was already gone — the post-condition we want is
-		// satisfied. Don't surface as an error.
-		log.Printf("Provisioner: container %s already gone (no-op)", name)
-		return nil
-	}
-	if isRemovalInProgress(err) {
-		// Another concurrent caller (orphan sweeper, sibling cascade
-		// delete, manual `docker rm -f`) is already removing this
-		// container. The post-condition is the same as success: the
-		// container WILL be gone shortly. Surfacing this as a 500 on
-		// cascade-delete causes UI confusion ("workspace marked
-		// removed, but stop call(s) failed — please retry") even
-		// though retrying would just race the same in-flight removal.
-		log.Printf("Provisioner: container %s removal already in progress (no-op)", name)
-		return nil
-	}
-	// Real failure: daemon timeout, socket EOF, ctx cancellation, etc.
-	// Caller (workspace_crud.stopAndRemove, orphan_sweeper.sweepOnce)
-	// must propagate this so they can skip the follow-up RemoveVolume.
-	log.Printf("Provisioner: force-remove failed for %s: %v", name, err)
-	return fmt.Errorf("force-remove %s: %w", name, err)
+	// Both names missed — container was already gone.
+	log.Printf("Provisioner: container %s already gone (no-op)", ContainerName(workspaceID))
+	return nil
 }

 // IsRunning checks if a workspace container is currently running.
@@ -1444,16 +1494,20 @@ func RunningContainerName(ctx context.Context, cli *client.Client, workspaceID s
 	if cli == nil {
 		return "", ErrNoBackend
 	}
-	name := ContainerName(workspaceID)
-	info, err := cli.ContainerInspect(ctx, name)
-	if err != nil {
-		if isContainerNotFound(err) {
-			return "", nil
+	// KI-013 deploy safety: new full-ID name first, then fall back to the
+	// old truncated name so pre-deploy containers are still discoverable.
+	names := []string{ContainerName(workspaceID), legacyContainerName(workspaceID)}
+	for _, name := range names {
+		info, err := cli.ContainerInspect(ctx, name)
+		if err != nil {
+			if isContainerNotFound(err) {
+				continue
+			}
+			return "", err
+		}
+		if info.State.Running {
+			return name, nil
 		}
-		return "", err
-	}
-	if info.State.Running {
-		return name, nil
 	}
 	return "", nil
 }
@@ -425,7 +425,7 @@ func TestContainerName(t *testing.T) {
 	}{
 		{"short", "ws-short"},
 		{"exactly12ch", "ws-exactly12ch"},
-		{"longer-than-twelve-characters", "ws-longer-than-"},
+		{"longer-than-twelve-characters", "ws-longer-than-twelve-characters"},
 		{"abc", "ws-abc"},
 	}

@@ -437,6 +437,17 @@ func TestContainerName(t *testing.T) {
 	}
 }

+// TestContainerName_DistinctSamePrefix12 is a regression guard for KI-013:
+// two UUIDs sharing the same first 12 characters must produce distinct
+// container names (the old 12-char truncation caused collisions).
+func TestContainerName_DistinctSamePrefix12(t *testing.T) {
+	id1 := "123456789abc-4def-1234-567890abcdef"
+	id2 := "123456789abc-4def-1234-567890abcdf0"
+	if ContainerName(id1) == ContainerName(id2) {
+		t.Fatalf("ContainerName must differ for same-first-12 UUIDs: both = %q", ContainerName(id1))
+	}
+}
+
 // TestConfigVolumeName verifies config volume naming.
 func TestConfigVolumeName(t *testing.T) {
 	tests := []struct {
@@ -445,7 +456,7 @@ func TestConfigVolumeName(t *testing.T) {
 	}{
 		{"short", "ws-short-configs"},
 		{"exactly12ch", "ws-exactly12ch-configs"},
-		{"longer-than-twelve-characters", "ws-longer-than--configs"},
+		{"longer-than-twelve-characters", "ws-longer-than-twelve-characters-configs"},
 		{"abc", "ws-abc-configs"},
 	}

@@ -457,10 +468,19 @@ func TestConfigVolumeName(t *testing.T) {
 	}
 }

+// TestConfigVolumeName_DistinctSamePrefix12 is a regression guard for KI-013.
+func TestConfigVolumeName_DistinctSamePrefix12(t *testing.T) {
+	id1 := "123456789abc-4def-1234-567890abcdef"
+	id2 := "123456789abc-4def-1234-567890abcdf0"
+	if ConfigVolumeName(id1) == ConfigVolumeName(id2) {
+		t.Fatalf("ConfigVolumeName must differ for same-first-12 UUIDs: both = %q", ConfigVolumeName(id1))
+	}
+}
+
 // ---------- #12 — claude-sessions volume naming ----------

 // TestClaudeSessionVolumeName_Deterministic: same ID → same volume name, and
-// the name follows the ws-<id[:12]>-claude-sessions shape used everywhere
+// the name follows the ws-<id>-claude-sessions shape used everywhere
 // else in the provisioner.
 func TestClaudeSessionVolumeName_Deterministic(t *testing.T) {
 	tests := []struct {
@@ -469,7 +489,7 @@ func TestClaudeSessionVolumeName_Deterministic(t *testing.T) {
 	}{
 		{"short", "ws-short-claude-sessions"},
 		{"exactly12ch", "ws-exactly12ch-claude-sessions"},
-		{"longer-than-twelve-characters", "ws-longer-than--claude-sessions"},
+		{"longer-than-twelve-characters", "ws-longer-than-twelve-characters-claude-sessions"},
 		{"abc", "ws-abc-claude-sessions"},
 	}
 	for _, tt := range tests {
@@ -484,6 +504,15 @@ func TestClaudeSessionVolumeName_Deterministic(t *testing.T) {
 	}
 }

+// TestClaudeSessionVolumeName_DistinctSamePrefix12 is a regression guard for KI-013.
+func TestClaudeSessionVolumeName_DistinctSamePrefix12(t *testing.T) {
+	id1 := "123456789abc-4def-1234-567890abcdef"
+	id2 := "123456789abc-4def-1234-567890abcdf0"
+	if ClaudeSessionVolumeName(id1) == ClaudeSessionVolumeName(id2) {
+		t.Fatalf("ClaudeSessionVolumeName must differ for same-first-12 UUIDs: both = %q", ClaudeSessionVolumeName(id1))
+	}
+}
+
 // TestClaudeSessionVolumeName_DistinctFromConfig ensures we never alias the
 // claude-sessions volume onto the config volume (deleting one must not wipe
 // the other in RemoveVolume's cleanup path).
@@ -1163,3 +1163,109 @@ func TestSanitizeUTF8(t *testing.T) {
 		t.Errorf("sanitizeUTF8 did not produce valid UTF-8: %x", []byte(out))
 	}
 }
+
+// ── TestClassifyTaskState ───────────────────────────────────────────────────
+
+func TestClassifyTaskState_NoStatus(t *testing.T) {
+	result := map[string]json.RawMessage{"other": json.RawMessage(`"x"`)}
+	if got := classifyTaskState(result); got != "" {
+		t.Errorf("classifyTaskState(no status) = %q, want empty", got)
+	}
+}
+
+func TestClassifyTaskState_OKStates(t *testing.T) {
+	for _, state := range []string{"", "submitted", "working", "completed"} {
+		result := map[string]json.RawMessage{
+			"status": json.RawMessage(`{"state":"` + state + `"}`),
+		}
+		if got := classifyTaskState(result); got != "" {
+			t.Errorf("classifyTaskState(%q) = %q, want empty (OK state)", state, got)
+		}
+	}
+}
+
+func TestClassifyTaskState_FailureState(t *testing.T) {
+	result := map[string]json.RawMessage{
+		"status": json.RawMessage(`{"state":"failed"}`),
+	}
+	if got := classifyTaskState(result); got != "failed" {
+		t.Errorf("classifyTaskState(failed) = %q, want failed", got)
+	}
+}
+
+func TestClassifyTaskState_MalformedStatus(t *testing.T) {
+	result := map[string]json.RawMessage{
+		"status": json.RawMessage(`{broken`),
+	}
+	if got := classifyTaskState(result); got != "" {
+		t.Errorf("classifyTaskState(malformed) = %q, want empty", got)
+	}
+}
+
+// ── TestIsEmptyResponse ─────────────────────────────────────────────────────
+
+func TestIsEmptyResponse_EmptyBody(t *testing.T) {
+	if !isEmptyResponse([]byte{}) {
+		t.Error("isEmptyResponse(empty) should be true")
+	}
+}
+
+func TestIsEmptyResponse_NoResponseGenerated(t *testing.T) {
+	if !isEmptyResponse([]byte(`(no response generated)`)) {
+		t.Error("isEmptyResponse(no-response-generated) should be true")
+	}
+}
+
+func TestIsEmptyResponse_TextFieldEmpty(t *testing.T) {
+	if !isEmptyResponse([]byte(`{"result":{"parts":[{"text":""}]}}`)) {
+		t.Error("isEmptyResponse(empty text field) should be true")
+	}
+}
+
+func TestIsEmptyResponse_TextFieldNoResponse(t *testing.T) {
+	if !isEmptyResponse([]byte(`{"result":{"parts":[{"text":"(no response generated)"}]}}`)) {
+		t.Error("isEmptyResponse(text=no-response-generated) should be true")
+	}
+}
+
+func TestIsEmptyResponse_HasContent(t *testing.T) {
+	if isEmptyResponse([]byte(`{"result":{"parts":[{"text":"hello"}]}}`)) {
+		t.Error("isEmptyResponse(with content) should be false")
+	}
+}
+
+// ── TestA2AErrorFromBody ────────────────────────────────────────────────────
+
+func TestA2AErrorFromBody_Empty(t *testing.T) {
+	if got := a2aErrorFromBody([]byte{}); got != "" {
+		t.Errorf("a2aErrorFromBody(empty) = %q, want empty", got)
+	}
+}
+
+func TestA2AErrorFromBody_JSONRPCMessage(t *testing.T) {
+	body := []byte(`{"error":{"code":-32603,"message":"internal error"}}`)
+	if got := a2aErrorFromBody(body); got != "internal error" {
+		t.Errorf("a2aErrorFromBody(JSON-RPC) = %q, want internal error", got)
+	}
+}
+
+func TestA2AErrorFromBody_PlainString(t *testing.T) {
+	body := []byte(`{"error":"something went wrong"}`)
+	if got := a2aErrorFromBody(body); got != "something went wrong" {
+		t.Errorf("a2aErrorFromBody(plain) = %q, want something went wrong", got)
+	}
+}
+
+func TestA2AErrorFromBody_NoError(t *testing.T) {
+	body := []byte(`{"result":"ok"}`)
+	if got := a2aErrorFromBody(body); got != "" {
+		t.Errorf("a2aErrorFromBody(no error) = %q, want empty", got)
+	}
+}
+
+func TestA2AErrorFromBody_InvalidJSON(t *testing.T) {
+	body := []byte(`{broken`)
+	if got := a2aErrorFromBody(body); got != "" {
+		t.Errorf("a2aErrorFromBody(invalid) = %q, want empty", got)
+	}
+}